def generate_csv_files(args, data_handler): """ Generate the CSV file used to call the R script. :param Namespace args: :param DataHandler data_handler: """ assert isinstance(args, argparse.Namespace) assert isinstance(data_handler, DataHandler) biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits, select_training_set=True, exclude_deceased=args.exclude_deceased) for biomarker in biomarkers: print log.INFO, 'Generating output CSV for {0}...'.format(biomarker) samples_file = data_handler.get_samples_file(biomarker) writer = csv.writer(open(samples_file, 'wb'), delimiter=',') writer.writerow(['rid', 'progress', 'value', 'diagnosis']) subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) diagnosis = DataHandler.safe_cast(visit_data['DX.scan'], float) if progress is not None and value is not None: writer.writerow([rid, progress, value, diagnosis]) subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Collected {0} samples from {1} subjects.'.format(num_samples, len(subjects))
def print_training_samples_statistics(args, data_handler): biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict(min_visits=args.min_visits, select_training_set=True, no_regression=True) for biomarker in biomarkers: subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) if progress is not None and value is not None: subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format(biomarker, num_samples, len(subjects))
def print_training_samples_statistics(args, data_handler): biomarkers = data_handler.get_biomarker_names() measurements = data_handler.get_measurements_as_dict( min_visits=args.min_visits, select_training_set=True, no_regression=True) for biomarker in biomarkers: subjects = set() num_samples = 0 for rid, visits in measurements.items(): for _, visit_data in visits.items(): try: progress = DataHandler.safe_cast(visit_data['progress'], int) value = DataHandler.safe_cast(visit_data[biomarker], float) if progress is not None and value is not None: subjects.add(rid) num_samples += 1 except KeyError: pass print log.RESULT, 'Biomarker {0}: collected {1} samples from {2} subjects.'.format( biomarker, num_samples, len(subjects))