def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    The first CSV column has to be named ``Biomarker Name``; every further
    column header is a visit code that must be one of ``bl``, ``m12``,
    ``m24`` or ``m36``. For compatibility with the library, the dict has
    the following structure (all samples are stored under the rid 0):

    { <rid> : { <viscode> : { DX.scan : <diagnosis> }
                            { scantime : <days after bl> }
                            { <biomarker1> : <volume> }
                            ... }
              { <viscode> : ... }}

    Rows whose biomarker is not listed by
    ``DataHandler.get_all_biomarker_names()`` are ignored, and so are
    cells that are missing or cannot be parsed as a float.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the sorted list
             of biomarkers for which at least one value was read
    :raises KeyError: if a visit column is not one of the known visit codes
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    # Loop-invariant, so look the valid names up only once.
    # NOTE(review): assumes the names are hashable (strings) -- confirm.
    known_biomarkers = set(DataHandler.get_all_biomarker_names())

    biomarkers = set()
    sample = {}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)

        # fieldnames is None for an empty file; treat that as "no visits".
        visits = (reader.fieldnames or [])[1:]
        for visit in visits:
            sample[visit] = {'scantime': scantime_dict[visit],
                             'DX.scan': 'UNKNOWN'}

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # TypeError: row shorter than the header (cell is None);
                    # ValueError: empty or non-numeric cell.
                    continue
                sample[visit][biomarker] = value
                biomarkers.add(biomarker)

    # Sorted so the order of the returned names is reproducible.
    return {0: sample}, sorted(biomarkers)
def main():
    """
    Compute and store the mean first-year change of every biomarker.

    For each name returned by ``DataHandler.get_all_biomarker_names()``
    the ``bl`` and ``m12`` measurements are fetched and the per-subject
    rate of change

        (value_m12 - value_bl) / (scantime_m12 - scantime_bl)

    is averaged per baseline ``DX.scan`` code. The codes 0.0, 0.25, 0.75
    and 1.0 are reported as CN, EMCI, LMCI and AD. An additional entry
    with the key 0.66 holds the mean over the 0.25, 0.75 and 1.0 groups
    pooled together. Groups without any subject get ``nan``.

    The resulting ``{biomarker: {code: mean change}}`` dict is printed
    and pickled to ``mean_changes.p`` in the eval folder of the data
    handler.

    :raises KeyError: if a baseline diagnosis is not one of the four codes
    """
    groups = ((0.0, 'CN'), (0.25, 'EMCI'), (0.75, 'LMCI'), (1.0, 'AD'))
    pooled_codes = (0.25, 0.75, 1.0)
    nan = float('nan')

    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'],
            biomarkers=[biomarker],
            select_complete=True)

        change_sums = dict((code, 0.0) for code, _ in groups)
        num_subjects = dict((code, 0) for code, _ in groups)
        for rid in measurements:
            baseline = measurements[rid]['bl']
            follow_up = measurements[rid]['m12']
            diagnosis = baseline['DX.scan']
            change = ((follow_up[biomarker] - baseline[biomarker]) /
                      (follow_up['scantime'] - baseline['scantime']))
            change_sums[diagnosis] += change
            num_subjects[diagnosis] += 1

        # Pool the raw sums before they are turned into per-group means.
        pooled_sum = sum(change_sums[code] for code in pooled_codes)
        pooled_count = sum(num_subjects[code] for code in pooled_codes)

        # An empty group has no defined mean; store nan rather than
        # failing with ZeroDivisionError.
        mean_changes_biomarker = {}
        for code, _ in groups:
            if num_subjects[code]:
                mean_changes_biomarker[code] = (
                    change_sums[code] / num_subjects[code])
            else:
                mean_changes_biomarker[code] = nan
        if pooled_count:
            mean_changes_biomarker[0.66] = pooled_sum / pooled_count
        else:
            mean_changes_biomarker[0.66] = nan
        mean_changes[biomarker] = mean_changes_biomarker

        for code, label in groups:
            message = '{0} {1}: {2}, (n={3})'.format(
                biomarker, label, mean_changes_biomarker[code],
                num_subjects[code])
            # Single-argument print(...) gives the same output under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (log.RESULT, message))

    mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                     'mean_changes.p')
    # Context manager makes sure the pickle is flushed and closed.
    with open(mean_changes_file, 'wb') as handle:
        pickle.dump(mean_changes, handle)
def main():
    """
    Compute and store the mean first-year change of every biomarker.

    For each name returned by ``DataHandler.get_all_biomarker_names()``
    the ``bl`` and ``m12`` measurements are fetched and the per-subject
    rate of change

        (value_m12 - value_bl) / (scantime_m12 - scantime_bl)

    is averaged per baseline ``DX.scan`` code. The codes 0.0, 0.25, 0.75
    and 1.0 are reported as CN, EMCI, LMCI and AD. An additional entry
    with the key 0.66 holds the mean over the 0.25, 0.75 and 1.0 groups
    pooled together. Groups without any subject get ``nan``.

    The resulting ``{biomarker: {code: mean change}}`` dict is printed
    and pickled to ``mean_changes.p`` in the eval folder of the data
    handler.

    :raises KeyError: if a baseline diagnosis is not one of the four codes
    """
    groups = ((0.0, 'CN'), (0.25, 'EMCI'), (0.75, 'LMCI'), (1.0, 'AD'))
    pooled_codes = (0.25, 0.75, 1.0)
    nan = float('nan')

    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'],
            biomarkers=[biomarker],
            select_complete=True)

        change_sums = dict((code, 0.0) for code, _ in groups)
        num_subjects = dict((code, 0) for code, _ in groups)
        for rid in measurements:
            baseline = measurements[rid]['bl']
            follow_up = measurements[rid]['m12']
            diagnosis = baseline['DX.scan']
            change = ((follow_up[biomarker] - baseline[biomarker]) /
                      (follow_up['scantime'] - baseline['scantime']))
            change_sums[diagnosis] += change
            num_subjects[diagnosis] += 1

        # Pool the raw sums before they are turned into per-group means.
        pooled_sum = sum(change_sums[code] for code in pooled_codes)
        pooled_count = sum(num_subjects[code] for code in pooled_codes)

        # An empty group has no defined mean; store nan rather than
        # failing with ZeroDivisionError.
        mean_changes_biomarker = {}
        for code, _ in groups:
            if num_subjects[code]:
                mean_changes_biomarker[code] = (
                    change_sums[code] / num_subjects[code])
            else:
                mean_changes_biomarker[code] = nan
        if pooled_count:
            mean_changes_biomarker[0.66] = pooled_sum / pooled_count
        else:
            mean_changes_biomarker[0.66] = nan
        mean_changes[biomarker] = mean_changes_biomarker

        for code, label in groups:
            message = '{0} {1}: {2}, (n={3})'.format(
                biomarker, label, mean_changes_biomarker[code],
                num_subjects[code])
            # Single-argument print(...) gives the same output under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (log.RESULT, message))

    mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                     'mean_changes.p')
    # Context manager makes sure the pickle is flushed and closed.
    with open(mean_changes_file, 'wb') as handle:
        pickle.dump(mean_changes, handle)
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    The first CSV column has to be named ``Biomarker Name``; every further
    column header is a visit code that must be one of ``bl``, ``m12``,
    ``m24`` or ``m36``. For compatibility with the library, the dict has
    the following structure (all samples are stored under the rid 0):

    { <rid> : { <viscode> : { DX.scan : <diagnosis> }
                            { scantime : <days after bl> }
                            { <biomarker1> : <volume> }
                            ... }
              { <viscode> : ... }}

    Rows whose biomarker is not listed by
    ``DataHandler.get_all_biomarker_names()`` are ignored, and so are
    cells that are missing or cannot be parsed as a float.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and the sorted list
             of biomarkers for which at least one value was read
    :raises KeyError: if a visit column is not one of the known visit codes
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    # Loop-invariant, so look the valid names up only once.
    # NOTE(review): assumes the names are hashable (strings) -- confirm.
    known_biomarkers = set(DataHandler.get_all_biomarker_names())

    biomarkers = set()
    sample = {}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)

        # fieldnames is None for an empty file; treat that as "no visits".
        visits = (reader.fieldnames or [])[1:]
        for visit in visits:
            sample[visit] = {'scantime': scantime_dict[visit],
                             'DX.scan': 'UNKNOWN'}

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # TypeError: row shorter than the header (cell is None);
                    # ValueError: empty or non-numeric cell.
                    continue
                sample[visit][biomarker] = value
                biomarkers.add(biomarker)

    # Sorted so the order of the returned names is reproducible.
    return {0: sample}, sorted(biomarkers)