def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    The first column of the file is read through its 'Biomarker Name' header; every further column header
    is treated as a visit code. All values are stored under the single subject key 0, so the result has
    the structure:
    { 0 : { <viscode> : { 'DX.scan' : 'UNKNOWN',
                          'scantime' : <days after bl>,
                          <biomarker1> : <volume>,
                          ... },
            <viscode> : ... }}

    Rows whose biomarker name is not returned by DataHandler.get_all_biomarker_names() are ignored, and
    cells that are missing or cannot be parsed as a float are skipped.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and a list of the biomarkers for which at least
             one value was read
    :raises KeyError: if a visit column is not one of 'bl', 'm12', 'm24', 'm36'
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    # Look the known biomarker names up once instead of once per CSV row.
    known_biomarkers = DataHandler.get_all_biomarker_names()

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file; treat that as "no visits".
        visits = (reader.fieldnames or [])[1:]
        for visit in visits:
            measurements[0][visit] = {'scantime': scantime_dict[visit], 'DX.scan': 'UNKNOWN'}

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # ValueError: empty or non-numeric cell.
                    # TypeError: the row is shorter than the header, so DictReader filled the cell with None.
                    continue
                measurements[0][visit][biomarker] = value
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)
def main():
    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(
            visits=['bl', 'm12'], biomarkers=[biomarker], select_complete=True)

        mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']
            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            mean_changes_biomarker[diagnosis] += change
            num_subjects[diagnosis] += 1

        mean_change_mci_ad = mean_changes_biomarker[
            0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[
            0.75] + num_subjects[1.0]
        for diagnosis in mean_changes_biomarker:
            mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis]
        mean_changes_biomarker.update(
            {0.66: mean_change_mci_ad / num_subjects_mci_ad})

        mean_changes.update({biomarker: mean_changes_biomarker})

        print log.RESULT, '{0} CN:   {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])
        print log.RESULT, '{0} EMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])
        print log.RESULT, '{0} LMCI: {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])
        print log.RESULT, '{0} AD:   {1}, (n={2})'.format(
            biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])

    mean_changes_file = os.path.join(data_handler.get_eval_folder(),
                                     'mean_changes.p')
    pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def main():
    # Collect data for test
    data_handler = DataHandler.get_data_handler()
    biomarkers = DataHandler.get_all_biomarker_names()

    mean_changes = {}
    for biomarker in biomarkers:
        measurements = data_handler.get_measurements_as_dict(visits=['bl', 'm12'],
                                                             biomarkers=[biomarker],
                                                             select_complete=True)

        mean_changes_biomarker = {0.0: 0.0, 0.25: 0.0, 0.75: 0.0, 1.0: 0.0}
        num_subjects = {0.0: 0, 0.25: 0, 0.75: 0, 1.0: 0}
        for rid in measurements:
            diagnosis = measurements[rid]['bl']['DX.scan']
            value_bl = measurements[rid]['bl'][biomarker]
            value_y1 = measurements[rid]['m12'][biomarker]
            scantime_bl = measurements[rid]['bl']['scantime']
            scantime_y1 = measurements[rid]['m12']['scantime']

            change = (value_y1 - value_bl) / (scantime_y1 - scantime_bl)

            mean_changes_biomarker[diagnosis] += change
            num_subjects[diagnosis] += 1

        mean_change_mci_ad = mean_changes_biomarker[0.25] + mean_changes_biomarker[0.75] + mean_changes_biomarker[1.0]
        num_subjects_mci_ad = num_subjects[0.25] + num_subjects[0.75] + num_subjects[1.0]
        for diagnosis in mean_changes_biomarker:
            mean_changes_biomarker[diagnosis] /= num_subjects[diagnosis]
        mean_changes_biomarker.update({0.66: mean_change_mci_ad / num_subjects_mci_ad})

        mean_changes.update({biomarker: mean_changes_biomarker})

        print log.RESULT, '{0} CN:   {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.0], num_subjects[0.0])
        print log.RESULT, '{0} EMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.25], num_subjects[0.25])
        print log.RESULT, '{0} LMCI: {1}, (n={2})'.format(biomarker, mean_changes_biomarker[0.75], num_subjects[0.75])
        print log.RESULT, '{0} AD:   {1}, (n={2})'.format(biomarker, mean_changes_biomarker[1.0], num_subjects[1.0])

    mean_changes_file = os.path.join(data_handler.get_eval_folder(), 'mean_changes.p')
    pickle.dump(mean_changes, open(mean_changes_file, 'wb'))
def read_measurements_from_cvs(filename):
    """
    Create a measurements dict from the sample measurements file.

    The first column of the file is read through its 'Biomarker Name' header; every further column header
    is treated as a visit code. All values are stored under the single subject key 0, so the result has
    the structure:
    { 0 : { <viscode> : { 'DX.scan' : 'UNKNOWN',
                          'scantime' : <days after bl>,
                          <biomarker1> : <volume>,
                          ... },
            <viscode> : ... }}

    Rows whose biomarker name is not returned by DataHandler.get_all_biomarker_names() are ignored, and
    cells that are missing or cannot be parsed as a float are skipped.

    :param filename: filename of the *.csv file
    :rtype: tuple
    :return: the generated dict with the measurements and a list of the biomarkers for which at least
             one value was read
    :raises KeyError: if a visit column is not one of 'bl', 'm12', 'm24', 'm36'
    """
    scantime_dict = {'bl': 0, 'm12': 365, 'm24': 730, 'm36': 1095}

    # Look the known biomarker names up once instead of once per CSV row.
    known_biomarkers = DataHandler.get_all_biomarker_names()

    biomarkers = set()
    measurements = {0: {}}
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None for an empty file; treat that as "no visits".
        visits = (reader.fieldnames or [])[1:]
        for visit in visits:
            measurements[0][visit] = {
                'scantime': scantime_dict[visit],
                'DX.scan': 'UNKNOWN'
            }

        for row in reader:
            biomarker = row['Biomarker Name']
            if biomarker not in known_biomarkers:
                continue
            for visit in visits:
                try:
                    value = float(row[visit])
                except (TypeError, ValueError):
                    # ValueError: empty or non-numeric cell.
                    # TypeError: the row is shorter than the header, so
                    # DictReader filled the cell with None.
                    continue
                measurements[0][visit][biomarker] = value
                biomarkers.add(biomarker)

    return measurements, list(biomarkers)