def main(args=None):
    """Report on XNAT reading/findings inconsistencies and optionally fix them.

    Builds a findings report selected by ``args.report_type`` from the cached
    XNAT experiment XMLs, writes it to ``args.outfile`` as CSV, and — for some
    report types — pushes corrections back to XNAT via ``set_experiment_attrs``
    or ``update_findings_date``.

    Raises:
        Exception: if ``no_findings_before_date`` is requested without
            ``--before-date``.
        NotImplementedError: for an unknown ``report_type``.
    """
    config = xe.get_config(args.config)
    session = xe.get_xnat_session(config)
    if args.update:
        # Update the cache of XNAT Experiment XML files
        xe.extract_experiment_xml(config, session, args.experimentsdir,
                                  args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    experiment_df = xe.experiments_to_dataframe(experiment)
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    reading_df = xe.reading_to_dataframe(reading)
    experiment_reading = inner_join_dataframes(experiment_df, reading_df)

    # exclude phantoms, but include the traveling human phantoms
    site_id_pattern = '[A-EX]-[0-9]{5}-[MFT]-[0-9]'
    df = experiment_reading[experiment_reading.site_id.str.contains(
        site_id_pattern)]

    result = None
    if args.report_type == 'no_findings_date':
        # Findings are listed without a findings date
        result = findings_date_empty(df)
        if args.set_findings_date:
            # Update the findings date to equal the date to dvd
            update_findings_date(args.config, result)

    elif args.report_type == 'no_findings':
        # Findings is empty but a date is listed
        result = findings_empty(df)

    elif args.report_type == 'no_findings_or_date':
        # Both the findings and findings date are empty
        result = findings_and_date_empty(df)
        if args.reset_datetodvd:
            # Fix: read the target experiment id from the CLI *before* the
            # lookup.  It was previously assigned after being used, so the
            # filter compared against the unrelated ``experiment`` dict from
            # get_experiments_dir_info above.
            experiment = args.reset_datetodvd
            record = result[result.experiment_id == experiment]
            project = record.project.values[0]
            subject = record.subject_id.values[0]
            set_experiment_attrs(args.config, project, subject, experiment,
                                 'datetodvd', 'none')

    elif args.report_type == 'correct_dvd_date':
        dates_df = pd.read_csv(args.file_to_reset_datetodvd)
        result = pd.DataFrame(index=['Subject'],
                              columns=[
                                  'project', 'subject_id', 'experiment_id',
                                  'site_experiment_id', 'datetodvd',
                                  'findingsdate'
                              ])
        result = result.fillna(0)
        # Hoisted set: O(1) membership instead of scanning the list per subject.
        known_sids = set(dates_df['mri_xnat_sid'].tolist())
        for subject in df['subject_id'].tolist():
            if subject not in known_sids:
                continue
            if args.verbose:
                # Fix: print() call instead of a Python-2-only statement,
                # consistent with the print() calls at the end of main().
                print("Checking for {}".format(subject))
            subject_rows = dates_df[dates_df['mri_xnat_sid'] == subject]
            eids = subject_rows['mri_xnat_eids'].tolist()
            date = subject_rows['mri_datetodvd'].tolist()
            if eids == []:
                continue
            if len(eids[0]) == 13:
                # A single experiment id (e.g. NCANDA_EXXXXX).
                experiment = eids[0]
                record = df[df.experiment_id == experiment]
                record_date = record['datetodvd'].tolist()
                if date != [] and record_date != []:
                    # Fix: the original compared ``type(x) != str()`` — a
                    # type against an empty string — which is always true.
                    # Intent: update when dates differ or the stored value
                    # is not a string (e.g. NaN).
                    if record_date[0] != date[0] or not isinstance(
                            record_date[0], str):
                        project = record.project.values[0]
                        subject = record.subject_id.values[0]
                        experiment = record.experiment_id.values[0]
                        set_experiment_attrs(args.config, project,
                                             subject, experiment,
                                             'datetodvd', date[0])
            elif len(eids[0]) == 27:
                # Two space-separated experiment ids.  (The original also
                # tested ``eids == None``, which was unreachable after the
                # ``eids != []`` guard and the len() call.)
                for e in eids[0].split(" "):
                    # Fix: look up the record *before* reading its date
                    # column; the original read a stale ``record`` left over
                    # from the previous iteration / branch.
                    record = df[df.experiment_id == e]
                    record_date = record['datetodvd'].tolist()
                    if date != [] and record_date != []:
                        if record_date[0] != date[0] or not isinstance(
                                record_date[0], str):
                            project = record.project.values[0]
                            subject = record.subject_id.values[0]
                            set_experiment_attrs(
                                args.config, project, subject, e,
                                'datetodvd', date[0])

    elif args.report_type == 'no_findings_before_date':
        # Findings and Findings Date is empty before a given date
        if not args.before_date:
            raise Exception(
                "Please set --before-date YYYY-MM-DD when running the "
                "no_findings_before_date report.")
        has_dvd_before_date = check_dvdtodate_before_date(
            df, before_date=args.before_date)
        result = findings_and_date_empty(has_dvd_before_date)
        result.to_csv(args.outfile, index=False)
    else:
        raise NotImplementedError("The report you entered is not in the list.")

    result.to_csv(args.outfile,
                  columns=[
                      'project', 'subject_id', 'experiment_id',
                      'site_experiment_id', 'datetodvd', 'findingsdate'
                  ],
                  index=False)
    if args.verbose:
        # Fix: ``verbose`` was an undefined name here; use the parsed argument.
        pd.set_option('display.max_rows', len(result))
        print("Total records found: {}".format(len(result)))
        print(result[['experiment_id', 'site_experiment_id']])
        pd.reset_option('display.max_rows')
        print("Finished!")
# Example #2
# 0
def main(args=None):
    """Build a report pairing scan1/scan2 QC results per imaging session.

    Depending on the CLI flags this either lists sessions that still need
    quality control (``--unknown``), dumps selected QC columns
    (``--ignore-window`` / ``--session-notes`` / ``--scan-notes``), or pairs
    scan1/scan2 records inside each subject's follow-up window and writes the
    result to ``args.outfile`` as CSV.
    """
    # TODO: Handle when T1 and T2 are in separate session (i.e., rescan)
    if args.update:
        config = xe.get_config(args.config)
        session = xe.get_xnat_session(config)
        xe.extract_experiment_xml(config, session, args.experimentsdir,
                                  args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    # Scan specific information
    scan = xe.get_experiments_dir_scan_info(args.experimentsdir)
    # Session info
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    df = xe.merge_experiments_scans_reading(experiment, scan, reading)

    # exclude phantoms, including the traveling human phantoms
    site_id_pattern = '[A-E]-[0-9]{5}-[MF]-[0-9]'
    df = df[df.site_id.str.contains(site_id_pattern)]

    # exclude subjects not part of study
    df = df[df['subject_id'] != 'NCANDA_S00127']

    if args.unknown:
        # Fix: print() calls instead of Python-2-only print statements,
        # consistent with the rest of the file.
        print("Sessions that have not yet been quality controlled")
        scan_check_list = pd.DataFrame()
        required_scans = [
            'ncanda-mprage-v1', 'ncanda-t1spgr-v1', 'ncanda-t2fse-v1',
            'ncanda-dti6b500pepolar-v1', 'ncanda-dti30b400-v1',
            'ncanda-dti60b1000-v1', 'ncanda-grefieldmap-v1', 'ncanda-rsfmri-v1'
        ]

        for eid in df.experiment_id.drop_duplicates():
            eid_df = df[df.experiment_id == eid]
            eid_df = eid_df[~pd.isnull(eid_df['quality'])]
            if not len(eid_df[eid_df['quality'] != 'unknown']):
                print(eid)
            else:
                unknown_scan_df = eid_df[eid_df['quality'] == 'unknown']
                mandatory_check = unknown_scan_df[
                    unknown_scan_df['scan_type'].isin(required_scans)]
                if len(mandatory_check):
                    # Fix: DataFrame.append() was removed in pandas 2.0;
                    # pd.concat() is the supported equivalent.
                    scan_check_list = pd.concat(
                        [scan_check_list, mandatory_check])

        print(" ")
        print("Mandatory scans that have not yet been quality controlled")
        pd.set_option('display.max_rows', len(scan_check_list))
        print(scan_check_list['scan_type'])

        sys.exit()

    if args.ignore_window or args.session_notes or args.scan_notes:
        if args.usable:
            df = df[df['quality'] == 'usable']

        columns = [
            'site_id', 'subject_id', 'experiment_id', 'experiment_date',
            'excludefromanalysis'
        ]
        if args.ignore_window or args.scan_notes:
            columns = columns + ['scan_id', 'scan_type', 'quality']
            if args.scan_notes:
                columns = columns + ['scan_note']

        if args.session_notes:
            columns = columns + ['note']

        result = df[columns]
    else:
        # Fix: unitless 'datetime64' astype is deprecated; spell out the
        # nanosecond unit (works on both old and new pandas).
        df.loc[:, 'experiment_date'] = df.experiment_date.astype(
            'datetime64[ns]')
        result = pd.DataFrame()
        for subject_id in df.subject_id.drop_duplicates():
            subject_df = df[df.subject_id == subject_id]

            # find the earliest exam date for each given subject
            grouping = subject_df.groupby('subject_id')
            baseline_date = grouping['experiment_date'].nsmallest(1)
            # Fix: ``series[0]`` relied on the deprecated positional integer
            # fallback on a non-integer index; .iloc[0] is explicit.
            baseline_df = subject_df[subject_df.experiment_date ==
                                     baseline_date.iloc[0]]

            # Find window for follow-up
            # Fix: pd.datetools was removed from pandas; Timedelta(days=n)
            # is the equivalent offset for these naive timestamps.
            day_min = pd.Timedelta(days=args.min)
            day_max = pd.Timedelta(days=args.max)
            followup_min = baseline_df.experiment_date + day_min
            followup_max = baseline_df.experiment_date + day_max

            df_min = subject_df.experiment_date > followup_min.iloc[0]
            df_max = subject_df.experiment_date < followup_max.iloc[0]
            followup_df = subject_df[df_min & df_max]

            # Included followup sessions slightly outside window
            included = ['NCANDA_E02615', 'NCANDA_E02860']
            included_df = subject_df[subject_df.experiment_id.isin(included)]
            if included_df.shape[0]:
                followup_df = included_df

            # Create report for baseline visit
            if args.baseline:
                followup_df = baseline_df

            # filter for specific scan types
            scan_type_pairs = get_scan_type_pairs(args.modality)
            scan1 = scan_type_pairs.get('scan1')
            scan2 = scan_type_pairs.get('scan2')
            scan1_df = followup_df[followup_df.scan_type.isin(scan1)]
            scan2_df = followup_df[followup_df.scan_type.isin(scan2)]

            # Filter quality column
            if args.usable:
                scan1_selected = scan1_df[scan1_df.quality == 'usable']
                scan2_selected = scan2_df[scan2_df.quality == 'usable']
            else:
                scan1_selected = scan1_df
                scan2_selected = scan2_df

            # report columns
            columns = [
                'site_id', 'subject_id', 'experiment_id', 'experiment_date',
                'excludefromanalysis', 'note', 'scan_type', 'quality',
                'scan_note'
            ]
            scan1_recs = scan1_selected.loc[:, columns].to_records(index=False)
            scan2_recs = scan2_selected.loc[:, columns].to_records(index=False)

            scan1_report = pd.DataFrame(scan1_recs,
                                        index=scan1_selected.experiment_id)
            scan2_report = pd.DataFrame(scan2_recs,
                                        index=scan2_selected.experiment_id)

            # Inner join keeps only sessions that have both scan types.
            scan1_scan2_report = scan1_report.join(
                scan2_report[['scan_type', 'quality', 'scan_note']],
                lsuffix='_scan1',
                rsuffix='_scan2',
                how='inner')
            if scan1_scan2_report.shape[0]:
                # Fix: DataFrame.append() was removed in pandas 2.0.
                result = pd.concat([result, scan1_scan2_report])
    #
    # Write out results
    #

    # Remove any duplicate rows due to extra usable scan types (i.e., fieldmaps)
    result = result.drop_duplicates()
    result.to_csv(args.outfile, index=False)
def main(args=None):
    """Pair scan1/scan2 QC records per subject inside the follow-up window.

    For every subject, finds the baseline session, selects sessions within
    ``args.min``..``args.max`` days of baseline (or the baseline itself when
    ``--baseline`` is set), joins scan1/scan2 records by session, and writes
    the combined report to ``args.outfile`` as CSV.
    """
    # TODO: Handle when T1 and T2 are in separate session (i.e., rescan)
    if args.update:
        config = xe.get_config(args.config)
        session = xe.get_xnat_session(config)
        xe.extract_experiment_xml(config, session,
                                  args.experimentsdir, args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    # Scan specific information
    scan = xe.get_experiments_dir_scan_info(args.experimentsdir)
    # Session info
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    df = xe.merge_experiments_scans_reading(experiment, scan, reading)

    # exclude phantoms, including the traveling human phantoms
    site_id_pattern = '[A-E]-[0-9]{5}-[MF]-[0-9]'
    df = df[df.site_id.str.contains(site_id_pattern)]

    # convert to date type
    # Fix: unitless 'datetime64' astype is deprecated; spell out the ns unit.
    df.loc[:, 'experiment_date'] = df.experiment_date.astype('datetime64[ns]')

    result = pd.DataFrame()
    for subject_id in df.subject_id.drop_duplicates():
        subject_df = df[df.subject_id == subject_id]

        # find the earliest exam date for each given subject
        grouping = subject_df.groupby('subject_id')
        baseline_date = grouping['experiment_date'].nsmallest(1)
        # Fix: ``series[0]`` relied on the deprecated positional integer
        # fallback on a non-integer index; .iloc[0] is explicit.
        baseline_df = subject_df[subject_df.experiment_date ==
                                 baseline_date.iloc[0]]

        # Find window for follow-up
        # Fix: pd.datetools was removed from pandas; Timedelta(days=n) is the
        # equivalent offset for these naive timestamps.
        day_min = pd.Timedelta(days=args.min)
        day_max = pd.Timedelta(days=args.max)
        followup_min = baseline_df.experiment_date + day_min
        followup_max = baseline_df.experiment_date + day_max

        df_min = subject_df.experiment_date > followup_min.iloc[0]
        df_max = subject_df.experiment_date < followup_max.iloc[0]
        followup_df = subject_df[df_min & df_max]

        # Included followup sessions slightly outside window
        included = ['NCANDA_E02615', 'NCANDA_E02860']
        included_df = subject_df[subject_df.experiment_id.isin(included)]
        if included_df.shape[0]:
            followup_df = included_df

        # Create report for baseline visit
        if args.baseline:
            followup_df = baseline_df

        # filter for specific scan types
        scan_type_pairs = get_scan_type_pairs(args.modality)
        scan1 = scan_type_pairs.get('scan1')
        scan2 = scan_type_pairs.get('scan2')
        scan1_df = followup_df[followup_df.scan_type.isin(scan1)]
        scan2_df = followup_df[followup_df.scan_type.isin(scan2)]

        # Filter quality column
        if args.usable:
            scan1_selected = scan1_df[scan1_df.quality == 'usable']
            scan2_selected = scan2_df[scan2_df.quality == 'usable']
        else:
            scan1_selected = scan1_df
            scan2_selected = scan2_df

        # report columns
        columns = ['site_id', 'subject_id', 'experiment_id', 'experiment_date',
                   'excludefromanalysis', 'note', 'scan_type', 'quality',
                   'scan_note']
        scan1_recs = scan1_selected.loc[:, columns].to_records(index=False)
        scan2_recs = scan2_selected.loc[:, columns].to_records(index=False)

        scan1_report = pd.DataFrame(scan1_recs,
                                    index=scan1_selected.experiment_id)
        scan2_report = pd.DataFrame(scan2_recs,
                                    index=scan2_selected.experiment_id)

        # Inner join keeps only sessions that have both scan types.
        scan1_scan2_report = scan1_report.join(scan2_report[['scan_type',
                                                             'quality',
                                                             'scan_note']],
                                               lsuffix='_scan1',
                                               rsuffix='_scan2',
                                               how='inner')
        if scan1_scan2_report.shape[0]:
            # Fix: DataFrame.append() was removed in pandas 2.0.
            result = pd.concat([result, scan1_scan2_report])
    # Remove any duplicate rows due to extra usable scan types (i.e., fieldmaps)
    result = result.drop_duplicates()
    result.to_csv(args.outfile, index=False)
def main(args=None):
    """Report on XNAT reading/findings inconsistencies and optionally fix them.

    Builds the report selected by ``args.report_type`` from the cached XNAT
    experiment XMLs, writes it to ``args.outfile`` as CSV, and — for some
    report types — pushes corrections back to XNAT via ``set_experiment_attrs``
    or ``update_findings_date``.

    Raises:
        Exception: if ``no_findings_before_date`` is requested without
            ``--before-date``.
        NotImplementedError: for an unknown ``report_type``.
    """
    config = xe.get_config(args.config)
    session = xe.get_xnat_session(config)
    if args.update:
        # Update the cache of XNAT Experiment XML files
        xe.extract_experiment_xml(config, session,
                                  args.experimentsdir, args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    experiment_df = xe.experiments_to_dataframe(experiment)
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    reading_df = xe.reading_to_dataframe(reading)
    experiment_reading = inner_join_dataframes(experiment_df, reading_df)

    # exclude phantoms, but include the traveling human phantoms
    site_id_pattern = '[A-EX]-[0-9]{5}-[MFT]-[0-9]'
    df = experiment_reading[experiment_reading.site_id.str.contains(site_id_pattern)]

    result = None
    if args.report_type == 'no_findings_date':
        # Findings are listed without a findings date
        result = findings_date_empty(df)
        if args.set_findings_date:
            # Update the findings date to equal the date to dvd
            update_findings_date(args.config, result)

    elif args.report_type == 'no_findings':
        # Findings is empty but a date is listed
        result = findings_empty(df)

    elif args.report_type == 'no_findings_or_date':
        # Both the findings and findings date are empty
        result = findings_and_date_empty(df)
        if args.reset_datetodvd:
            # Fix: read the target experiment id from the CLI *before* the
            # lookup.  It was previously assigned after being used, so the
            # filter compared against the unrelated ``experiment`` value from
            # get_experiments_dir_info above.
            experiment = args.reset_datetodvd
            record = result[result.experiment_id == experiment]
            project = record.project.values[0]
            subject = record.subject_id.values[0]
            set_experiment_attrs(args.config, project, subject, experiment, 'datetodvd', 'none')

    elif args.report_type == 'correct_dvd_date':
        dates_df = pd.read_csv(args.file_to_reset_datetodvd)
        result = pd.DataFrame(index=['Subject'], columns=['project', 'subject_id', 'experiment_id',
                 'site_experiment_id', 'datetodvd', 'findingsdate'])
        result = result.fillna(0)
        # Hoisted set: O(1) membership instead of scanning the list per subject.
        known_sids = set(dates_df['mri_xnat_sid'].tolist())
        for subject in df['subject_id'].tolist():
            if subject not in known_sids:
                continue
            if args.verbose:
                # Fix: print() call instead of a Python-2-only statement.
                print("Checking for {}".format(subject))
            subject_rows = dates_df[dates_df['mri_xnat_sid'] == subject]
            eids = subject_rows['mri_xnat_eids'].tolist()
            date = subject_rows['mri_datetodvd'].tolist()
            if eids == []:
                continue
            if len(eids[0]) == 13:
                # A single experiment id (e.g. NCANDA_EXXXXX).
                experiment = eids[0]
                record = df[df.experiment_id == experiment]
                record_date = record['datetodvd'].tolist()
                if date != [] and record_date != []:
                    # Fix: the original compared ``type(x) != str()`` — a type
                    # against an empty string — which is always true.  Intent:
                    # update when dates differ or the stored value is not a
                    # string (e.g. NaN).
                    if record_date[0] != date[0] or not isinstance(record_date[0], str):
                        project = record.project.values[0]
                        subject = record.subject_id.values[0]
                        experiment = record.experiment_id.values[0]
                        set_experiment_attrs(args.config, project, subject, experiment, 'datetodvd', date[0])
            elif len(eids[0]) == 27:
                # Two space-separated experiment ids.  (The original also
                # tested ``eids == None``, which was unreachable after the
                # ``eids != []`` guard and the len() call.)
                for e in eids[0].split(" "):
                    # Fix: look up the record *before* reading its date column;
                    # the original read a stale ``record`` left over from the
                    # previous iteration / branch.
                    record = df[df.experiment_id == e]
                    record_date = record['datetodvd'].tolist()
                    if date != [] and record_date != []:
                        if record_date[0] != date[0] or not isinstance(record_date[0], str):
                            project = record.project.values[0]
                            subject = record.subject_id.values[0]
                            set_experiment_attrs(args.config, project, subject, e, 'datetodvd', date[0])

    elif args.report_type == 'no_findings_before_date':
        # Findings and Findings Date is empty before a given date
        if not args.before_date:
            raise Exception("Please set --before-date YYYY-MM-DD when running the no_findings_before_date report.")
        has_dvd_before_date = check_dvdtodate_before_date(df, before_date=args.before_date)
        result = findings_and_date_empty(has_dvd_before_date)
        result.to_csv(args.outfile, index=False)
    else:
        raise NotImplementedError("The report you entered is not in the list.")

    result.to_csv(args.outfile,
                  columns=['project', 'subject_id', 'experiment_id',
                           'site_experiment_id', 'datetodvd', 'findingsdate'],
                  index=False)
    if args.verbose:
        # Fix: ``verbose`` was an undefined name here; use the parsed argument.
        pd.set_option('display.max_rows', len(result))
        print("Total records found: {}".format(len(result)))
        print(result[['experiment_id', 'site_experiment_id']])
        pd.reset_option('display.max_rows')
        print("Finished!")
# Example #5
# 0
def main(args=None):
    """Pair *usable* scan1/scan2 QC records per subject in the follow-up window.

    For every subject, finds the baseline session, selects sessions within
    ``args.min``..``args.max`` days of baseline (or the baseline itself when
    ``--baseline`` is set), keeps only scans marked usable, joins scan1/scan2
    records by session, and writes the report to ``args.outfile`` as CSV.
    """
    # TODO: Handle when T1 and T2 are in separate session (i.e., rescan)
    if args.update:
        config = xe.get_config(args.config)
        session = xe.get_xnat_session(config)
        xe.extract_experiment_xml(config, session, args.experimentsdir,
                                  args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    scan = xe.get_experiments_dir_scan_info(args.experimentsdir)
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    df = xe.merge_experiments_scans_reading(experiment, scan, reading)

    # exclude phantoms, including the traveling human phantoms
    site_id_pattern = '[A-E]-[0-9]{5}-[MF]-[0-9]'
    df = df[df.site_id.str.contains(site_id_pattern)]

    # convert to date type
    # Fix: unitless 'datetime64' astype is deprecated; spell out the ns unit.
    df.loc[:, 'experiment_date'] = df.experiment_date.astype('datetime64[ns]')

    result = pd.DataFrame()
    for subject_id in df.subject_id.drop_duplicates():
        subject_df = df[df.subject_id == subject_id]

        # find the earliest exam date for each given subject
        baseline_date = subject_df.groupby(
            'subject_id')['experiment_date'].nsmallest(1)
        # Fix: ``series[0]`` relied on the deprecated positional integer
        # fallback on a non-integer index; .iloc[0] is explicit.
        baseline_df = subject_df[subject_df.experiment_date ==
                                 baseline_date.iloc[0]]

        # Find window for follow-up
        # Fix: pd.datetools was removed from pandas; Timedelta(days=n) is the
        # equivalent offset for these naive timestamps.
        followup_min = baseline_df.experiment_date + pd.Timedelta(days=args.min)
        followup_max = baseline_df.experiment_date + pd.Timedelta(days=args.max)

        followup_df = subject_df[
            (subject_df.experiment_date > followup_min.iloc[0])
            & (subject_df.experiment_date < followup_max.iloc[0])]

        # Included followup sessions slightly outside window
        included = ['NCANDA_E02615', 'NCANDA_E02860']
        included_df = subject_df[subject_df.experiment_id.isin(included)]
        if included_df.shape[0]:
            followup_df = included_df

        # Create report for baseline visit
        if args.baseline:
            followup_df = baseline_df

        # filter for specific scan types
        scan_type_pairs = get_scan_type_pairs(args.modality)
        scan1 = scan_type_pairs.get('scan1')
        scan2 = scan_type_pairs.get('scan2')
        scan1_df = followup_df[followup_df.scan_type.isin(scan1)]
        scan2_df = followup_df[followup_df.scan_type.isin(scan2)]

        # Filter quality column
        scan1_usable = scan1_df[scan1_df.quality == 'usable']
        scan2_usable = scan2_df[scan2_df.quality == 'usable']

        # report columns
        columns = [
            'site_id', 'subject_id', 'experiment_id', 'scan_type',
            'experiment_date', 'quality', 'excludefromanalysis', 'note'
        ]
        scan1_recs = scan1_usable.loc[:, columns].to_records(index=False)
        scan2_recs = scan2_usable.loc[:, columns].to_records(index=False)

        scan1_report = pd.DataFrame(scan1_recs,
                                    index=scan1_usable.experiment_id)
        scan2_report = pd.DataFrame(scan2_recs,
                                    index=scan2_usable.experiment_id)

        # Inner join keeps only sessions that have both scan types.
        scan1_scan2_report = scan1_report.join(
            scan2_report[['scan_type', 'quality']],
            lsuffix='_scan1',
            rsuffix='_scan2',
            how='inner')
        if scan1_scan2_report.shape[0]:
            # Fix: DataFrame.append() was removed in pandas 2.0.
            result = pd.concat([result, scan1_scan2_report])
    # Remove any duplicate rows due to extra usable scan types (i.e., fieldmaps)
    result = result.drop_duplicates()
    result.to_csv(args.outfile, index=False)