Ejemplo n.º 1
0
def update_blacklist(entries, study=None, config=None, path=None):
    """
    Add new entries to the blacklist, or update existing ones.

    When the dashboard is found and no explicit path is given, the entries
    are handed to _update_scan_checklist() and no file is touched. Otherwise
    the 'blacklist.csv' file located by locate_metadata() is read, merged
    with <entries> and written back with a header line followed by the
    sorted entries.

    <entries> must be a dictionary with scan names as the keys and the
    reason for blacklisting as the values. When updating a file, entries
    with an empty reason are logged and skipped.

    Raises a MetadataException if <entries> is not a dictionary or, when
    updating a file, if a scan name cannot be parsed.
    """
    if not isinstance(entries, dict):
        raise MetadataException("Blacklist entries must be in dictionary "
                                "format with scan name as the key and reason "
                                "for blacklisting as the value")

    if dashboard.dash_found and not path:
        _update_scan_checklist(entries)
        return

    csv_path = locate_metadata(
        "blacklist.csv", study=study, config=config, path=path)
    merged = read_blacklist(path=csv_path)

    for name, reason in entries.items():
        try:
            scanid.parse_filename(name)
        except scanid.ParseException:
            raise MetadataException(
                f"Attempt to add invalid scan name {name} to blacklist")
        if not reason:
            logger.error("Can't add blacklist entry with empty comment. "
                         f"Skipping {name}")
            continue
        merged[name] = reason

    # Header line first, then one line per entry in sorted order.
    output = ["series\treason\n"]
    output.extend(
        sorted(f"{name} {reason}\n" for name, reason in merged.items()))
    write_metadata(output, csv_path)
Ejemplo n.º 2
0
def update_blacklist(entries, study=None, config=None, path=None):
    """
    Handles blacklist updates. Will update the dashboard (ignoring any
    'blacklist.csv' files) unless the dashboard is not found or a specific
    path is given to a file.

    <entries> should be a dictionary with scan names as the keys and the
    reason for blacklisting as the values. When updating a file, entries
    with an empty reason are logged and skipped.

    This will raise a MetadataException if <entries> is not a dictionary or,
    when updating a file, if a scan name cannot be parsed.
    """
    if not isinstance(entries, dict):
        raise MetadataException(
            "Blacklist entries must be in dictionary "
            "format with scan name as the key and reason for blacklisting "
            "as the value")

    if dashboard.dash_found and not path:
        _update_scan_checklist(entries)
        return

    blacklist_path = locate_metadata('blacklist.csv',
                                     study=study,
                                     config=config,
                                     path=path)
    old_entries = read_blacklist(path=blacklist_path)

    for scan_name in entries:
        try:
            datman.scanid.parse_filename(scan_name)
        except Exception:
            # 'Exception' rather than a bare except so that
            # KeyboardInterrupt / SystemExit are not reported as a bad name.
            raise MetadataException("Attempt to add invalid scan name {} "
                                    "to blacklist".format(scan_name))
        if not entries[scan_name]:
            logger.error("Can't add blacklist entry with empty comment. "
                         "Skipping {}".format(scan_name))
            continue
        old_entries[scan_name] = entries[scan_name]

    # Header line first, then one line per entry in sorted order.
    lines = ["{} {}\n".format(sub, old_entries[sub]) for sub in old_entries]
    new_list = ['series\treason\n']
    new_list.extend(sorted(lines))
    write_metadata(new_list, blacklist_path)
Ejemplo n.º 3
0
def _update_qc_reviewers(entries):
    """
    Support function for update_checklist(). Updates QC info on the dashboard.

    <entries> maps subject IDs to QC comments. For every subject with a
    non-empty comment, each of its sessions that is not already QC'd is
    signed off as the default dashboard user.

    Raises a MetadataException if no default dashboard user can be retrieved
    or if a subject (or its sessions) cannot be found in the dashboard.
    """
    try:
        user = dashboard.get_default_user()
    except Exception:
        # 'Exception' rather than a bare except so that KeyboardInterrupt /
        # SystemExit are not misreported as a configuration problem.
        raise MetadataException(
            "Can't update dashboard QC information without "
            "a default dashboard user defined. Please add "
            "'DEFAULT_DASH_USER' to your config file.")

    for subject in entries:
        timepoint = dashboard.get_subject(subject)
        if not timepoint or not timepoint.sessions:
            raise MetadataException("{} not found in the dashboard "
                                    "database.".format(subject))

        comment = entries[subject]
        if not comment:
            # User was just registering a new QC entry. As long as the
            # session exists in the database there is no work to do.
            continue

        for num in timepoint.sessions:
            session = timepoint.sessions[num]
            if session.is_qcd():
                # Dont risk writing over QC-ers from the dashboard.
                continue
            session.sign_off(user.id)
Ejemplo n.º 4
0
def get_subject_metadata(config=None, study=None, allow_partial=False):
    """Map every QC'd session ID to the blacklist entries recorded for it.

    The study's checklist and blacklist are both read. Every ID with a
    non-empty checklist entry appears in the result, mapped to a (possibly
    empty) list. Blacklist entries whose ID has no completed checklist entry
    are logged and dropped unless 'allow_partial' is set, so that partially
    QC'd subjects are not accidentally processed by downstream pipelines.

    Either a study name or a datman config object must be supplied to
    find the checklist and blacklist contents.

    Args:
        config (:obj:`datman.config.config`, optional): A datman config object
            with the study set to the study of interest.
        study (:obj:`str`, optional): A datman study name. Only used when
            no config is given.
        allow_partial (bool, optional): Whether to include blacklist entries
            for IDs that have no completed checklist entry yet. Defaults to
            False.

    Returns:
        dict: IDs mapped to the list of blacklist entry names (exactly as
        they appear in the blacklist) that belong to them.

    Raises:
        MetadataException: If neither a config nor a study is given.
    """
    if not config and not study:
        raise MetadataException(
            "A study name or config object must be "
            "given to locate study metadata."
        )
    if not config:
        config = datman.config.config(study=study)

    checklist = read_checklist(config=config)
    blacklist = read_blacklist(config=config)

    # Seed the result with every ID that has a completed checklist entry.
    qc_map = {}
    for subid in checklist:
        if checklist[subid]:
            qc_map[subid] = []

    for bl_entry in blacklist:
        try:
            ident, _, _, _ = scanid.parse_filename(bl_entry)
        except scanid.ParseException:
            logger.error(
                f"Malformed scan name {bl_entry} found in blacklist. Ignoring."
            )
            continue

        subid = ident.get_full_subjectid_with_timepoint()
        if subid in qc_map:
            qc_map[subid].append(bl_entry)
        elif allow_partial:
            qc_map[subid] = [bl_entry]
        else:
            logger.error(
                f"{subid} has blacklisted series {bl_entry} but does not "
                "appear in QC checklist. Ignoring blacklist entry"
            )

    return qc_map
Ejemplo n.º 5
0
def get_subject_metadata(config=None, study=None):
    """
    Returns all QC'd subject IDs mapped to any blacklisted scans they have.

    Every subject with a non-empty checklist entry appears in the result,
    mapped to a (possibly empty) list of its blacklist entry names. Blacklist
    entries that cannot be parsed, or that belong to a subject without a
    completed checklist entry, are logged and ignored.

    Either a study name or a datman config object must be given, otherwise a
    MetadataException is raised.
    """
    if not config:
        if not study:
            raise MetadataException("A study name or config object must be "
                                    "given to locate study metadata.")
        config = datman.config.config(study=study)

    checklist = read_checklist(config=config)
    blacklist = read_blacklist(config=config)

    all_qc = {subid: [] for subid in checklist if checklist[subid]}
    for bl_entry in blacklist:
        try:
            ident, _, _, _ = datman.scanid.parse_filename(bl_entry)
        except Exception:
            # 'Exception' rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            logger.error("Malformed scan name {} found in blacklist. "
                         "Ignoring.".format(bl_entry))
            continue

        subid = ident.get_full_subjectid_with_timepoint()
        if subid not in all_qc:
            logger.error(
                "{} has blacklisted series {} but does not "
                "appear in QC checklist. Ignoring blacklist entry".format(
                    subid, bl_entry))
            continue

        all_qc[subid].append(bl_entry)

    return all_qc
Ejemplo n.º 6
0
def read_blacklist(study=None,
                   scan=None,
                   subject=None,
                   config=None,
                   path=None):
    """
    This function is used to look up blacklisted scans. If the dashboard is
    found it ONLY checks the dashboard database. Otherwise it expects a datman
    style 'blacklist' file on the filesystem.

    This function can accept:
        - A study name (nickname, not study tag)
        - A scan name (may include the full path and extension)
        - A subject ID
        - A datman config object, initialized to the study being worked with
        - A full path directly to a blacklist file. If given, this will
           circumvent any dashboard database checks and ignore any datman
           config files.

    Returns:
        - A dictionary of scan names mapped to the comment provided when they
          were blacklisted (Note: If reading from the filesystem, commas
          contained in comments will be removed)
        - OR a dictionary of the same format containing only entries
          for a single subject if a specific subject ID was given
        - OR the comment for a specific scan if a scan is given
        - OR 'None' if a scan is given but not found in the blacklist, or if
          the scan name cannot be parsed when reading from the filesystem

    Raises a MetadataException if the blacklist file cannot be read.
    """
    if dashboard.dash_found and not path:
        return _fetch_blacklist(scan=scan,
                                subject=subject,
                                study=study,
                                config=config)

    if scan:
        try:
            ident, tag, series, descr = scanid.parse_filename(scan)
        except Exception:
            # 'Exception' rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            logger.error("Invalid scan name: {}".format(scan))
            return None
        tmp_sub = ident.get_full_subjectid_with_timepoint_session()
        # Need to drop the path and extension if in the original 'scan'
        scan = "_".join([str(ident), tag, series, descr])
    else:
        tmp_sub = subject

    blacklist_path = locate_metadata("blacklist.csv",
                                     study=study,
                                     subject=tmp_sub,
                                     config=config,
                                     path=path)
    try:
        with open(blacklist_path, 'r') as blacklist:
            entries = _parse_blacklist(blacklist, scan=scan, subject=subject)
    except Exception as e:
        # The message previously said "checklist file", which pointed anyone
        # debugging a failure at the wrong metadata file.
        raise MetadataException("Failed to read blacklist file {}. Reason - "
                                "{}".format(blacklist_path, str(e)))

    return entries
Ejemplo n.º 7
0
def _fetch_blacklist(
    scan=None,
    subject=None,
    bids_ses=None,
    study=None,
    config=None,
    use_bids=False,
):
    """
    Helper function for 'read_blacklist()'. Gets the blacklist contents from
    the dashboard's database

    If 'scan' is given, returns that scan's comment when the scan exists and
    is blacklisted, otherwise None. If 'subject' is given, returns a
    dictionary of that subject's blacklist entries. Otherwise returns a
    dictionary of blacklist entries for the whole study (taken from 'config'
    when a config is given, else from 'study').

    When 'use_bids' is set, scans and subjects are looked up by BIDS name and
    the returned dictionary is keyed by BIDS name; entries without a BIDS
    name are left out. A subject looked up by BIDS name uses 'bids_ses', which
    defaults to "01".

    Raises a MetadataException if none of scan, subject, study or config is
    given.
    """
    if not (scan or subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard blacklist info "
            "without either 1) a scan name 2) a subject "
            "ID 3) a study ID or 4) a datman config "
            "object"
        )

    if scan:
        if use_bids:
            db_scan = dashboard.get_bids_scan(scan)
        else:
            db_scan = dashboard.get_scan(scan)
        if db_scan and db_scan.blacklisted():
            # Used to be wrapped in a try/except whose handler repeated this
            # exact call, so the wrapper added nothing and was removed.
            return db_scan.get_comment()
        return None

    if subject:
        if use_bids or bids_ses:
            if not bids_ses:
                bids_ses = "01"
            db_subject = dashboard.get_bids_subject(subject, bids_ses, study)
        else:
            db_subject = dashboard.get_subject(subject)
        blacklist = db_subject.get_blacklist_entries()
    else:
        if config:
            study = config.study_name
        db_study = dashboard.get_project(study)
        blacklist = db_study.get_blacklisted_scans()

    entries = {}
    for entry in blacklist:
        if use_bids:
            if not entry.scan.bids_name:
                # Ignore scans without a bids name if bids was requested
                continue
            scan_name = entry.scan.bids_name
        else:
            scan_name = str(entry.scan) + "_" + entry.scan.description

        entries[scan_name] = entry.comment

    return entries
Ejemplo n.º 8
0
def update_checklist(entries, study=None, config=None, path=None):
    """
    Handles QC checklist updates. Will preferentially update the dashboard
    (ignoring any 'checklist.csv' files) unless the dashboard is not installed
    or a specific path is given to a file.

    <entries> should be a dictionary with subject IDs (minus session/repeat) as
    the keys and qc entries as the value (with an empty string for new/blank
    QC entries). When updating a file, IDs are normalized to the
    subject + timepoint form before being stored.

    This will raise a MetadataException if any part of the update fails for
    any entry.
    """
    if not isinstance(entries, dict):
        raise MetadataException(
            "Checklist entries must be in dictionary "
            "format with subject ID as the key and comment as the value "
            "(empty string for new, unreviewed subjects)")

    if dashboard.dash_found and not path:
        _update_qc_reviewers(entries)
        return

    # No dashboard, or path was given, so update file system.
    checklist_path = locate_metadata('checklist.csv',
                                     study=study,
                                     config=config,
                                     path=path)
    old_entries = read_checklist(path=checklist_path)

    # Merge with existing list
    for subject in entries:
        try:
            ident = datman.scanid.parse(subject)
        except Exception:
            # 'Exception' rather than a bare except so that
            # KeyboardInterrupt / SystemExit are not reported as a bad ID.
            raise MetadataException("Attempt to add invalid subject ID {} to "
                                    "QC checklist".format(subject))
        # Keep the caller's key for the lookup. Reusing 'subject' for the
        # normalized ID made entries[subject] raise KeyError whenever the
        # two differed (e.g. the key carried a session number).
        normalized = ident.get_full_subjectid_with_timepoint()
        old_entries[normalized] = entries[subject]

    # Reformat to expected checklist line format
    lines = [
        "qc_{}.html {}\n".format(sub, old_entries[sub]) for sub in old_entries
    ]

    write_metadata(sorted(lines), checklist_path)
Ejemplo n.º 9
0
def _update_scan_checklist(entries):
    """
    Helper function for 'update_blacklist()'. Updates the dashboard's database.

    <entries> maps scan names to comments. Each scan is looked up in the
    dashboard and given a checklist entry from the default dashboard user
    carrying that comment, with sign_off=False.

    Raises a MetadataException if no default dashboard user can be retrieved
    or if a scan does not exist in the dashboard database.
    """
    try:
        user = dashboard.get_default_user()
    except Exception:
        # 'Exception' rather than a bare except so that KeyboardInterrupt /
        # SystemExit are not misreported as a configuration problem.
        raise MetadataException(
            "Can't update dashboard QC information without "
            "a default dashboard user defined. Please add "
            "'DEFAULT_DASH_USER' to your config file.")

    for scan_name in entries:
        scan = dashboard.get_scan(scan_name)
        if not scan:
            raise MetadataException("{} does not exist in the dashboard "
                                    "database".format(scan_name))
        scan.add_checklist_entry(user.id,
                                 comment=entries[scan_name],
                                 sign_off=False)
Ejemplo n.º 10
0
def _parse_checklist(checklist, subject=None):
    """
    Support function for read_checklist(). Collects the entries found in an
    already opened checklist.csv file.

    'checklist' must be an open file handler. The first field of each line,
    with any 'qc_' text and the file extension removed, is taken as the
    subject ID; the remaining fields joined by single spaces form the comment.
    Blank lines are skipped, lines whose ID does not parse are logged and
    skipped, and only the first entry found for an ID is kept.

    Returns: A dictionary of subject IDs mapped to their QC comments (an
    empty string when there is no comment). If 'subject' is given, returns
    only that subject's comment string, or None if it is not in the file.
    """
    entries = None if subject else {}

    for raw_line in checklist.readlines():
        tokens = raw_line.split()
        if len(tokens) == 0:
            # Nothing but whitespace on this line
            continue

        try:
            subid = os.path.splitext(tokens[0].replace("qc_", ""))[0]
        except (IndexError, TypeError):
            raise MetadataException(
                f"Found malformed checklist entry: {raw_line}")

        try:
            scanid.parse(subid)
        except scanid.ParseException:
            logger.error(
                f"Found malformed subject ID {subid} in checklist. Ignoring."
            )
            continue

        if entries and subid in entries:
            logger.info(
                f"Found duplicate checklist entries for {subid}. Ignoring "
                "all except the first entry found."
            )
            continue

        comment = " ".join(tokens[1:]).strip()
        if not subject:
            entries[subid] = comment
        elif subid == subject:
            # Found the one requested entry, no need to read any further.
            return comment

    return entries
Ejemplo n.º 11
0
def write_metadata(lines, path, retry=3):
    """
    Repeatedly attempts to write lines to <path>. The destination file
    will be overwritten with <lines> so any contents you wish to preserve
    should be contained within the list.

    <retry> is the number of write attempts allowed. After a failed attempt
    the error is logged and, if any attempts remain, the function waits a
    random 0-10 seconds before trying again.

    Raises a MetadataException once no attempts remain (immediately, without
    writing, if <retry> is 0).
    """
    if not retry:
        raise MetadataException("Failed to update {}".format(path))

    try:
        with open(path, "w") as meta_file:
            meta_file.writelines(lines)
    except Exception:
        # 'Exception' rather than a bare except: a Ctrl-C during the write
        # must abort, not be swallowed and retried.
        remaining = retry - 1
        logger.error("Failed to write metadata file {}. Tries "
                     "remaining - {}".format(path, remaining))
        if remaining:
            # Random delay before the next attempt (presumably to avoid
            # colliding again with another writer - TODO confirm). Skipped
            # when no tries are left, since the next call raises at once.
            wait_time = random.uniform(0, 10)
            time.sleep(wait_time)
        write_metadata(lines, path, retry=remaining)
Ejemplo n.º 12
0
def locate_metadata(filename, study=None, subject=None, config=None, path=None):
    """
    Work out the full path of a metadata file.

    If 'path' is given it is returned unchanged. Otherwise the result is
    <filename> joined onto the 'meta' path of the datman config. When no
    config is given one is created from the subject ID if there is one,
    else from the study.

    Raises a MetadataException if none of path, study, subject or config is
    given.
    """
    if not any((path, study, config, subject)):
        raise MetadataException(
            f"Can't locate metadata file {filename} without either "
            "1) a full path to the file 2) a study or "
            "subject ID or 3) a datman.config "
            "object"
        )

    # An explicit path always wins, no config lookup needed.
    if path:
        return path

    if not config:
        config = datman.config.config(study=subject or study)

    return os.path.join(config.get_path("meta"), filename)
Ejemplo n.º 13
0
def _fetch_checklist(subject=None, study=None, config=None):
    """
    Support function for read_checklist(). Gets a list of existing / signed off
    sessions from the dashboard.

    The checklist.csv file dropped the session number, so only information on
    the first session is reported to maintain consistency. :(

    Returns a dictionary formatted like that of '_parse_checklist' or a string
    comment if the 'subject' argument was given (None if the subject's
    session is not found). The comment is the session's reviewer when it is
    signed off and an empty string otherwise. Phantoms and timepoints without
    sessions are left out of the dictionary.

    Raises a MetadataException if none of subject, study or config is given.
    """
    if not (subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard checklist "
            "contents without either 1) a subject or study ID 2) a "
            "datman.config object")

    if subject:
        session = dashboard.get_session(subject)
        if not session:
            return
        if session.signed_off:
            return str(session.reviewer)
        return ''

    if config and not study:
        study = config.study_name

    db_study = dashboard.get_project(study)
    entries = {}
    for timepoint in db_study.timepoints:
        if timepoint.is_phantom or not len(timepoint.sessions):
            continue
        # On Python 3 'values()' is a view that cannot be indexed, so it is
        # copied into a list first. This also works on Python 2.
        session = list(timepoint.sessions.values())[0]
        if session.signed_off:
            comment = str(session.reviewer)
        else:
            comment = ''
        # NOTE(review): on Python 3 encode() makes the keys bytes rather than
        # str - confirm that callers expect this.
        str_name = timepoint.name.encode('utf-8')
        entries[str_name] = comment

    return entries
Ejemplo n.º 14
0
def _fetch_blacklist(scan=None, subject=None, study=None, config=None):
    """
    Helper function for 'read_blacklist()'. Gets the blacklist contents from
    the dashboard's database

    If 'scan' is given, returns that scan's comment when the scan exists and
    is blacklisted, otherwise None. If 'subject' is given, returns a
    dictionary of that subject's blacklist entries. Otherwise returns a
    dictionary of blacklist entries for the whole study (taken from 'config'
    when a config is given, else from 'study').

    Names and comments are utf-8 encoded where possible; when encoding fails
    the comment is returned as stored.

    Raises a MetadataException if none of scan, subject, study or config is
    given.
    """
    if not (scan or subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard blacklist info "
            "without either 1) a scan name 2) a subject ID 3) a study ID or "
            "4) a datman config object")

    if scan:
        db_scan = dashboard.get_scan(scan)
        if db_scan and db_scan.blacklisted():
            try:
                return db_scan.get_comment().encode('utf-8')
            except Exception:
                # 'Exception' rather than a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                return db_scan.get_comment()
        return

    if subject:
        db_subject = dashboard.get_subject(subject)
        blacklist = db_subject.get_blacklist_entries()
    else:
        if config:
            study = config.study_name
        db_study = dashboard.get_project(study)
        blacklist = db_study.get_blacklisted_scans()

    entries = {}
    for entry in blacklist:
        scan_name = str(entry.scan) + "_" + entry.scan.description
        try:
            scan_name = scan_name.encode('utf-8')
            comment = entry.comment.encode('utf-8')
        except Exception:
            # Fall back to the comment as stored when encoding fails.
            comment = entry.comment
        entries[scan_name] = comment

    return entries
Ejemplo n.º 15
0
def read_checklist(study=None, subject=None, config=None, path=None):
    """
    Looks up QC checklist entries. When the dashboard is found (and no path
    is given) ONLY the dashboard database is consulted. Otherwise a datman
    style 'checklist' file is read from the filesystem.

    At least one of the following must be given:
        1) A study name (nickname, not the study tag) or subject ID (Including
           a session number)
        2) A datman config object, initialized to the study being worked with
        3) A full path directly to a checklist file (Will circumvent the
           dashboard database check and ignore any datman config files)

    Returns:
        - A dictionary of subject IDs mapped to their comment / name of the
          person who signed off on their data
        - OR the comment for a specific subject if a subject ID is given
        - OR 'None' if a specific subject ID is given and they're not found
          in the list

    Raises a MetadataException if no argument is given, or if the dashboard
    query or the file read fails.
    """
    if not any((study, subject, config, path)):
        raise MetadataException(
            "Can't read dashboard checklist "
            "contents without either 1) a subject or study ID 2) a "
            "datman.config object or 3) a full path to the checklist")

    if subject:
        ident = datman.scanid.parse(subject)

    if dashboard.dash_found and not path:
        # The dashboard is queried with the session included in the ID.
        db_subject = subject
        if subject:
            db_subject = ident.get_full_subjectid_with_timepoint_session()
        try:
            return _fetch_checklist(subject=db_subject,
                                    study=study,
                                    config=config)
        except Exception as e:
            raise MetadataException(
                "Can't retrieve checklist information "
                "from dashboard database. Reason - {}".format(str(e)))

    logger.info("Dashboard not found, attempting to find a checklist "
                "metadata file instead.")
    checklist_path = locate_metadata('checklist.csv',
                                     path=path,
                                     subject=subject,
                                     study=study,
                                     config=config)

    # The checklist file is searched with the session dropped from the ID.
    file_subject = subject
    if subject:
        file_subject = ident.get_full_subjectid_with_timepoint()

    try:
        with open(checklist_path, 'r') as checklist:
            return _parse_checklist(checklist, subject=file_subject)
    except Exception as e:
        raise MetadataException(
            "Failed to read checklist file {}. Reason - {}".format(
                checklist_path, str(e)))
Ejemplo n.º 16
0
def _fetch_checklist(
    subject=None,
    study=None,
    config=None,
    bids_id=None,
    bids_ses=None,
    use_bids=None,
):
    """
    Support function for read_checklist(). Looks up existing / signed off
    sessions in the dashboard.

    The checklist.csv file dropped the session number, so for a whole study
    only the first session of each timepoint is reported to stay consistent.

    With 'subject' or 'bids_id' a single result is returned: None if nothing
    was found, the reviewer as a string if the result is QC'd, otherwise an
    empty string. A 'bids_id' lookup also needs 'study' and 'bids_ses'.

    Without them a dictionary formatted like that of '_parse_checklist' is
    returned for the study (taken from 'study', else from 'config').
    Phantoms and timepoints without sessions are left out.

    If 'use_bids' is specified, the dictionary is keyed by BIDS name instead
    of datman name and subjects missing a BIDS name are omitted.

    Raises a MetadataException if none of subject, study or config is given,
    or if 'bids_id' is given without a study and BIDS session.
    """
    if not any((subject, study, config)):
        raise MetadataException("Can't retrieve dashboard checklist "
                                "contents without either 1) a subject or "
                                "study ID 2) a datman.config object")

    if subject:
        session = dashboard.get_session(subject)

    if bids_id:
        if not study or not bids_ses:
            raise MetadataException(
                "Cant retrieve checklist entry for BIDS "
                f"ID {bids_id} without a study and BIDS session "
                "number")
        session = dashboard.get_bids_subject(bids_id, bids_ses, study=study)

    if subject or bids_id:
        # Single lookup: report on just this one result.
        if not session:
            return
        return str(session.reviewer) if session.is_qcd() else ""

    if config and not study:
        study = config.study_name

    db_study = dashboard.get_project(study)
    entries = {}
    for timepoint in db_study.timepoints:
        if timepoint.is_phantom or len(timepoint.sessions) == 0:
            continue

        first_session = list(timepoint.sessions.values())[0]
        comment = ""
        if first_session.signed_off:
            comment = str(first_session.reviewer)

        if not use_bids:
            key = timepoint.name
        elif timepoint.bids_name:
            key = timepoint.bids_name
        else:
            # If bids is requested ignore subjects without a bids name
            continue
        entries[key] = comment

    return entries
Ejemplo n.º 17
0
def read_checklist(
    study=None,
    subject=None,
    config=None,
    path=None,
    bids_id=None,
    bids_ses=None,
    use_bids=False,
):
    """
    This function is used to look-up QC checklist entries. If the dashboard is
    found it will ONLY check the dashboard database, otherwise it expects a
    datman style 'checklist' file on the filesystem.

    This function can accept either:
        1) A study name (nickname, not the study tag) or subject ID (Including
           a session number and may use BIDS ID instead of datman ID)
        2) A datman config object, initialized to the study being worked with
        3) A full path directly to a checklist file (Will circumvent the
           dashboard database check and ignore any datman config files)

    Set use_bids=True to return an entire study's checklist organized by BIDS
    name instead of datman name. This option only works with dashboard
    integration.

    When searching by 'bids_id' a study is required. 'bids_ses' defaults to
    "01" and a non-string value is formatted as a zero padded, two digit
    number.

    Returns:
        - A dictionary of subject IDs mapped to their comment / name of the
          person who signed off on their data
        - OR the comment for a specific subject if a subject ID is given
        - OR 'None' if a specific subject ID is given and they're not found
          in the list

    Raises a MetadataException if no argument is given, if a BIDS ID is given
    without a study, if BIDS options are used without the dashboard, or if
    the dashboard query or file read fails.
    """
    if not (study or subject or config or path or bids_id):
        raise MetadataException("Can't read dashboard checklist "
                                "contents without either 1) a subject or "
                                "study ID 2) a datman.config object or 3) a "
                                "full path to the checklist")

    if bids_id and not study:
        raise MetadataException("Must provide a study to search by BIDS ID")

    if subject:
        ident = scanid.parse(subject)

    if dashboard.dash_found and not path:
        if subject:
            subject = ident.get_full_subjectid_with_timepoint_session()
        if bids_id and not bids_ses:
            bids_ses = "01"
        # isinstance() instead of comparing type() so that str subclasses
        # are also left untouched.
        if bids_id and not isinstance(bids_ses, str):
            bids_ses = f"{bids_ses:02d}"
        try:
            entries = _fetch_checklist(
                subject=subject,
                study=study,
                config=config,
                bids_id=bids_id,
                bids_ses=bids_ses,
                use_bids=use_bids,
            )
        except Exception as e:
            raise MetadataException("Can't retrieve checklist information "
                                    "from dashboard database. Reason - "
                                    f"{str(e)}") from e
        return entries

    logger.info("Dashboard not found, attempting to find a checklist "
                "metadata file instead.")
    if use_bids or bids_id:
        raise MetadataException(
            "BIDS IDs may only be used if querying the dashboard database.")

    checklist_path = locate_metadata("checklist.csv",
                                     path=path,
                                     subject=subject,
                                     study=study,
                                     config=config)

    if subject:
        # The checklist file is searched with the session dropped from the ID.
        subject = ident.get_full_subjectid_with_timepoint()

    try:
        with open(checklist_path, "r") as checklist:
            entries = _parse_checklist(checklist, subject=subject)
    except Exception as e:
        raise MetadataException(
            f"Failed to read checklist file {checklist_path}. Reason - {str(e)}"
        ) from e

    return entries