def update_blacklist(entries, study=None, config=None, path=None):
    """Add or replace blacklist entries for a study.

    ``entries`` maps scan names to the reason for blacklisting. Updates go
    to the dashboard database when it is installed (and no explicit ``path``
    was given); otherwise the blacklist metadata file is rewritten in full.
    """
    if not isinstance(entries, dict):
        raise MetadataException("Blacklist entries must be in dictionary "
                                "format with scan name as the key and reason "
                                "for blacklisting as the value")

    # The dashboard takes precedence unless the caller pointed at a file.
    if dashboard.dash_found and not path:
        _update_scan_checklist(entries)
        return

    blacklist_path = locate_metadata("blacklist.csv",
                                     study=study,
                                     config=config,
                                     path=path)
    merged = read_blacklist(path=blacklist_path)

    for scan_name, reason in entries.items():
        try:
            scanid.parse_filename(scan_name)
        except scanid.ParseException:
            raise MetadataException(
                f"Attempt to add invalid scan name {scan_name} to blacklist")
        if not reason:
            logger.error("Can't add blacklist entry with empty comment. "
                         f"Skipping {scan_name}")
            continue
        merged[scan_name] = reason

    contents = ["series\treason\n"]
    contents.extend(sorted(f"{sub} {merged[sub]}\n" for sub in merged))
    write_metadata(contents, blacklist_path)
def update_blacklist(entries, study=None, config=None, path=None):
    """Add or replace blacklist entries for a study.

    Args:
        entries (dict): Scan names mapped to the reason for blacklisting.
            Entries with an empty reason are skipped with an error logged.
        study (str, optional): A datman study name.
        config (:obj:`datman.config.config`, optional): A datman config
            object.
        path (str, optional): Full path to a blacklist file. If given, the
            dashboard database is bypassed.

    Raises:
        MetadataException: If ``entries`` is not a dict, or a scan name
            cannot be parsed.
    """
    if not isinstance(entries, dict):
        raise MetadataException(
            "Blacklist entries must be in dictionary "
            "format with scan name as the key and reason for blacklisting "
            "as the value")
    if dashboard.dash_found and not path:
        _update_scan_checklist(entries)
        return
    blacklist_path = locate_metadata('blacklist.csv',
                                     study=study,
                                     config=config,
                                     path=path)
    old_entries = read_blacklist(path=blacklist_path)
    for scan_name in entries:
        try:
            datman.scanid.parse_filename(scan_name)
        # Was a bare 'except:', which also swallowed KeyboardInterrupt etc.
        except datman.scanid.ParseException:
            raise MetadataException("Attempt to add invalid scan name {} "
                                    "to blacklist".format(scan_name))
        if not entries[scan_name]:
            logger.error("Can't add blacklist entry with empty comment. "
                         "Skipping {}".format(scan_name))
            continue
        old_entries[scan_name] = entries[scan_name]
    lines = ["{} {}\n".format(sub, old_entries[sub]) for sub in old_entries]
    new_list = ['series\treason\n']
    new_list.extend(sorted(lines))
    write_metadata(new_list, blacklist_path)
def _update_qc_reviewers(entries):
    """Support function for update_checklist(). Updates QC info on the
    dashboard.

    Args:
        entries (dict): Subject IDs mapped to QC comments (an empty string
            registers a new, unreviewed entry).

    Raises:
        MetadataException: If no default dashboard user is configured, or a
            subject is missing from the dashboard database.
    """
    try:
        user = dashboard.get_default_user()
    # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit escape.
    except Exception:
        raise MetadataException(
            "Can't update dashboard QC information without "
            "a default dashboard user defined. Please add "
            "'DEFAULT_DASH_USER' to your config file.")

    for subject in entries:
        timepoint = dashboard.get_subject(subject)
        if not timepoint or not timepoint.sessions:
            # Fixed doubled words ('in the in the') in the original message.
            raise MetadataException("{} not found in the dashboard "
                                    "database.".format(subject))

        comment = entries[subject]
        if not comment:
            # User was just registering a new QC entry. As long as the
            # session exists in the database there is no work to do.
            continue

        for num in timepoint.sessions:
            session = timepoint.sessions[num]
            if session.is_qcd():
                # Dont risk writing over QC-ers from the dashboard.
                continue
            session.sign_off(user.id)
def get_subject_metadata(config=None, study=None, allow_partial=False):
    """Map each QC'd session ID to any blacklisted scans it has.

    Collects and organizes checklist and blacklist data for one study. By
    default, blacklist entries for a subject are omitted unless that subject
    has a completed checklist entry, so that partially QC'd subjects are not
    accidentally processed by downstream pipelines; set ``allow_partial`` to
    keep them anyway.

    Either a study name or a datman config object must be supplied to find
    the checklist and blacklist contents.

    Args:
        config (:obj:`datman.config.config`, optional): A datman config
            object set to the study of interest.
        study (:obj:`str`, optional): A datman study name.
        allow_partial (bool, optional): Include blacklist entries even when
            the subject has no completed checklist entry yet. Defaults to
            False.

    Returns:
        dict: QC'd subject IDs each mapped to a list of blacklisted scan
            names, mangled to drop the series description and file
            extension.
    """
    if not config:
        if not study:
            raise MetadataException(
                "A study name or config object must be "
                "given to locate study metadata."
            )
        config = datman.config.config(study=study)

    checklist = read_checklist(config=config)
    blacklist = read_blacklist(config=config)

    all_qc = {subid: [] for subid in checklist if checklist[subid]}
    for bl_entry in blacklist:
        try:
            ident, _, _, _ = scanid.parse_filename(bl_entry)
        except scanid.ParseException:
            logger.error(
                f"Malformed scan name {bl_entry} found in blacklist. Ignoring."
            )
            continue

        subid = ident.get_full_subjectid_with_timepoint()
        if subid in all_qc:
            all_qc[subid].append(bl_entry)
        elif allow_partial:
            all_qc.setdefault(subid, []).append(bl_entry)
        else:
            logger.error(
                f"{subid} has blacklisted series {bl_entry} but does not "
                "appear in QC checklist. Ignoring blacklist entry"
            )
    return all_qc
def get_subject_metadata(config=None, study=None):
    """Returns all QC'd session IDs mapped to any blacklisted scans they have.

    Either a study name or a datman config object must be supplied. Subjects
    without a completed checklist entry are omitted so partially QC'd data
    does not leak into downstream pipelines.

    Args:
        config (:obj:`datman.config.config`, optional): A datman config
            object set to the study of interest.
        study (:obj:`str`, optional): A datman study name.

    Returns:
        dict: QC'd subject IDs each mapped to a list of blacklisted scan
            names.

    Raises:
        MetadataException: If neither a study nor a config is given.
    """
    if not config:
        if not study:
            raise MetadataException("A study name or config object must be "
                                    "given to locate study metadata.")
        config = datman.config.config(study=study)
    checklist = read_checklist(config=config)
    blacklist = read_blacklist(config=config)
    all_qc = {subid: [] for subid in checklist if checklist[subid]}
    for bl_entry in blacklist:
        try:
            ident, _, _, _ = datman.scanid.parse_filename(bl_entry)
        # Was a bare 'except:'; only parse failures should be skipped here.
        except datman.scanid.ParseException:
            logger.error("Malformed scan name {} found in blacklist. "
                         "Ignoring.".format(bl_entry))
            continue
        subid = ident.get_full_subjectid_with_timepoint()
        # Membership test replaces the old 'all_qc[subid]' probe + KeyError.
        if subid not in all_qc:
            logger.error(
                "{} has blacklisted series {} but does not "
                "appear in QC checklist. Ignoring blacklist entry".format(
                    subid, bl_entry))
            continue
        all_qc[subid].append(bl_entry)
    return all_qc
def read_blacklist(study=None, scan=None, subject=None, config=None,
                   path=None):
    """
    This function is used to look up blacklisted scans. If the dashboard is
    found it ONLY checks the dashboard database. Otherwise it expects a
    datman style 'blacklist' file on the filesystem.

    This function can accept:
        - A study name (nickname, not study tag)
        - A scan name (may include the full path and extension)
        - A subject ID
        - A datman config object, initialized to the study being worked with
        - A full path directly to a blacklist file. If given, this will
          circumvent any dashboard database checks and ignore any datman
          config files.

    Returns:
        - A dictionary of scan names mapped to the comment provided when
          they were blacklisted (Note: If reading from the filesystem,
          commas contained in comments will be removed)
        - OR a dictionary of the same format containing only entries for a
          single subject if a specific subject ID was given
        - OR the comment for a specific scan if a scan is given
        - OR 'None' if a scan is given but not found in the blacklist
    """
    if dashboard.dash_found and not path:
        return _fetch_blacklist(scan=scan,
                                subject=subject,
                                study=study,
                                config=config)

    if scan:
        try:
            ident, tag, series, descr = scanid.parse_filename(scan)
        # Was a bare 'except:'; only parse failures should be handled here.
        except scanid.ParseException:
            logger.error("Invalid scan name: {}".format(scan))
            return
        tmp_sub = ident.get_full_subjectid_with_timepoint_session()
        # Need to drop the path and extension if in the original 'scan'
        scan = "_".join([str(ident), tag, series, descr])
    else:
        tmp_sub = subject

    blacklist_path = locate_metadata("blacklist.csv",
                                     study=study,
                                     subject=tmp_sub,
                                     config=config,
                                     path=path)
    try:
        with open(blacklist_path, 'r') as blacklist:
            entries = _parse_blacklist(blacklist, scan=scan, subject=subject)
    except Exception as e:
        # Message previously said 'checklist file'; this reads the blacklist.
        raise MetadataException("Failed to read blacklist file {}. Reason - "
                                "{}".format(blacklist_path, str(e)))
    return entries
def _fetch_blacklist(
    scan=None,
    subject=None,
    bids_ses=None,
    study=None,
    config=None,
    use_bids=False,
):
    """Helper function for 'read_blacklist()'. Gets the blacklist contents
    from the dashboard's database.

    Args:
        scan (str, optional): A single scan name to look up.
        subject (str, optional): A subject ID to fetch all entries for.
        bids_ses (str, optional): A BIDS session number (defaults to "01"
            when a bids lookup is made without one).
        study (str, optional): A datman study name.
        config (:obj:`datman.config.config`, optional): A datman config
            object.
        use_bids (bool, optional): Look up / report BIDS names instead of
            datman names. Scans without a bids name are omitted.

    Returns:
        The comment for 'scan' (or None if not blacklisted), or a dict of
        scan names mapped to comments.

    Raises:
        MetadataException: If no identifying argument at all was given.
    """
    if not (scan or subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard blacklist info "
            "without either 1) a scan name 2) a subject "
            "ID 3) a study ID or 4) a datman config "
            "object"
        )

    if scan:
        if use_bids:
            db_scan = dashboard.get_bids_scan(scan)
        else:
            db_scan = dashboard.get_scan(scan)
        if db_scan and db_scan.blacklisted():
            # The previous try/except returned the identical expression on
            # both paths, so the dead handler has been removed.
            return db_scan.get_comment()
        return

    if subject:
        if use_bids or bids_ses:
            if not bids_ses:
                bids_ses = "01"
            db_subject = dashboard.get_bids_subject(subject, bids_ses, study)
        else:
            db_subject = dashboard.get_subject(subject)
        blacklist = db_subject.get_blacklist_entries()
    else:
        if config:
            study = config.study_name
        db_study = dashboard.get_project(study)
        blacklist = db_study.get_blacklisted_scans()

    entries = {}
    for entry in blacklist:
        if use_bids:
            if not entry.scan.bids_name:
                # Ignore scans without a bids name if bids was requested
                continue
            scan_name = entry.scan.bids_name
        else:
            scan_name = str(entry.scan) + "_" + entry.scan.description
        entries[scan_name] = entry.comment
    return entries
def update_checklist(entries, study=None, config=None, path=None):
    """
    Handles QC checklist updates. Will preferentially update the dashboard
    (ignoring any 'checklist.csv' files) unless the dashboard is not
    installed or a specific path is given to a file.

    <entries> should be a dictionary with subject IDs (minus session/repeat)
    as the keys and qc entries as the value (with an empty string for
    new/blank QC entries)

    This will raise a MetadataException if any part of the update fails for
    any entry.
    """
    if not isinstance(entries, dict):
        raise MetadataException(
            "Checklist entries must be in dictionary "
            "format with subject ID as the key and comment as the value "
            "(empty string for new, unreviewed subjects)")

    if dashboard.dash_found and not path:
        _update_qc_reviewers(entries)
        return

    # No dashboard, or path was given, so update file system.
    checklist_path = locate_metadata('checklist.csv',
                                     study=study,
                                     config=config,
                                     path=path)
    old_entries = read_checklist(path=checklist_path)

    # Merge with existing list
    for subject in entries:
        try:
            ident = datman.scanid.parse(subject)
        # Was a bare 'except:'; only ID parse failures belong here.
        except datman.scanid.ParseException:
            raise MetadataException("Attempt to add invalid subject ID {} to "
                                    "QC checklist".format(subject))
        subject = ident.get_full_subjectid_with_timepoint()
        old_entries[subject] = entries[subject]

    # Reformat to expected checklist line format
    lines = [
        "qc_{}.html {}\n".format(sub, old_entries[sub]) for sub in old_entries
    ]
    write_metadata(sorted(lines), checklist_path)
def _update_scan_checklist(entries):
    """
    Helper function for 'update_blacklist()'. Updates the dashboard's
    database.

    Args:
        entries (dict): Scan names mapped to blacklist comments.

    Raises:
        MetadataException: If no default dashboard user is configured, or a
            scan does not exist in the dashboard database.
    """
    try:
        user = dashboard.get_default_user()
    # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit escape.
    except Exception:
        raise MetadataException(
            "Can't update dashboard QC information without "
            "a default dashboard user defined. Please add "
            "'DEFAULT_DASH_USER' to your config file.")

    for scan_name in entries:
        scan = dashboard.get_scan(scan_name)
        if not scan:
            raise MetadataException("{} does not exist in the dashboard "
                                    "database".format(scan_name))
        scan.add_checklist_entry(user.id,
                                 comment=entries[scan_name],
                                 sign_off=False)
def _parse_checklist(checklist, subject=None):
    """Support function for read_checklist(). Reads signed-off sessions from
    an open checklist.csv file handle.

    Returns:
        A dict of subject IDs (minus session/repeat num) mapped to their QC
        comment (empty string for a new entry). Or, when 'subject' is given,
        just that subject's comment string (None if not present).
    """
    entries = None if subject else {}

    for line in checklist:
        fields = line.split()
        if not fields:
            # Ignore blank lines
            continue

        try:
            subid = os.path.splitext(fields[0].replace("qc_", ""))[0]
        except (IndexError, TypeError):
            raise MetadataException(f"Found malformed checklist entry: {line}")

        try:
            scanid.parse(subid)
        except scanid.ParseException:
            logger.error(
                f"Found malformed subject ID {subid} in checklist. Ignoring."
            )
            continue

        if entries and subid in entries:
            # Keep only the first occurrence of each ID.
            logger.info(
                f"Found duplicate checklist entries for {subid}. Ignoring "
                "all except the first entry found."
            )
            continue

        comment = " ".join(fields[1:]).strip()
        if not subject:
            entries[subid] = comment
        elif subid == subject:
            return comment

    return entries
def write_metadata(lines, path, retry=3):
    """
    Repeatedly attempts to write lines to <path>. The destination file will
    be overwritten with <lines> so any contents you wish to preserve should
    be contained within the list.

    Args:
        lines (list): Strings to write out, each including its own newline.
        path (str): Destination file path.
        retry (int, optional): Attempts remaining. Defaults to 3.

    Raises:
        MetadataException: If all attempts are exhausted.
    """
    if not retry:
        raise MetadataException("Failed to update {}".format(path))
    try:
        with open(path, "w") as meta_file:
            meta_file.writelines(lines)
    # Was a bare 'except:'. Only I/O errors are worth retrying; anything
    # else (e.g. a bad 'lines' value) would fail identically every attempt.
    except OSError:
        logger.error("Failed to write metadata file {}. Tries "
                     "remaining - {}".format(path, retry))
        # Random backoff so concurrent writers don't collide repeatedly.
        wait_time = random.uniform(0, 10)
        time.sleep(wait_time)
        write_metadata(lines, path, retry=retry - 1)
def locate_metadata(filename, study=None, subject=None, config=None,
                    path=None):
    """Find the full path to a metadata file.

    Args:
        filename (str): The metadata file to locate (e.g. 'checklist.csv').
        study (str, optional): A datman study name.
        subject (str, optional): A subject ID (used to construct a config
            when none is given).
        config (:obj:`datman.config.config`, optional): A datman config
            object.
        path (str, optional): A full path to the file; returned as-is.

    Returns:
        str: The full path to the metadata file.

    Raises:
        MetadataException: If no way of locating the file was provided.
    """
    if not (path or study or config or subject):
        raise MetadataException(
            # Previously rendered a literal '(unknown)' instead of the name.
            f"Can't locate metadata file {filename} without either "
            "1) a full path to the file 2) a study or "
            "subject ID or 3) a datman.config "
            "object"
        )

    if path:
        return path

    if not config:
        given_study = subject or study
        config = datman.config.config(study=given_study)
    return os.path.join(config.get_path("meta"), filename)
def _fetch_checklist(subject=None, study=None, config=None):
    """
    Support function for read_checklist(). Gets a list of existing /
    signed off sessions from the dashboard.

    The checklist.csv file dropped the session number, so only information
    on the first session is reported to maintain consistency. :(

    Returns a dictionary formatted like that of '_parse_checklist' or a
    string comment if the 'subject' argument was given
    """
    if not (subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard checklist "
            "contents without either 1) a subject or study ID 2) a "
            "datman.config object")

    if subject:
        session = dashboard.get_session(subject)
        if not session:
            return
        if session.signed_off:
            return str(session.reviewer)
        return ''

    if config and not study:
        study = config.study_name

    db_study = dashboard.get_project(study)
    entries = {}
    for timepoint in db_study.timepoints:
        if timepoint.is_phantom or not len(timepoint.sessions):
            continue
        # dict views are not subscriptable in python 3; the old
        # '.values()[0]' raised a TypeError here.
        session = list(timepoint.sessions.values())[0]
        if session.signed_off:
            comment = str(session.reviewer)
        else:
            comment = ''
        # Dropped the python 2 era .encode('utf-8'); under python 3 it
        # produced bytes keys instead of str.
        entries[timepoint.name] = comment
    return entries
def _fetch_blacklist(scan=None, subject=None, study=None, config=None):
    """
    Helper function for 'read_blacklist()'. Gets the blacklist contents from
    the dashboard's database.

    Returns the comment for 'scan' (None if not blacklisted), or a dict of
    scan names mapped to comments.
    """
    if not (scan or subject or study or config):
        raise MetadataException(
            "Can't retrieve dashboard blacklist info "
            "without either 1) a scan name 2) a subject ID 3) a study ID or "
            "4) a datman config object")

    if scan:
        db_scan = dashboard.get_scan(scan)
        if db_scan and db_scan.blacklisted():
            # Dropped the python 2 era .encode('utf-8') (with its bare
            # except fallback); under python 3 it returned bytes, not str.
            return db_scan.get_comment()
        return

    if subject:
        db_subject = dashboard.get_subject(subject)
        blacklist = db_subject.get_blacklist_entries()
    else:
        if config:
            study = config.study_name
        db_study = dashboard.get_project(study)
        blacklist = db_study.get_blacklisted_scans()

    entries = {}
    for entry in blacklist:
        scan_name = str(entry.scan) + "_" + entry.scan.description
        # As above: no more .encode('utf-8') on keys/comments.
        entries[scan_name] = entry.comment
    return entries
def read_checklist(study=None, subject=None, config=None, path=None):
    """Look up QC checklist entries.

    If the dashboard is found it will ONLY check the dashboard database;
    otherwise a datman style 'checklist' file is expected on the filesystem.

    Accepts any of:
        1) A study name (nickname, not the study tag) or subject ID
           (including a session number)
        2) A datman config object, initialized to the study being worked
           with
        3) A full path directly to a checklist file (circumvents the
           dashboard database check and ignores any datman config files)

    Returns:
        A dict of subject IDs mapped to the comment / name of the person
        who signed off on their data, OR the comment for one subject when a
        subject ID is given, OR None when that subject isn't in the list.
    """
    if not (study or subject or config or path):
        raise MetadataException(
            "Can't read dashboard checklist "
            "contents without either 1) a subject or study ID 2) a "
            "datman.config object or 3) a full path to the checklist")

    ident = datman.scanid.parse(subject) if subject else None

    if dashboard.dash_found and not path:
        if subject:
            subject = ident.get_full_subjectid_with_timepoint_session()
        try:
            return _fetch_checklist(subject=subject,
                                    study=study,
                                    config=config)
        except Exception as e:
            raise MetadataException(
                "Can't retrieve checklist information "
                "from dashboard database. Reason - {}".format(str(e)))

    logger.info("Dashboard not found, attempting to find a checklist "
                "metadata file instead.")

    checklist_path = locate_metadata('checklist.csv',
                                     path=path,
                                     subject=subject,
                                     study=study,
                                     config=config)
    if subject:
        subject = ident.get_full_subjectid_with_timepoint()
    try:
        with open(checklist_path, 'r') as checklist:
            return _parse_checklist(checklist, subject=subject)
    except Exception as e:
        raise MetadataException("Failed to read checklist file "
                                "{}. Reason - {}".format(
                                    checklist_path, str(e)))
def _fetch_checklist(
    subject=None,
    study=None,
    config=None,
    bids_id=None,
    bids_ses=None,
    use_bids=None,
):
    """Support function for read_checklist(). Pulls existing / signed-off
    session info from the dashboard database.

    The checklist.csv file dropped the session number, so only information
    on the first session is reported to maintain consistency. :(

    Returns a dict shaped like '_parse_checklist' output, or a string
    comment when 'subject' (or 'bids_id') is given. If 'use_bids' is set
    the keys are BIDS IDs instead of datman IDs, and subjects missing a
    bids ID are omitted.
    """
    if not (subject or study or config):
        raise MetadataException("Can't retrieve dashboard checklist "
                                "contents without either 1) a subject or "
                                "study ID 2) a datman.config object")

    session = dashboard.get_session(subject) if subject else None
    if bids_id:
        if not (study and bids_ses):
            raise MetadataException(
                "Cant retrieve checklist entry for BIDS "
                f"ID {bids_id} without a study and BIDS session "
                "number")
        session = dashboard.get_bids_subject(bids_id, bids_ses, study=study)

    # Single-subject lookup: return just that subject's comment.
    if subject or bids_id:
        if not session:
            return
        return str(session.reviewer) if session.is_qcd() else ""

    if config and not study:
        study = config.study_name
    db_study = dashboard.get_project(study)

    entries = {}
    for timepoint in db_study.timepoints:
        if timepoint.is_phantom or not len(timepoint.sessions):
            continue
        first_session = list(timepoint.sessions.values())[0]
        comment = (str(first_session.reviewer)
                   if first_session.signed_off else "")
        if use_bids:
            if not timepoint.bids_name:
                # If bids is requested ignore subjects without a bids name
                continue
            key = timepoint.bids_name
        else:
            key = timepoint.name
        entries[key] = comment
    return entries
def read_checklist(
    study=None,
    subject=None,
    config=None,
    path=None,
    bids_id=None,
    bids_ses=None,
    use_bids=False,
):
    """
    This function is used to look-up QC checklist entries. If the dashboard
    is found it will ONLY check the dashboard database, otherwise it expects
    a datman style 'checklist' file on the filesystem.

    This function can accept either:
        1) A study name (nickname, not the study tag) or subject ID
           (Including a session number and may use BIDS ID instead of
           datman ID)
        2) A datman config object, initialized to the study being worked
           with
        3) A full path directly to a checklist file (Will circumvent the
           dashboard database check and ignore any datman config files)

    Set use_bids=True to return an entire study's checklist organized by
    BIDS name instead of datman name. This option only works with dashboard
    integration.

    Returns:
        - A dictionary of subject IDs mapped to their comment / name of the
          person who signed off on their data
        - OR the comment for a specific subject if a subject ID is given
        - OR 'None' if a specific subject ID is given and they're not found
          in the list
    """
    if not (study or subject or config or path or bids_id):
        raise MetadataException("Can't read dashboard checklist "
                                "contents without either 1) a subject or "
                                "study ID 2) a datman.config object or 3) a "
                                "full path to the checklist")

    if bids_id and not study:
        raise MetadataException("Must provide a study to search by BIDS ID")

    if subject:
        ident = scanid.parse(subject)

    if dashboard.dash_found and not path:
        if subject:
            subject = ident.get_full_subjectid_with_timepoint_session()
        if bids_id and not bids_ses:
            bids_ses = "01"
        # isinstance instead of the 'type(x) == str' anti-pattern; pad
        # integer session numbers to the two-digit string form.
        if bids_id and not isinstance(bids_ses, str):
            bids_ses = f"{bids_ses:02d}"
        try:
            entries = _fetch_checklist(
                subject=subject,
                study=study,
                config=config,
                bids_id=bids_id,
                bids_ses=bids_ses,
                use_bids=use_bids,
            )
        except Exception as e:
            raise MetadataException("Can't retrieve checklist information "
                                    "from dashboard database. Reason - "
                                    f"{str(e)}")
        return entries

    logger.info("Dashboard not found, attempting to find a checklist "
                "metadata file instead.")

    if use_bids or bids_id:
        raise MetadataException(
            "BIDS IDs may only be used if querying the dashboard database.")

    checklist_path = locate_metadata("checklist.csv",
                                     path=path,
                                     subject=subject,
                                     study=study,
                                     config=config)
    if subject:
        subject = ident.get_full_subjectid_with_timepoint()
    try:
        with open(checklist_path, "r") as checklist:
            entries = _parse_checklist(checklist, subject=subject)
    except Exception as e:
        raise MetadataException(
            f"Failed to read checklist file {checklist_path}. Reason - {str(e)}"
        )
    return entries