def update_blacklist(entries, study=None, config=None, path=None):
    if not isinstance(entries, dict):
        raise MetadataException("Blacklist entries must be in dictionary "
                                "format with scan name as the key and reason "
                                "for blacklisting as the value")

    if dashboard.dash_found and not path:
        _update_scan_checklist(entries)
        return

    blacklist_path = locate_metadata("blacklist.csv", study=study,
                                     config=config, path=path)

    old_entries = read_blacklist(path=blacklist_path)
    for scan_name in entries:
        try:
            scanid.parse_filename(scan_name)
        except scanid.ParseException:
            raise MetadataException(
                f"Attempt to add invalid scan name {scan_name} to blacklist")
        if not entries[scan_name]:
            logger.error("Can't add blacklist entry with empty comment. "
                         f"Skipping {scan_name}")
            continue
        old_entries[scan_name] = entries[scan_name]

    lines = [f"{sub} {old_entries[sub]}\n" for sub in old_entries]
    new_list = ["series\treason\n"]
    new_list.extend(sorted(lines))
    write_metadata(new_list, blacklist_path)
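# A minimal usage sketch for update_blacklist(). The study name, scan
# names, and reasons below are hypothetical; the function validates each
# key with scanid.parse_filename() before writing blacklist.csv.
example_entries = {
    "DTI_CMH_H001_01_01_T1_02_description": "severe motion artifact",
    "DTI_CMH_H001_01_01_RST_04_EPI": "truncated acquisition",
}
update_blacklist(example_entries, study="DTI")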
def test_parse_filename_with_path():
    ident, tag, series, description = scanid.parse_filename(
        '/data/DTI_CMH_H001_01_01_T1_02_description.nii.gz')
    assert str(ident) == 'DTI_CMH_H001_01_01'
    assert tag == 'T1'
    assert series == '02'
    assert description == 'description'
def test_parse_filename_parses_when_tag_contains_kcniish_SE_substring():
    ident, tag, series, description = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_SEABC_11_FieldMap-2mm.nii.gz")
    assert str(ident) == "CLZ_CMP_0000_01_01"
    assert tag == "SEABC"
    assert series == "11"
    assert description == "FieldMap-2mm"

    _, tag, _, _ = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_ABCSE_11_FieldMap-2mm")
    assert tag == "ABCSE"

    _, tag, _, _ = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_ABCSEDEF_11_FieldMap-2mm")
    assert tag == "ABCSEDEF"
def test_parse_filename_PHA():
    ident, tag, series, description = scanid.parse_filename(
        'DTI_CMH_PHA_ADN0001_T1_02_description.nii.gz')
    assert str(ident) == 'DTI_CMH_PHA_ADN0001'
    assert tag == 'T1'
    assert series == '02'
    assert description == 'description'
def test_parse_filename():
    ident, tag, series, description = scanid.parse_filename(
        'DTI_CMH_H001_01_01_T1_03_description.nii.gz')
    assert str(ident) == 'DTI_CMH_H001_01_01'
    assert tag == 'T1'
    assert series == '03'
    assert description == 'description'
def test_parse_filename_parses_when_tag_contains_pha():
    ident, tag, series, description = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_PHABCD_11_FieldMap-2mm")
    assert str(ident) == "CLZ_CMP_0000_01_01"
    assert tag == "PHABCD"
    assert series == "11"
    assert description == "FieldMap-2mm"

    _, tag, _, _ = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_ABCPHA_11_FieldMap-2mm")
    assert tag == "ABCPHA"

    _, tag, _, _ = scanid.parse_filename(
        "CLZ_CMP_0000_01_01_ABCPHADEF_11_FieldMap-2mm")
    assert tag == "ABCPHADEF"
def add_pipeline_blacklist(subjects, blacklist_file):
    if not os.path.exists(blacklist_file):
        logger.error("The given pipeline specific blacklist does not exist: "
                     "{}".format(blacklist_file))
        sys.exit(1)

    try:
        with open(blacklist_file, 'r') as blacklist_data:
            blacklist = blacklist_data.readlines()
    except IOError:
        logger.error("Cannot read blacklist {}".format(blacklist_file))
        sys.exit(1)

    for entry in blacklist:
        entry = os.path.basename(entry)
        entry = entry.replace('.nii', '').replace('.gz', '').strip()
        try:
            ident, tag, _, _ = scanid.parse_filename(entry)
        except scanid.ParseException:
            logger.debug("Cannot parse blacklist entry: {}. "
                         "Skipping.".format(entry))
            continue
        subid = ident.get_full_subjectid_with_timepoint()
        try:
            subjects[subid].append(entry)
        except KeyError:
            # subid is not a key in the subjects dict
            logger.debug("Blacklisted item given for subject not in "
                         "checklist.csv. Ignoring entry {}".format(entry))
            continue
    return subjects
def check_inputs(config, tag, path, expected_tags):
    """
    Ensures we have the same number of input files as we have defined in
    ExportInfo.
    """
    if not expected_tags:
        raise Exception('expected tag {} not found in {}'.format(tag, path))
    n_found = len(expected_tags)
    site = sid.parse_filename(expected_tags[0])[0].site
    tag_info = config.get_tags(site)
    try:
        if tag in tag_info:
            n_expected = tag_info.get(tag, 'Count')
        elif tag in config.study_config['Sites'][site]['links'].keys():
            n_expected = config.study_config['Sites'][site]['links'][tag][
                'Count']
        else:
            raise Exception
    except Exception:
        raise Exception(
            'tag {} not defined in Sites:site:ExportInfo or '
            'Sites:site:links'.format(tag))
    if n_found != n_expected:
        raise Exception(
            'number of files found with tag {} was {}, expected {}'.format(
                tag, n_found, n_expected))
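# For reference, a hedged sketch of the study config layout check_inputs()
# walks. The site name, tags, and counts are hypothetical; only the
# Sites -> <site> -> links -> <tag> -> Count path is taken directly from
# the code above, with ExportInfo counts read through config.get_tags().
example_study_config = {
    "Sites": {
        "CMH": {
            "ExportInfo": {
                "T1": {"Count": 1},  # hypothetical entry served by get_tags()
            },
            "links": {
                "RST": {"Count": 2},  # fallback path checked by check_inputs()
            },
        }
    }
}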
def get_subject_metadata(config=None, study=None, allow_partial=False):
    """Returns all QC'd session IDs mapped to any blacklisted scans they have

    This will collect and organize all checklist and blacklist data for a
    study. Sessions that do not have a completed checklist entry will have
    their blacklist entries omitted from the output unless the
    'allow_partial' flag is used. This is done so that partially QC'd
    subjects do not accidentally get processed by downstream pipelines.

    Either a study name or a datman config object must be supplied to
    find the checklist and blacklist contents.

    Args:
        config (:obj:`datman.config.config`, optional): A datman config
            object with the study set to the study of interest.
        study (:obj:`str`, optional): A datman study name
        allow_partial (bool, optional): Whether to include blacklist entries
            if the subject has not been fully QC'd (i.e. if they don't have
            a completed checklist entry yet). Defaults to False.

    Returns:
        dict: A dictionary with any QC'd subject ID mapped to a list of
            blacklisted scan names that have been mangled to drop the
            series description and the file extension.
    """
    if not config:
        if not study:
            raise MetadataException("A study name or config object must be "
                                    "given to locate study metadata.")
        config = datman.config.config(study=study)

    checklist = read_checklist(config=config)
    blacklist = read_blacklist(config=config)

    all_qc = {subid: [] for subid in checklist if checklist[subid]}
    for bl_entry in blacklist:
        try:
            ident, _, _, _ = scanid.parse_filename(bl_entry)
        except scanid.ParseException:
            logger.error(
                f"Malformed scan name {bl_entry} found in blacklist. "
                "Ignoring.")
            continue
        subid = ident.get_full_subjectid_with_timepoint()
        try:
            all_qc[subid].append(bl_entry)
        except KeyError:
            if allow_partial:
                all_qc.setdefault(subid, []).append(bl_entry)
            else:
                logger.error(
                    f"{subid} has blacklisted series {bl_entry} but does "
                    "not appear in QC checklist. Ignoring blacklist entry")
            continue
    return all_qc
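# A hedged usage sketch for get_subject_metadata(); the study name and
# returned IDs are hypothetical. Keys are fully QC'd session IDs, values
# are that session's blacklisted scan names (empty list if none).
qc_data = get_subject_metadata(study="DTI")
# e.g. {"DTI_CMH_H001_01": ["DTI_CMH_H001_01_01_T1_02"],
#       "DTI_CMH_H002_01": []}
for subid, bad_scans in qc_data.items():
    print(subid, bad_scans)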
def read_blacklist(study=None, scan=None, subject=None, config=None,
                   path=None):
    """
    This function is used to look up blacklisted scans. If the dashboard is
    found it ONLY checks the dashboard database. Otherwise it expects a
    datman style 'blacklist' file on the filesystem.

    This function can accept:
        - A study name (nickname, not study tag)
        - A scan name (may include the full path and extension)
        - A subject ID
        - A datman config object, initialized to the study being worked with
        - A full path directly to a blacklist file. If given, this will
          circumvent any dashboard database checks and ignore any datman
          config files.

    Returns:
        - A dictionary of scan names mapped to the comment provided when
          they were blacklisted (Note: If reading from the filesystem,
          commas contained in comments will be removed)
        - OR a dictionary of the same format containing only entries for a
          single subject if a specific subject ID was given
        - OR the comment for a specific scan if a scan is given
        - OR 'None' if a scan is given but not found in the blacklist
    """
    if dashboard.dash_found and not path:
        return _fetch_blacklist(scan=scan, subject=subject, study=study,
                                config=config)

    if scan:
        try:
            ident, tag, series, descr = scanid.parse_filename(scan)
        except scanid.ParseException:
            logger.error("Invalid scan name: {}".format(scan))
            return
        tmp_sub = ident.get_full_subjectid_with_timepoint_session()
        # Need to drop the path and extension if in the original 'scan'
        scan = "_".join([str(ident), tag, series, descr])
    else:
        tmp_sub = subject

    blacklist_path = locate_metadata("blacklist.csv", study=study,
                                     subject=tmp_sub, config=config,
                                     path=path)
    try:
        with open(blacklist_path, 'r') as blacklist:
            entries = _parse_blacklist(blacklist, scan=scan, subject=subject)
    except Exception as e:
        raise MetadataException("Failed to read blacklist file {}. Reason - "
                                "{}".format(blacklist_path, str(e)))
    return entries
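# A hedged sketch of read_blacklist()'s call modes, mirroring the
# docstring above. The study, subject, and scan names are hypothetical.
all_entries = read_blacklist(study="DTI")  # whole-study dict
sub_entries = read_blacklist(subject="DTI_CMH_H001_01", study="DTI")
comment = read_blacklist(
    scan="DTI_CMH_H001_01_01_T1_02_description.nii.gz",
    study="DTI")  # single comment, or None if the scan is not blacklisted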
def _parse_blacklist(blacklist, scan=None, subject=None):
    """
    Helper function for 'read_blacklist()'. Gets the blacklist contents
    from the file system.
    """
    if scan:
        entries = None
    else:
        entries = {}

    # This will mangle any commas in comments, but is the most reliable way
    # to split the lines
    regex = r",|\s"
    for line in blacklist:
        fields = re.split(regex, line.strip())
        try:
            scan_name = fields[0]
            scanid.parse_filename(scan_name)
            comment = fields[1:]
        except (IndexError, scanid.ParseException):
            logger.info(f"Ignoring malformed line: {line}")
            continue

        comment = " ".join(comment).strip()
        if scan_name == "series":
            continue

        if scan:
            if scan_name == scan:
                return comment
            continue

        if subject and not scan_name.startswith(subject):
            continue

        if entries and scan_name in entries:
            logger.info(
                f"Found duplicate blacklist entries for {scan_name}. "
                "Ignoring all except the first entry found.")
            continue

        entries[scan_name] = comment

    return entries
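# A hedged sketch of the blacklist file format _parse_blacklist() expects;
# the scan name and comment are hypothetical, and io.StringIO stands in
# for the open file handle passed by read_blacklist().
import io

example_blacklist = io.StringIO(
    "series\treason\n"
    "DTI_CMH_H001_01_01_T1_02_description motion artifact\n"
)
parsed = _parse_blacklist(example_blacklist)
# {'DTI_CMH_H001_01_01_T1_02_description': 'motion artifact'}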
def test_parse_filename_PHA_2():
    ident, tag, series, description = scanid.parse_filename(
        'SPN01_MRC_PHA_FBN0013_RST_04_EPI-3x3x4xTR2.nii.gz')
    assert ident.study == 'SPN01'
    assert ident.site == 'MRC'
    assert ident.subject == 'PHA_FBN0013'
    assert ident.timepoint == ''
    assert ident.session == ''
    assert str(ident) == 'SPN01_MRC_PHA_FBN0013'
    assert tag == 'RST'
    assert series == '04'
    assert description == 'EPI-3x3x4xTR2'
def find_scans(search_str):
    """
    Used by the dashboard's search bar and so must work around fuzzy
    user input.
    """
    search_str = search_str.strip().upper()
    try:
        ident, tag, series, _ = scanid.parse_filename(search_str)
    except scanid.ParseException:
        try:
            ident = scanid.parse(search_str)
        except scanid.ParseException:
            # Doesn't match a file name or a subject ID, so fuzzy search
            # for a matching scan name
            query = Scan.query.filter(
                func.upper(Scan.name).contains(search_str))
            if query.count() == 0:
                # or matching subid
                query = Scan.query.filter(
                    func.upper(Scan.timepoint).contains(search_str))
                if query.count() == 0:
                    # or matching tags
                    query = Scan.query.filter(
                        func.upper(Scan.tag).contains(search_str))
                    if query.count() == 0:
                        # or matching series description
                        query = Scan.query.filter(
                            func.upper(Scan.description).contains(search_str))
        else:
            if ident.session:
                query = Scan.query.filter(
                    and_(
                        func.upper(Scan.timepoint) ==
                        ident.get_full_subjectid_with_timepoint(),
                        Scan.repeat == int(ident.session)))
                if not query.count():
                    ident.session = None
            if not ident.session:
                query = Scan.query.filter(
                    (func.upper(Scan.timepoint) ==
                     ident.get_full_subjectid_with_timepoint()))
    else:
        name = "_".join(
            [ident.get_full_subjectid_with_timepoint_session(), tag, series])
        query = Scan.query.filter(func.upper(Scan.name).contains(name))
    return query.all()
def __init__(self, path):
    self.path = path
    self.ext = datman.utils.get_extension(path)
    self.file_name = os.path.basename(self.path)
    path_minus_ext = path.replace(self.ext, "")
    try:
        ident, tag, series, description = scanid.parse_filename(
            path_minus_ext)
    except datman.scanid.ParseException:
        # re-raise the exception with a more descriptive message
        message = f"{path_minus_ext} does not match datman convention"
        raise datman.scanid.ParseException(message)
    DatmanNamed.__init__(self, ident)
    self.tag = tag
    self.series_num = series
    self.description = description
def check_blacklist(scan_name, study=None):
    """Reads the blacklist entry for the given scan name

    If there is an entry returns the comment, otherwise returns None
    """
    try:
        ident, tag, series_num, _ = scanid.parse_filename(scan_name)
        blacklist_id = "_".join([str(ident), tag, series_num])
    except scanid.ParseException:
        logger.warning('Invalid scan name: {}'.format(scan_name))
        return

    if study:
        cfg = datman.config.config(study=study)
    else:
        cfg = datman.config.config(
            study=ident.get_full_subjectid_with_timepoint())

    try:
        blacklist_path = os.path.join(cfg.get_path('meta'), 'blacklist.csv')
    except KeyError:
        logger.warning(
            'Unable to identify meta path for study: {}'.format(study))
        return

    try:
        with open(blacklist_path, 'r') as f:
            lines = f.readlines()
    except IOError:
        logger.warning('Unable to open blacklist file: {} for '
                       'reading'.format(blacklist_path))
        return

    for line in lines:
        parts = line.split(None, 1)
        if parts:  # fix for empty lines
            if blacklist_id in parts[0]:
                try:
                    return parts[1].strip()
                except IndexError:
                    return
def get_files_with_tag(parentdir, tag, fuzzy=False):
    """
    Returns a list of files that have the specified tag.

    Filenames must conform to the datman naming convention (see
    scanid.parse_filename) in order to be considered.

    If fuzzy == True, then filenames are matched if the given tag is found
    within the filename's tag.
    """
    files = []
    for f in os.listdir(parentdir):
        try:
            _, filetag, _, _ = scanid.parse_filename(f)
            if tag == filetag or (fuzzy and tag in filetag):
                files.append(os.path.join(parentdir, f))
        except scanid.ParseException:
            continue
    return files
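# A hedged usage sketch for get_files_with_tag(); the directory is
# hypothetical. The fuzzy pass also matches composite tags that merely
# contain the requested tag.
t1_files = get_files_with_tag("/data/DTI/nii/DTI_CMH_H001_01", "T1")
t1_like = get_files_with_tag("/data/DTI/nii/DTI_CMH_H001_01", "T1",
                             fuzzy=True)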
def get_files(session, filename):
    """
    Starts with a file in the nii folder.
    Checks if the file is a DTI type, and session is not a phantom.
    Checks to see if a SlicerTractography file exists in the dtiprep folder.
    Returns a tuple (dti_file, tract_file) or None.
    """
    if not filename.endswith('.nii.gz'):
        logger.info('File:{} is not a nifti file. Skipping'.format(filename))
        return

    try:
        ident, tag, series, desc = scanid.parse_filename(filename)
    except scanid.ParseException:
        logger.debug('Invalid filename:{}'.format(filename))
        return

    if scanid.is_phantom(ident.get_full_subjectid_with_timepoint()):
        msg = "Session:{} is a phantom. Skipping".format(session)
        logger.info(msg)
        return

    if tag not in TAGS:
        msg = ("File:{} is not in taglist:{}. Skipping"
               .format(os.path.basename(filename), TAGS))
        logger.info(msg)
        return

    base_name = (scanid.make_filename(ident, tag, series, desc)
                 + '_SlicerTractography.vtk')
    tract_file = os.path.join(DTIPREP_PATH, session, base_name)
    if not os.path.isfile(tract_file):
        logger.info('Tract file:{} not found.'.format(tract_file))
        return

    return (filename, tract_file)
def get_missing_data(data, nii_file):
    ident, _, _, _ = scanid.parse_filename(nii_file)
    try:
        img = nibabel.load(nii_file)
    except Exception:
        logger.error("Could not open {}".format(nii_file))
        return

    if ('EffectiveEchoSpacing' not in data.keys()) and ident.site == 'CMH':
        data['EffectiveEchoSpacing'] = 0.000342

    if "RepetitionTime" not in data.keys():
        data['RepetitionTime'] = int(img.header['pixdim'][4])

    if "task" in nii_file:
        slices = float(img.shape[2])
        tr = float(data['RepetitionTime'])
        spacing = tr / slices
        timing_list = [round(x, 4) for x in numpy.arange(0, tr, spacing)]
        half = len(timing_list) // 2
        first = timing_list[:half]
        second = timing_list[half:]
        to_return = list()
        while len(first) > 0 and len(second) > 0:
            to_return.append(first.pop(0))
            to_return.append(second.pop(0))
        to_return += first + second
        data['SliceTiming'] = to_return

    if "TotalReadoutTime" not in data.keys():
        try:
            axis = {'i': 0, 'j': 1, 'k': 2}[data['PhaseEncodingDirection'][0]]
            npe = img.shape[axis]
            acc = 1.0
            if 'ParallelReductionFactorInPlane' in data.keys():
                acc = data['ParallelReductionFactorInPlane']
            data["TotalReadoutTime"] = str(
                float(data["EffectiveEchoSpacing"]) * (npe / acc - 1))
        except KeyError as key:
            logger.info(
                "Total readout time cannot be calculated due to missing "
                "information {} in JSON for: {}".format(key, nii_file))
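# The SliceTiming branch above interleaves the first and second halves of
# an evenly spaced timing list to approximate an interleaved acquisition
# order. A self-contained sketch with a hypothetical TR of 2.0s over 4
# slices (spacing = 2.0 / 4 = 0.5):
timings = [0.0, 0.5, 1.0, 1.5]
half = len(timings) // 2
first, second = timings[:half], timings[half:]
interleaved = []
while first and second:
    interleaved.append(first.pop(0))
    interleaved.append(second.pop(0))
interleaved += first + second
print(interleaved)  # [0.0, 1.0, 0.5, 1.5]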
def check_inputs(config, tag, path, expected_tags):
    """
    Ensures we have the same number of input files as we have defined in
    ExportInfo.
    """
    if not expected_tags:
        raise Exception('expected tag {} not found in {}'.format(tag, path))
    n_found = len(expected_tags)
    site = sid.parse_filename(expected_tags[0])[0].site
    tag_info = config.get_tags(site)
    try:
        if tag in tag_info:
            n_expected = tag_info.get(tag, 'Count')
        elif tag in config.study_config['Sites'][site]['links'].keys():
            n_expected = config.study_config['Sites'][site]['links'][tag][
                'Count']
        else:
            raise Exception
    except Exception:
        raise Exception(
            'tag {} not defined in Sites:site:ExportInfo or '
            'Sites:site:links'.format(tag))
    if n_found != n_expected:
        logger.warning('Found {} files with tag {}, expected {}, check '
                       'outputs to ensure quality'.format(
                           n_found, tag, n_expected))
def validify_file(sub_nii_dir):
    nii_list = os.listdir(sub_nii_dir)
    # nii_list = [x for x in nii_list if x.endswith('nii.gz')]
    invalid_filenames = list()
    ses_ser_file_map = dict()
    for nii in nii_list:
        try:
            nii_ident, _, nii_ser, _ = scanid.parse_filename(nii)
        except scanid.ParseException:
            invalid_filenames.append(nii)
            continue
        nii_ses = nii_ident.session
        if nii_ses not in ses_ser_file_map.keys():
            ses_ser_file_map[nii_ses] = dict()
        ses_ser_file_map[nii_ses][nii_ser] = list()
    for x in invalid_filenames:
        nii_list.remove(x)

    blacklist_files = set()
    match_six = {ses: LifoQueue() for ses in ses_ser_file_map.keys()}
    match_eight = {ses: LifoQueue() for ses in ses_ser_file_map.keys()}

    for filename in sorted(
            nii_list,
            key=lambda x: (scanid.parse_filename(x)[0].session,
                           scanid.parse_filename(x)[2])):
        ident, tag, series, description = scanid.parse_filename(filename)
        ext = os.path.splitext(filename)[1]
        session = ident.session
        ses_ser_file_map[session][series].append(filename)
        ses_ser = (session, series)

        # fmap validation
        if tag in ['ECHO1', 'FMRI-DAP'] and ext == '.gz':
            if 'flipangle' in filename:
                blacklist_files.add(ses_ser)
            else:
                match_six[session].put(series)
        elif tag in ['ECHO2', 'FMRI-DPA'] and ext == '.gz':
            if 'flipangle' in filename:
                blacklist_files.add(ses_ser)
            else:
                match_eight[session].put(series)

        # anat validation
        if tag in tag_map['anat'] and ext == '.json':
            json_file = os.path.join(sub_nii_dir, filename)
            try:
                with open(json_file) as jf:
                    json_data = json.load(jf)
            except IOError:
                continue
            if "NORM" in json_data["ImageType"]:
                logger.info(
                    "File has ImageType NORM. Skipping: {}".format(filename))
                blacklist_files.add(ses_ser)

    matched_fmaps = {ses: list() for ses in ses_ser_file_map.keys()}
    for ses in ses_ser_file_map.keys():
        while not (match_six[ses].empty() or match_eight[ses].empty()):
            six = match_six[ses].get()
            eight = match_eight[ses].get()
            matched_fmaps[ses].append((six, eight))
            logger.info("Matched FMAP series for session {0}: {1} {2}".format(
                ses, six, eight))

    for ses in ses_ser_file_map.keys():
        # Any series left unmatched in either queue gets blacklisted
        for match in [match_six, match_eight]:
            while not match[ses].empty():
                not_matched = match[ses].get()
                blacklist_files.add((ses, not_matched))
                logger.info(
                    "FMAP series not matched: Session {}. Series {}".format(
                        ses, not_matched))

    for (ses, ser) in blacklist_files:
        ses_ser_file_map[ses].pop(ser)

    return ses_ser_file_map, matched_fmaps
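# A self-contained sketch of the LIFO pairing used by validify_file();
# the series numbers are hypothetical. ECHO1/FMRI-DAP series stack up in
# one queue, ECHO2/FMRI-DPA series in another, and popping both in step
# matches the most recently acquired fieldmaps first.
from queue import LifoQueue

match_six, match_eight = LifoQueue(), LifoQueue()
for series in ["02", "07"]:  # e.g. ECHO1 / FMRI-DAP series numbers
    match_six.put(series)
for series in ["03", "08"]:  # e.g. ECHO2 / FMRI-DPA series numbers
    match_eight.put(series)

pairs = []
while not (match_six.empty() or match_eight.empty()):
    pairs.append((match_six.get(), match_eight.get()))
print(pairs)  # [('07', '08'), ('02', '03')] - last in, first matched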
import datetime
import fnmatch
import glob
import json
import logging
import os
import re
import time
import traceback
from collections import Counter
from distutils import dir_util
from queue import LifoQueue
from shutil import copyfile, copytree

import nibabel
import numpy
from docopt import docopt

# Assumes datman.scanid is importable as 'scanid'; adjust to the
# project's layout if needed.
from datman import scanid

logger = logging.getLogger(__name__)
dmlogger = logging.getLogger('datman.utils')

tag_map = dict()

get_session_series = lambda x: (scanid.parse_filename(x)[0].session,
                                scanid.parse_filename(x)[2])
get_series = lambda x: scanid.parse_filename(x)[2]
get_tag = lambda x: scanid.parse_filename(x)[1]


def validify_fmap(fmap):
    img = nibabel.load(fmap)
    hdr = img.header
    if hdr['srow_z'][2] == 0:
        value = hdr['pixdim'][3]
        hdr['srow_z'][2] = value
        img.affine[2][2] = value
        nibabel.save(img, fmap)