def load_sublist(self):
    """Load the participant list YAML file into a dictionary and check it.

    - subdict format:
      {'sub_01': {'session_01':
                     {'anatomical_scan': {'scan_01': <filepath>,
                                          'scan_02': <filepath>},
                      'site_name': 'Site_1'} },
       'sub_02': {..} }

    :rtype: dictionary
    :return: The participant list in a dictionary.
    """

    import yaml
    from qap.qap_utils import raise_smart_exception

    if "subject_list" in self._config.keys():
        with open(self._config["subject_list"], "r") as f:
            subdict = yaml.load(f)
    else:
        msg = "\n\n[!] There is no participant list YML to read.\n\n"
        raise_smart_exception(locals(), msg)

    if len(subdict) == 0:
        msg = "The participant list provided is either empty or could " \
              "not be read properly!"
        raise_smart_exception(locals(), msg)

    return subdict
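
# --- Participant list sketch (illustrative only) ----------------------------
# A minimal example of the YAML layout load_sublist expects. The participant,
# session, and scan IDs and the filepaths below are placeholders, not values
# from an actual QAP run.
import yaml

example_yaml = """
sub_01:
  session_01:
    anatomical_scan:
      scan_01: /data/sub_01/session_01/anat_1.nii.gz
      scan_02: /data/sub_01/session_01/anat_2.nii.gz
    site_name: Site_1
"""
example_subdict = yaml.load(example_yaml)
# example_subdict["sub_01"]["session_01"]["anatomical_scan"]["scan_01"]
# now holds the (placeholder) filepath to the first anatomical scan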
def check_input_resources(resource_pool, resource_name):
    """Check to make sure a specific resource/file is present in the
    resource pool.

    :type resource_pool: dict
    :param resource_pool: The resource pool of resources (which includes
                          output files of sub-workflows, and connection
                          pointers for Nipype nodes/workflows).
    :type resource_name: str
    :param resource_name: The name of the output/intermediary file to check
                          for within the resource pool.
    """

    import os
    from qap.qap_utils import raise_smart_exception

    if resource_name not in resource_pool.keys():
        err = "Resource pool: %s\n\n[!] The resource '%s' is missing " \
              "from the resource pool, and it is needed in one of the " \
              "steps of the pipeline. Please make sure it is specified " \
              "properly." % (resource_pool, resource_name)
        raise_smart_exception(locals(), err)
    else:
        if len(resource_pool[resource_name]) > 2:
            # entries longer than two elements are expected to be filepath
            # strings (two-element entries are Nipype node/output tuples)
            if not os.path.isfile(resource_pool[resource_name]):
                err = "[!] The path provided for the resource '%s' " \
                      "does not exist!\nPath provided: %s" % \
                      (resource_name, resource_pool[resource_name])
                raise_smart_exception(locals(), err)
def create_anatomical_background_mask(anatomical_data, fg_mask_data,
                                      exclude_zeroes=False):
    """Create a mask of the area outside the head in an anatomical scan by
    inverting a provided foreground mask.

    :type anatomical_data: NumPy array
    :param anatomical_data: An array of the raw anatomical data.
    :type fg_mask_data: NumPy array
    :param fg_mask_data: An array of binary foreground mask data.
    :type exclude_zeroes: bool
    :param exclude_zeroes: (default: False) Flag to exclude pure zero values
                           when creating the background mask.
    :rtype: NumPy array
    :return bg_mask_data: The background mask data.
    """

    from qap.qap_utils import raise_smart_exception

    # invert the foreground mask
    try:
        bg_mask_data = 1 - fg_mask_data
    except Exception as e:
        err = "\n\n[!] Input data must be a NumPy array object, and not a " \
              "list.\n\nError details: %s\n\n" % e
        raise_smart_exception(locals(), err)

    if exclude_zeroes:
        # modify the mask to exclude zeroes in the background of the
        # anatomical image, as these are often introduced artificially and
        # can skew the QAP metric results
        bool_anat_data = anatomical_data > 0
        bg_mask_data = bg_mask_data * bool_anat_data

    return bg_mask_data
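
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal, self-contained example of how create_anatomical_background_mask
# might be exercised with synthetic arrays; the shapes and intensity values
# below are made up for demonstration and are not part of the QAP pipeline.
import numpy as np

anat = np.zeros((4, 4, 4))
anat[1:3, 1:3, 1:3] = 100.0                # a small "head" of signal
fg_mask = (anat > 0).astype(int)           # binary foreground mask
bg_mask = create_anatomical_background_mask(anat, fg_mask)
# the background mask is simply the inverted foreground mask here
assert bg_mask.sum() == anat.size - fg_mask.sum()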
def outlier_timepoints(func_file, mask_file=None, out_fraction=True):
    """Calculate the number of 'outliers' in a 4D functional dataset, at
    each time point, using AFNI's 3dToutcount.

    - Uses AFNI 3dToutcount. More info here:
      https://afni.nimh.nih.gov/pub/dist/doc/program_help/3dToutcount.html
    - Used for the 'Fraction of Outliers' QAP functional temporal metrics.

    :type func_file: str
    :param func_file: Path to the 4D functional timeseries NIFTI file.
    :type mask_file: str
    :param mask_file: Path to the functional binary brain mask NIFTI file.
    :type out_fraction: bool
    :param out_fraction: (default: True) Whether the output should be a
                         count (False) or fraction (True) of the number of
                         masked voxels which are outliers at each time
                         point.
    :rtype: list
    :return: A list of outlier values from AFNI 3dToutcount.
    """

    import commands
    from qap.qap_utils import raise_smart_exception

    opts = []
    if out_fraction:
        opts.append("-fraction")
    if mask_file:
        opts.append("-mask %s" % mask_file)
    opts.append(func_file)
    str_opts = " ".join(opts)

    # TODO: check if we should use -polort 2
    # (http://www.na-mic.org/Wiki/images/8/86/FBIRNSupplementalMaterial082005.pdf)
    # or -legendre to remove any trend
    cmd = "3dToutcount %s" % str_opts

    try:
        out = commands.getoutput(cmd)
    except:
        err = "[!] QAP says: Something went wrong with running AFNI's " \
              "3dToutcount."
        raise_smart_exception(locals(), err)

    # remove general information and warnings, keeping only the values
    outliers = pass_floats(out)

    return outliers
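
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal sketch of calling outlier_timepoints. It assumes AFNI's
# 3dToutcount is on the PATH, and the NIFTI paths below are hypothetical
# placeholders for a 4D functional scan and its binary brain mask.
func_path = "/data/sub-01/func/rest.nii.gz"       # hypothetical path
mask_path = "/data/sub-01/func/rest_mask.nii.gz"  # hypothetical path
out_fractions = outlier_timepoints(func_path, mask_file=mask_path,
                                   out_fraction=True)
# out_fractions holds one value per time point; higher fractions flag
# time points with many outlier voxels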
def check_config_settings(config, parameter):
    """Check to make sure a configuration setting/parameter is present in
    the pipeline configuration dictionary.

    :type config: dict
    :param config: A dictionary keying configuration options to their
                   chosen selections.
    :type parameter: str
    :param parameter: The key of the configuration parameter to be checked.
    """

    from qap.qap_utils import raise_smart_exception

    if parameter not in config.keys():
        err = "[!] The parameter '%s' is missing from your pipeline " \
              "configuration .YML file. Please make sure this is " \
              "specified properly." % parameter
        raise_smart_exception(locals(), err)
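
# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal example of check_config_settings; the configuration keys and
# values below are made up for demonstration.
pipeline_config = {"num_processors": 4, "output_directory": "/tmp/qap_out"}
check_config_settings(pipeline_config, "num_processors")   # passes silently
# check_config_settings(pipeline_config, "working_directory") would raise,
# since that key is not present in the dictionary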
def create_bundles(self):
    """Create a list of participant "bundles".

    :rtype: list
    :return: A list of bundles - each bundle being a dictionary that is a
             starting resource pool for N sub-session-scan combos, with N
             being the number of participants per bundle (set by the user).
    """

    from qap.qap_utils import raise_smart_exception

    i = 0
    bundles = []

    for session_tuple in self._sub_dict.keys():
        if i == 0:
            new_bundle = {}
        sub = session_tuple[0]
        ses = session_tuple[1]
        site_name = None
        if "site_name" in self._sub_dict[session_tuple].keys():
            site_name = self._sub_dict[session_tuple]["site_name"]
        for scan in self._sub_dict[session_tuple].keys():
            if type(self._sub_dict[session_tuple][scan]) is dict:
                # to avoid fields in sub_dict[session_tuple] that are
                # strings (such as site_name or creds_path)
                sub_info_tuple = (sub, ses, scan)
                new_bundle[sub_info_tuple] = \
                    self._sub_dict[session_tuple][scan]
                if site_name:
                    new_bundle[sub_info_tuple].update(
                        {"site_name": site_name})
        i += 1
        if i == self._config["num_sessions_at_once"]:
            bundles.append(new_bundle)
            i = 0

    if i > 0:
        bundles.append(new_bundle)

    if len(bundles) == 0:
        msg = "No bundles created."
        raise_smart_exception(locals(), msg)

    return bundles
def write_nifti_image(nifti_img, file_path):
    """Write image data in Nibabel format into a NIFTI file.

    :type nifti_img: Nibabel image
    :param nifti_img: The image data Nibabel object to write out.
    :type file_path: str
    :param file_path: The filepath of the NIFTI image to create.
    """

    import nibabel as nb
    from qap.qap_utils import raise_smart_exception

    try:
        nb.save(nifti_img, file_path)
    except:
        err = "\n\n[!] Could not save the NIFTI image using Nibabel:\n" \
              "%s\n\n" % file_path
        raise_smart_exception(locals(), err)
def csv_to_pandas_df(csv_file):
    """Convert the data in a CSV file into a Pandas DataFrame.

    :type csv_file: str
    :param csv_file: The filepath to the CSV file to be loaded.
    :rtype: Pandas DataFrame
    :return: A DataFrame object with the data from the CSV file.
    """

    import pandas as pd
    from qap.qap_utils import raise_smart_exception

    try:
        data = pd.read_csv(csv_file, dtype={"Participant": str})
    except Exception as e:
        err = "Could not load the CSV file into a DataFrame using Pandas." \
              "\n\nCSV file: %s\n\nError details: %s\n\n" % (csv_file, e)
        raise_smart_exception(locals(), err)

    return data
def read_nifti_image(nifti_infile):
    """Read a NIFTI file into Nibabel-format image data.

    :type nifti_infile: str
    :param nifti_infile: The filepath of the NIFTI image to read in.
    :rtype: Nibabel image
    :return: Image data in Nibabel format.
    """

    import nibabel as nb
    from qap.qap_utils import raise_smart_exception

    try:
        nifti_img = nb.load(nifti_infile)
    except:
        err = "\n\n[!] Could not load the NIFTI image using Nibabel:\n" \
              "%s\n\n" % nifti_infile
        raise_smart_exception(locals(), err)

    return nifti_img
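
# --- Usage sketch (illustrative only) ---------------------------------------
# A round-trip sketch using read_nifti_image and write_nifti_image on a
# small synthetic volume; the temporary filenames are created on the fly and
# nothing here comes from the QAP pipeline itself.
import os
import tempfile
import numpy as np
import nibabel as nb

tmp_dir = tempfile.mkdtemp()
in_path = os.path.join(tmp_dir, "synthetic.nii.gz")
out_path = os.path.join(tmp_dir, "synthetic_copy.nii.gz")

# create and save a tiny synthetic NIFTI volume
nb.save(nb.Nifti1Image(np.ones((2, 2, 2)), np.eye(4)), in_path)

img = read_nifti_image(in_path)     # load it back as a Nibabel image
write_nifti_image(img, out_path)    # write it out to a new file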
def load_image(image_file):
    """Load a raw scan image from a NIFTI file and check it.

    :type image_file: str
    :param image_file: Path to the image, usually a structural or functional
                       scan.
    :rtype: NumPy array
    :return: The image data as a NumPy array.
    """

    import nibabel as nib
    import numpy as np
    from qap.qap_utils import raise_smart_exception

    try:
        img = nib.load(image_file)
    except:
        raise_smart_exception(locals())

    dat = img.get_data()

    # Ensure that data is cast as at least 32-bit
    if np.issubdtype(dat.dtype, float):
        dat = dat.astype('float32')
        # Check for negative values
        if (dat < 0).any():
            print "found negative values, setting to zero (see file: %s)" \
                  % image_file
            dat[dat < 0] = 0
    elif np.issubdtype(dat.dtype, int):
        dat = dat.astype('int32')
    elif np.issubdtype(dat.dtype, np.uint8):
        dat = dat.astype(np.uint8)
    else:
        msg = "Error: Unknown datatype %s" % dat.dtype
        raise_smart_exception(locals(), msg)

    return dat
def calculate_percent_outliers(values_list):
    """Calculate the percentage of outliers from a vector of values.

    :type values_list: list
    :param values_list: A list of float values.
    :rtype: float
    :return: The percentage of values from the input vector that are
             statistical outliers.
    :rtype: float
    :return: The inter-quartile range of the data.
    """

    import numpy as np
    from qap.qap_utils import raise_smart_exception

    try:
        # calculate the IQR
        sorted_values = sorted(values_list)
        third_qr, first_qr = np.percentile(sorted_values, [75, 25])
        IQR = third_qr - first_qr

        # calculate percent outliers
        third_qr_threshold = third_qr + (1.5 * IQR)
        first_qr_threshold = first_qr - (1.5 * IQR)

        high_outliers = \
            [val for val in sorted_values if val > third_qr_threshold]
        low_outliers = \
            [val for val in sorted_values if val < first_qr_threshold]
        total_outliers = high_outliers + low_outliers

        percent_outliers = \
            float(len(total_outliers)) / float(len(sorted_values))
    except:
        raise_smart_exception(locals())

    return percent_outliers, IQR
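
# --- Usage sketch (illustrative only) ---------------------------------------
# A small worked example of calculate_percent_outliers. Twenty-one values
# sit near 1.0 and one value (100.0) lies far outside the 1.5 * IQR fences,
# so 1 of the 22 values (~4.5%) is flagged as an outlier. The numbers are
# synthetic and chosen only to make the arithmetic easy to follow.
sample_values = [0.9, 1.0, 1.1] * 7 + [100.0]
percent, iqr = calculate_percent_outliers(sample_values)
print "percent outliers: %.3f, IQR: %.3f" % (percent, iqr)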
def read_json(json_filename):
    """Read the contents of a JSON file.

    :type json_filename: str
    :param json_filename: The path to the JSON file.
    :rtype: dict
    :return: Dictionary containing the info from the JSON file.
    """

    import os
    import json
    from qap.qap_utils import raise_smart_exception

    if not os.path.exists(json_filename):
        err = "\n\n[!] The JSON file provided does not exist.\nFilepath: " \
              "%s\n\n" % json_filename
        raise_smart_exception(locals(), err)

    with open(json_filename, "r") as f:
        json_dict = json.load(f)

    return json_dict
def quality_timepoints(func_file):
    """Calculate a 'quality index' for each timepoint in the 4D functional
    dataset, using AFNI's 3dTqual.

    - Uses AFNI 3dTqual. More info here:
      https://afni.nimh.nih.gov/pub/dist/doc/program_help/3dTqual.html
    - Used for the 'Quality' QAP functional temporal metrics.
    - Low values are good and indicate that the timepoint is not very
      different from the norm.

    :type func_file: str
    :param func_file: Filepath to the 4D functional timeseries NIFTI file.
    :rtype: list
    :return: A list of float values from AFNI 3dTqual.
    """

    import subprocess
    from qap.qap_utils import raise_smart_exception

    opts = []
    opts.append(func_file)
    str_opts = " ".join(opts)

    cmd = "3dTqual %s" % str_opts

    try:
        p = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate()
    except:
        err = "[!] QAP says: Something went wrong with running AFNI's " \
              "3dTqual."
        raise_smart_exception(locals(), err)

    quality = pass_floats(out)

    return quality
def load_mask(mask_file, ref_file):
    """Load a mask from a NIFTI file and check the shape and dimensions.

    :type mask_file: str
    :param mask_file: Filepath to the binarized mask file.
    :type ref_file: str
    :param ref_file: Filepath to the anatomical file the mask is meant for.
    :rtype: Nibabel data
    :return: The mask data in Nibabel format.
    """

    import nibabel as nib
    import numpy as np
    from qap.qap_utils import raise_smart_exception

    try:
        mask_img = nib.load(mask_file)
    except:
        raise_smart_exception(locals())

    mask_dat = mask_img.get_data()
    ref_img = nib.load(ref_file)

    # Check that the specified mask is binary.
    mask_vals = np.unique(mask_dat)
    if (mask_vals.size != 2) or not (mask_vals == [0, 1]).all():
        err = "Error: Mask is not binary, has %i unique val(s) of %s " \
              "(see file %s)" % (mask_vals.size, mask_vals, mask_file)
        raise_smart_exception(locals(), err)

    # Verify that the mask and anatomical images have the same dimensions.
    if ref_img.shape != mask_img.shape:
        err = "Error: Mask and anatomical image are different dimensions " \
              "for %s" % mask_file
        raise_smart_exception(locals(), err)

    # Verify that the mask and anatomical images are in the same space
    # (have the same affine matrix).
    if not (mask_img.get_affine() == ref_img.get_affine()).all():
        err = "Error: Mask and anatomical image are not in the same space " \
              "for %s vs %s" % (mask_file, ref_file)
        raise_smart_exception(locals(), err)

    return mask_dat
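
# --- Usage sketch (illustrative only) ---------------------------------------
# A self-contained check of load_mask using synthetic volumes written to a
# temporary directory; the shapes, values, and filenames are made up for
# demonstration only.
import os
import tempfile
import numpy as np
import nibabel as nb

tmp_dir = tempfile.mkdtemp()
anat_path = os.path.join(tmp_dir, "anat.nii.gz")
mask_path = os.path.join(tmp_dir, "anat_mask.nii.gz")

anat = np.random.rand(5, 5, 5)
mask = np.zeros((5, 5, 5))
mask[1:4, 1:4, 1:4] = 1                      # binary mask of the "head"

nb.save(nb.Nifti1Image(anat, np.eye(4)), anat_path)
nb.save(nb.Nifti1Image(mask, np.eye(4)), mask_path)

mask_data = load_mask(mask_path, anat_path)  # passes all shape/space checks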
def run(self, config_file=None, partic_list=None):
    """Establish where and how we're running the pipeline and set up the
    run. (Entry point)

    - This is the entry point for pipeline building and connecting.
      Depending on the inputs, the appropriate workflow runner will be
      selected and executed.

    :type config_file: str
    :param config_file: Filepath to the pipeline configuration file in YAML
                        format.
    :type partic_list: str
    :param partic_list: Filepath to the participant list file in YAML
                        format.
    """

    import os
    import os.path as op
    from time import strftime
    from qap_utils import raise_smart_exception, check_config_settings

    # in case we are overloading
    if config_file:
        from qap.script_utils import read_yml_file
        self._config = read_yml_file(config_file)
        self.validate_config_dict()
        self._config["pipeline_config_yaml"] = config_file

    if not self._config:
        raise Exception("config not found!")

    if partic_list:
        self._config["subject_list"] = partic_list

    # Get configurations and settings
    check_config_settings(self._config, "num_processors")
    check_config_settings(self._config, "num_sessions_at_once")
    check_config_settings(self._config, "available_memory")
    check_config_settings(self._config, "output_directory")
    check_config_settings(self._config, "working_directory")

    self._num_bundles_at_once = 1
    write_report = self._config.get('write_report', False)

    if "cluster_system" in self._config.keys() and not self._bundle_idx:
        res_mngr = self._config["cluster_system"]
        if (res_mngr is None) or ("None" in res_mngr) or \
                ("none" in res_mngr):
            self._platform = None
        else:
            platforms = ["SGE", "PBS", "SLURM"]
            self._platform = str(res_mngr).upper()
            if self._platform not in platforms:
                msg = "The resource manager %s provided in the pipeline " \
                      "configuration file is not one of the valid " \
                      "choices. It must be one of the following:\n%s" \
                      % (self._platform, str(platforms))
                raise_smart_exception(locals(), msg)
    else:
        self._platform = None

    # Create output directory
    try:
        os.makedirs(self._config["output_directory"])
    except:
        if not op.isdir(self._config["output_directory"]):
            err = "[!] Output directory unable to be created.\n" \
                  "Path: %s\n\n" % self._config["output_directory"]
            raise Exception(err)
        else:
            pass

    # Create working directory
    try:
        os.makedirs(self._config["working_directory"])
    except:
        if not op.isdir(self._config["working_directory"]):
            err = "[!] Working directory unable to be created.\n" \
                  "Path: %s\n\n" % self._config["working_directory"]
            raise Exception(err)
        else:
            pass

    results = []

    # set up callback logging
    import logging
    from nipype.pipeline.plugins.callback_log import log_nodes_cb

    cb_log_filename = os.path.join(self._config["output_directory"],
                                   "callback.log")

    # Add handler to callback log file
    cb_logger = logging.getLogger('callback')
    cb_logger.setLevel(logging.DEBUG)
    handler = logging.FileHandler(cb_log_filename)
    cb_logger.addHandler(handler)

    # settle run arguments (plugins)
    self.runargs = {}
    self.runargs['plugin'] = 'MultiProc'
    self.runargs['plugin_args'] = \
        {'memory_gb': int(self._config["available_memory"]),
         'status_callback': log_nodes_cb}
    n_procs = {'n_procs': self._config["num_processors"]}
    self.runargs['plugin_args'].update(n_procs)

    # load the participant list file into dictionary
    subdict = self.load_sublist()

    # flatten the participant dictionary
    self._sub_dict = self.create_session_dict(subdict)

    # create the list of bundles
    self._bundles_list = self.create_bundles()
    num_bundles = len(self._bundles_list)

    if not self._bundle_idx:
        # want to initialize the run-level log directory (not the bundle-
        # level) only the first time we run the script, due to the
        # timestamp. if sub-nodes are being kicked off by a batch file on
        # a cluster, we don't want a new timestamp for every new node run
        self._run_log_dir = op.join(
            self._config['output_directory'],
            '_'.join([self._run_name, "logs"]),
            '_'.join([strftime("%Y%m%d_%H_%M_%S"),
                      "%dbundles" % num_bundles]))

    if self._run_log_dir:
        if not os.path.isdir(self._run_log_dir):
            try:
                os.makedirs(self._run_log_dir)
            except:
                if not op.isdir(self._run_log_dir):
                    err = "[!] Log directory unable to be created.\n" \
                          "Path: %s\n\n" % self._run_log_dir
                    raise Exception(err)
                else:
                    pass

    if num_bundles == 1:
        self._config["num_sessions_at_once"] = \
            len(self._bundles_list[0])

    # Start the magic
    if not self._platform and not self._bundle_idx:
        # not a cluster/grid run
        for idx in range(1, num_bundles + 1):
            results.append(self.run_one_bundle(idx))
    elif not self._bundle_idx:
        # there is a self._bundle_idx only if the pipeline runner is run
        # with bundle_idx as a parameter - only happening either manually,
        # or when running on a cluster
        self.submit_cluster_batch_file(num_bundles)
    else:
        # if there is a bundle_idx supplied to the runner
        results = self.run_one_bundle(self._bundle_idx)
def create_session_dict(self, subdict):
    """Collapse the participant resource pools so that each participant-
    session combination has its own entry.

    - input subdict format:
      {'sub_01': {'session_01':
                     {'anatomical_scan': {'scan_01': <filepath>,
                                          'scan_02': <filepath>},
                      'site_name': 'Site_1'} },
       'sub_02': {..} }

    - output dict format:
      { (sub01, session01):
            {"scan_01": {"anatomical_scan": <filepath>},
             "scan_02": {"anatomical_scan": <filepath>} } }

    :type subdict: dict
    :param subdict: A dictionary containing the filepaths of input files
                    for each participant, sorted by session and scan.
    :rtype: dict
    :return: A dictionary of dictionaries where each participant-session
             combination has its own entry, and input file filepaths are
             defined.
    """

    from qap.qap_utils import raise_smart_exception

    flat_sub_dict_dict = {}
    sites_dict = {}

    for subid in subdict.keys():
        subid = str(subid)
        # sessions
        for session in subdict[subid].keys():
            # resource files
            for resource in subdict[subid][session].keys():
                if type(subdict[subid][session][resource]) is dict:
                    # then this has sub-scans defined
                    for scan in subdict[subid][session][resource].keys():
                        filepath = subdict[subid][session][resource][scan]
                        resource_dict = {}
                        resource_dict[resource] = filepath
                        sub_info_tuple = (subid, session)
                        if sub_info_tuple not in flat_sub_dict_dict.keys():
                            flat_sub_dict_dict[sub_info_tuple] = {}
                        if scan not in \
                                flat_sub_dict_dict[sub_info_tuple].keys():
                            flat_sub_dict_dict[sub_info_tuple][scan] = {}
                        flat_sub_dict_dict[sub_info_tuple][scan].update(
                            resource_dict)
                elif resource == "site_name":
                    sites_dict[subid] = subdict[subid][session][resource]
                else:
                    filepath = subdict[subid][session][resource]
                    resource_dict = {}
                    resource_dict[resource] = filepath
                    sub_info_tuple = (subid, session)
                    if sub_info_tuple not in flat_sub_dict_dict.keys():
                        flat_sub_dict_dict[sub_info_tuple] = {}
                    flat_sub_dict_dict[sub_info_tuple].update(
                        resource_dict)

    if len(flat_sub_dict_dict) == 0:
        # this error message is meant more for devs than users
        msg = "The participant dictionary is empty."
        raise_smart_exception(locals(), msg)

    # in case some subjects have site names and others don't
    if len(sites_dict.keys()) > 0:
        for subid in subdict.keys():
            subid = str(subid)
            if subid not in sites_dict.keys():
                sites_dict[subid] = None

        # integrate site information into flat_sub_dict_dict
        # it was kept separate in the first place to circumvent the fact
        # that even though site_name doesn't get keyed with scan names,
        # that doesn't necessarily mean scan names haven't been specified
        # for that participant
        for sub_info_tuple in flat_sub_dict_dict.keys():
            site_info = {}
            site_info["site_name"] = sites_dict[sub_info_tuple[0]]
            flat_sub_dict_dict[sub_info_tuple].update(site_info)

    return flat_sub_dict_dict
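
# --- Illustration of the flattening (synthetic data) ------------------------
# The nested participant dictionary below and its flattened counterpart show
# what create_session_dict produces; the IDs and filepaths are placeholders.
example_nested = {
    "sub_01": {
        "session_01": {
            "anatomical_scan": {"scan_01": "/data/sub_01/anat_1.nii.gz",
                                "scan_02": "/data/sub_01/anat_2.nii.gz"},
            "site_name": "Site_1"
        }
    }
}

# After flattening, each (participant, session) pair keys its own resource
# pool, and the site name is carried along:
expected_flat = {
    ("sub_01", "session_01"): {
        "scan_01": {"anatomical_scan": "/data/sub_01/anat_1.nii.gz"},
        "scan_02": {"anatomical_scan": "/data/sub_01/anat_2.nii.gz"},
        "site_name": "Site_1"
    }
}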
def qap_csv_correlations(data_old, data_new, replacements=None):
    """Create a dictionary of correlations between old and new versions of
    each QAP measure, for the purpose of regression testing.

    - This is for the 'qap_test_correlations.py' script.
    - This is intended for regression testing between versions of the QAP
      software.
    - The 'metric_list' below must be kept current with changes to metrics
      and their titles.

    :type data_old: Pandas DataFrame
    :param data_old: A DataFrame of QAP output measures from the older-
                     version run.
    :type data_new: Pandas DataFrame
    :param data_new: A DataFrame of QAP output measures from the newer-
                     version run.
    :type replacements: list
    :param replacements: A list of strings describing column name
                         replacements, in case column names have changed;
                         these strings are in the format
                         "old_name,new_name".
    :rtype: dict
    :return: A dictionary of correlation values keyed by each QAP metric.
    """

    import pandas as pd
    import scipy.stats
    from qap.qap_utils import raise_smart_exception

    metric_list = ["EFC", "SNR", "FBER", "CNR", "FWHM", "Qi1",
                   "Cortical Contrast", "Ghost_x", "Ghost_y", "Ghost_z",
                   "GCOR", "RMSD (Mean)", "Quality (Mean)",
                   "Fraction of Outliers (Mean)", "Std. DVARS (Mean)",
                   "Fraction of OOB Outliers (Mean)"]

    # update datasets if necessary
    if replacements:
        replace_dict = {}
        for word_couple in replacements:
            if "," not in word_couple:
                err = "\n\n[!] In the replacements text file, the old " \
                      "substring and its replacement must be separated " \
                      "by a comma.\n\nLine: %s\n\n" % word_couple
                raise Exception(err)
            word = word_couple.split(",")[0]
            new = word_couple.split(",")[1]
            replace_dict[word] = new

        data_old.rename(columns=replace_dict, inplace=True)
        data_new.rename(columns=replace_dict, inplace=True)

    # remove nulls
    data_old = data_old[pd.notnull(data_old["Participant"])]
    data_new = data_new[pd.notnull(data_new["Participant"])]

    for metric in metric_list:
        if metric in data_old:
            data_old = data_old[pd.notnull(data_old[metric])]
        if metric in data_new:
            data_new = data_new[pd.notnull(data_new[metric])]

    # make sure participant IDs are strings (if they are all digits, they
    # can be mistakenly read in as ints or floats)
    if data_old["Participant"].dtype != str:
        try:
            data_old["Participant"] = data_old["Participant"].astype(
                int).astype(str)
        except ValueError:
            data_old["Participant"] = data_old["Participant"].astype(str)

    if data_new["Participant"].dtype != str:
        try:
            data_new["Participant"] = data_new["Participant"].astype(
                int).astype(str)
        except ValueError:
            data_new["Participant"] = data_new["Participant"].astype(str)

    # make sure both DFs match
    data_merged = pd.merge(data_old, data_new,
                           on=["Participant", "Session", "Series"],
                           how="inner", suffixes=("_OLD", "_NEW"))

    if len(data_merged) == 0:
        # try a last-ditch approach
        try:
            data_old["Participant"] = data_old["Participant"].astype(int)
            data_new["Participant"] = data_new["Participant"].astype(int)
            data_merged = pd.merge(data_old, data_new,
                                   on=["Participant", "Session", "Series"],
                                   how="inner", suffixes=("_OLD", "_NEW"))
        except:
            pass
        if len(data_merged) == 0:
            err = "[!] There were no participant matches between the two " \
                  "CSVs."
            raise_smart_exception(locals(), err)

    # correlate the numbers!
    correlations_dict = {}
    for metric in metric_list:
        metric_old = "_".join([metric, "OLD"])
        metric_new = "_".join([metric, "NEW"])
        if (metric_old in data_merged) and (metric_new in data_merged):
            metric_old_val = data_merged[metric_old]
            metric_new_val = data_merged[metric_new]
            correlations_dict[metric] = scipy.stats.pearsonr(
                metric_old_val, metric_new_val)

    return correlations_dict
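
# --- Usage sketch (illustrative only) ---------------------------------------
# A toy regression-test comparison between an "old" and a "new" QAP CSV,
# using synthetic SNR values; the IDs and numbers below are invented purely
# to demonstrate the call.
import pandas as pd

old_df = pd.DataFrame({"Participant": ["1", "2", "3"],
                       "Session": ["ses-1"] * 3,
                       "Series": ["anat_1"] * 3,
                       "SNR": [10.2, 11.5, 9.8]})
new_df = pd.DataFrame({"Participant": ["1", "2", "3"],
                       "Session": ["ses-1"] * 3,
                       "Series": ["anat_1"] * 3,
                       "SNR": [10.3, 11.4, 9.9]})

correlations = qap_csv_correlations(old_df, new_df)
# correlations["SNR"] holds the (Pearson r, p-value) tuple for that metric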
def json_to_csv(json_dict, csv_output_dir=None):
    """Extract the data from the JSON output file and write it to a CSV
    file.

    :type json_dict: dict
    :param json_dict: Dictionary containing all of the JSON output
                      information from the QAP run.
    :type csv_output_dir: str
    :param csv_output_dir: (default: None) Path to the directory to write
                           the CSV file into.
    :rtype: str
    :return: The CSV file path.
    """

    import os
    import pandas as pd
    from qap.qap_utils import raise_smart_exception

    qap_types = ["anatomical_spatial",
                 "functional_spatial",
                 "functional_temporal"]

    output_dict = {}

    for sub_sess_scan in json_dict.keys():
        # flatten the JSON dict
        sub_json_dict = json_dict[sub_sess_scan]
        header_dict = {}
        qap_dict = {}

        try:
            header_dict = sub_json_dict["anatomical_header_info"]
        except KeyError:
            pass

        try:
            header_dict = sub_json_dict["functional_header_info"]
        except KeyError:
            pass

        for qap_type in qap_types:
            try:
                qap_dict = sub_json_dict[qap_type]
            except KeyError:
                continue

            for key in sub_json_dict.keys():
                if "anatomical" not in key and "functional" not in key:
                    qap_dict[key] = sub_json_dict[key]

            qap_dict.update(header_dict)

            try:
                output_dict[qap_type].append(qap_dict)
            except KeyError:
                output_dict[qap_type] = [qap_dict]

    for qap_type in output_dict.keys():
        json_df = pd.DataFrame(output_dict[qap_type])
        json_df.sort_values(by=["Participant", "Session", "Series"],
                            inplace=True)
        if not csv_output_dir:
            csv_output_dir = os.getcwd()
        csv_file = os.path.join(csv_output_dir, "qap_%s.csv" % qap_type)

        try:
            json_df.to_csv(csv_file)
        except:
            err = "Could not write CSV file!\nCSV file: %s" % csv_file
            raise_smart_exception(locals(), err)

        print "CSV file created successfully: %s" % csv_file

    return csv_file
def create_header_dict_entry(in_file, subject, session, scan, type):
    """Gather the header information from a NIFTI file and arrange it into
    a Python dictionary.

    :type in_file: str
    :param in_file: Filepath to the NIFTI raw data scan.
    :type subject: str
    :param subject: The participant ID.
    :type session: str
    :param session: The session ID.
    :type scan: str
    :param scan: The scan ID.
    :type type: str
    :param type: The data type ("anatomical" or "functional").
    :rtype: dict
    :return: A dictionary with the header information of the file, keyed by
             the participant's ID data.
    """

    import os
    import nibabel as nb
    from qap.qap_utils import raise_smart_exception

    if not os.path.isfile(in_file):
        err = "Filepath doesn't exist!\nFilepath: %s" % in_file
        raise_smart_exception(locals(), err)

    try:
        img = nb.load(in_file)
        img_header = img.header
    except:
        err = "You may not have an up-to-date installation of the Python " \
              "Nibabel package.\nYour Nibabel version: %s" % \
              str(nb.__version__)
        raise_smart_exception(locals(), err)

    subkey = "%s_header_info" % type
    id_string = "%s %s %s" % (subject, session, scan)
    qap_dict = {id_string: {subkey: {}}}

    info_labels = ["descrip", "db_name", "bitpix", "slice_start",
                   "scl_slope", "scl_inter", "slice_end", "slice_duration",
                   "toffset", "quatern_b", "quatern_c", "quatern_d",
                   "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
                   "srow_y", "srow_z", "aux_file", "intent_name",
                   "slice_code", "data_type", "qform_code", "sform_code"]

    for info_label in info_labels:
        try:
            qap_dict[id_string][subkey][info_label] = \
                str(img_header[info_label])
        except:
            print "\n\n%s field not in NIFTI header of %s\n\n" % \
                  (info_label, in_file)
            qap_dict[id_string][subkey][info_label] = ""
            pass

    try:
        pixdim = img_header['pixdim']
        qap_dict[id_string][subkey]["pix_dimx"] = str(pixdim[1])
        qap_dict[id_string][subkey]["pix_dimy"] = str(pixdim[2])
        qap_dict[id_string][subkey]["pix_dimz"] = str(pixdim[3])
        qap_dict[id_string][subkey]["tr"] = str(pixdim[4])
    except:
        print "\n\npix_dim/TR fields not in NIFTI header of %s\n\n" % \
              in_file
        pass

    try:
        qap_dict[id_string][subkey]["extensions"] = \
            len(img.header.extensions.get_codes())
    except:
        print "\n\nExtensions not in NIFTI header of %s\n\n" % in_file
        pass

    return qap_dict
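
# --- Usage sketch (illustrative only) ---------------------------------------
# Builds a tiny synthetic anatomical NIFTI in a temporary directory and pulls
# its header fields into the QAP dictionary layout; the IDs used ("sub-01",
# "ses-1", "scan_1") are placeholders.
import os
import tempfile
import numpy as np
import nibabel as nb

tmp_dir = tempfile.mkdtemp()
nifti_path = os.path.join(tmp_dir, "sub-01_anat.nii.gz")
nb.save(nb.Nifti1Image(np.zeros((3, 3, 3)), np.eye(4)), nifti_path)

header_entry = create_header_dict_entry(nifti_path, "sub-01", "ses-1",
                                        "scan_1", "anatomical")
# header_entry is keyed by "sub-01 ses-1 scan_1" and holds an
# "anatomical_header_info" sub-dictionary of header fields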
def fd_jenkinson(in_file, rmax=80., out_file=None, out_array=False):
    """Calculate Jenkinson's Mean Framewise Displacement (aka RMSD) and
    save the Mean FD values to a file.

    - Method to calculate Framewise Displacement (FD) calculations
      (Jenkinson et al., 2002).
    - Implementation written by @Krsna, May 2013.
    - Jenkinson FD is computed from 3dvolreg's *.affmat12.1D file produced
      by the -1Dmatrix_save option.
      input: subject ID, rest_number, name of 6 parameter motion correction
      file (an output of 3dvolreg)
      output: FD_J.1D file
    - in_file should have one 3dvolreg affine matrix in one row - NOT the
      motion parameters.

    :type in_file: str
    :param in_file: Filepath to the coordinate transformation output vector
                    of AFNI's 3dvolreg (generated by running 3dvolreg with
                    the -1Dmatrix_save option).
    :type rmax: float
    :param rmax: (default: 80.0) The default radius of a sphere that
                 represents the brain.
    :type out_file: str
    :param out_file: (default: None) The filepath to where the output file
                     should be written.
    :type out_array: bool
    :param out_array: (default: False) Flag to return the data in a Python
                      NumPy array instead of an output file.
    :rtype: str
    :return: (if out_array=False) The filepath to the output file containing
             the Mean FD values.
    :rtype: NumPy array
    :return: (if out_array=True) An array of the output Mean FD values.
    """

    import math
    import numpy as np
    import os.path as op
    from shutil import copyfile
    from qap.qap_utils import raise_smart_exception

    if out_file is None:
        fname, ext = op.splitext(op.basename(in_file))
        out_file = op.abspath('%s_fdfile%s' % (fname, ext))

    # if in_file (coordinate_transformation) is actually the rel_mean
    # output of the MCFLIRT command, forward that file
    if 'rel.rms' in in_file:
        copyfile(in_file, out_file)
        return out_file

    try:
        pm_ = np.genfromtxt(in_file)
    except:
        raise_smart_exception(locals())

    original_shape = pm_.shape
    pm = np.zeros((pm_.shape[0], pm_.shape[1] + 4))
    pm[:, :original_shape[1]] = pm_
    pm[:, original_shape[1]:] = [0.0, 0.0, 0.0, 1.0]

    # rigid body transformation matrix
    T_rb_prev = np.matrix(np.eye(4))

    flag = 0
    X = [0]  # First timepoint
    for i in range(0, pm.shape[0]):
        # making use of the fact that the order of the aff12 matrix is
        # "row-by-row"
        T_rb = np.matrix(pm[i].reshape(4, 4))

        if flag == 0:
            flag = 1
        else:
            M = np.dot(T_rb, T_rb_prev.I) - np.eye(4)
            A = M[0:3, 0:3]
            b = M[0:3, 3]
            FD_J = math.sqrt(
                (rmax * rmax / 5) * np.trace(np.dot(A.T, A)) +
                np.dot(b.T, b))
            X.append(FD_J)

        T_rb_prev = T_rb

    try:
        np.savetxt(out_file, np.array(X))
    except:
        raise_smart_exception(locals())

    if out_array:
        return np.array(X)
    else:
        return out_file
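
# --- Usage sketch (illustrative only) ---------------------------------------
# Writes a tiny synthetic 3dvolreg-style affine matrix file (three identity
# transforms, one per time point, twelve values per row) and computes the
# Jenkinson FD trace from it; with no motion between time points, every FD
# value comes out as zero. The filename is a placeholder.
import os
import tempfile
import numpy as np

tmp_dir = tempfile.mkdtemp()
affmat_path = os.path.join(tmp_dir, "synthetic_affmat12.1D")

identity_row = np.eye(4).flatten()[:12]            # first 12 of a 4x4 affine
np.savetxt(affmat_path, np.tile(identity_row, (3, 1)))

fd_values = fd_jenkinson(affmat_path, out_array=True)
print "Jenkinson FD per time point:", fd_values    # array of zeros here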