Example #1
def deface(bidsdir: str, pattern: str, subjects: list, output: str,
           cluster: bool, nativespec: str, kwargs: dict):

    # Input checking
    bidsdir = Path(bidsdir)

    # Start logging
    bids.setup_logging(bidsdir / 'code' / 'bidscoin' / 'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START deface ------------')
    LOGGER.info(
        f">>> deface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = [
            subject if subject.startswith('sub-') else 'sub-' + subject
            for subject in subjects
        ]  # Make sure there is a "sub-" prefix
        subjects = [
            bidsdir / subject for subject in subjects
            if (bidsdir / subject).is_dir()
        ]

    # Prepare the HPC job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt = pbatch.createJobTemplate()
            jt.jobEnvironment = os.environ
            jt.remoteCommand = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles = True

        # Loop over bids subject/session-directories
        for n, subject in enumerate(subjects, 1):

            sessions = bids.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                sub_id, ses_id = bids.get_subid_sesid(session / 'dum.my')

                # Search for images that need to be defaced
                for match in sorted([
                        match for match in session.glob(pattern)
                        if '.nii' in match.suffixes
                ]):

                    # Construct the output filename and relative path name (used in BIDS)
                    match_rel = str(match.relative_to(session))
                    if not output:
                        outputfile = match
                        outputfile_rel = match_rel
                    elif output == 'derivatives':
                        outputfile = bidsdir / 'derivatives' / 'deface' / sub_id / ses_id / match.parent.name / match.name
                        outputfile_rel = str(outputfile.relative_to(bidsdir))
                    else:
                        outputfile = session / output / match.name
                        outputfile_rel = str(outputfile.relative_to(session))
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Deface the image
                    LOGGER.info(f"Defacing: {match_rel} -> {outputfile_rel}")
                    if cluster:
                        jt.args = [
                            str(match), '--outfile',
                            str(outputfile), '--force'
                        ] + [
                            item for pair in [[f"--{key}", val]
                                              for key, val in kwargs.items()]
                            for item in pair
                        ]
                        jt.jobName = f"pydeface_{sub_id}_{ses_id}"
                        jobid = pbatch.runJob(jt)
                        LOGGER.info(
                            f"Your deface job has been submitted with ID: {jobid}"
                        )
                    else:
                        pdu.deface_image(str(match),
                                         str(outputfile),
                                         force=True,
                                         forcecleanup=True,
                                         **kwargs)

                    # Add a json sidecar-file (skip if the input sidecar is missing or identical)
                    inputjson = match.with_suffix('').with_suffix('.json')
                    outputjson = outputfile.with_suffix('').with_suffix('.json')
                    if inputjson.is_file() and inputjson != outputjson:
                        LOGGER.info(f"Adding a json sidecar-file: {outputjson}")
                        shutil.copyfile(inputjson, outputjson)

                    # Update the IntendedFor fields in the fieldmap sidecar files
                    if output and output != 'derivatives' and (
                            session / 'fmap').is_dir():
                        for fmap in (session / 'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data['IntendedFor']
                            if type(intendedfor) == str:
                                intendedfor = [intendedfor]
                            if match_rel in intendedfor:
                                LOGGER.info(
                                    f"Updating 'IntendedFor' to {outputfile_rel} in {fmap}"
                                )
                                fmap_data['IntendedFor'] = intendedfor + [
                                    outputfile_rel
                                ]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    scans_tsv = session / f"{sub_id}{bids.add_prefix('_',ses_id)}_scans.tsv"
                    if output and output != 'derivatives' and scans_tsv.is_file(
                    ):
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table = pd.read_csv(scans_tsv,
                                                  sep='\t',
                                                  index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[
                            match_rel]
                        scans_table.sort_values(by=['acq_time', 'filename'],
                                                inplace=True)
                        scans_table.to_csv(scans_tsv,
                                           sep='\t',
                                           encoding='utf-8')

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL],
                               timeout=pbatch.TIMEOUT_WAIT_FOREVER,
                               dispose=True)
            pbatch.deleteJobTemplate(jt)

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
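
# A minimal usage sketch of the deface() function above, assuming a hypothetical
# BIDS folder; the kwargs dict is passed through to pydeface:
#
#   deface(bidsdir='/project/bids', pattern='anat/*_T1w*', subjects=[],
#          output='derivatives', cluster=False, nativespec='',
#          kwargs={'cost': 'mutualinfo'})
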
def bids_acquisition_download(data_root_path='',
                              dataset_name=None,
                              force_download=False,
                              behav_path='exp_info/recorded_events',
                              copy_events='n',
                              deface=False,
                              dry_run=False):
    """Automatically download files from neurospin server to a BIDS dataset.

    Download-database is based on NeuroSpin server conventions.
    Options are 'prisma', 'trio' and custom path.
    Prisma db_path = '/neurospin/acquisition/database/Prisma_fit'
    Trio db_path = '/neurospin/acquisition/database/TrioTim'

    If necessary, the BIDS dataset is created before the download, with some
    empty mandatory files (e.g. README) left to be filled in if they do not exist.

    The download depends on the file '[sub-*_][ses-*_]download.csv' contained
    in the folder 'exp_info'.

    The NIP and acquisition date of the subjects are taken automatically from
    the exp_info/participants.tsv file, which follows the BIDS standard. The
    file is copied into the dataset folder without the NIP column for privacy.

    Possible exceptions:
    1) exp_info directory not found
    2) participants.tsv not found
    3) download files not found
    4) Acquisition directory on the NeuroSpin server not found
    5) More than one acquisition directory found (ask the experimenter for the
    extra digits of the NIP; the NIP then looks like xxxxxxxx-ssss)
    6) Event file corresponding to a downloaded bold.nii not found
    """

    ### CHECK PATHS AND FILES

    # exp_info path where is the participants.tsv
    # print(data_root_path)
    exp_info_path = os.path.join(data_root_path, 'exp_info')
    if not os.path.exists(exp_info_path):
        raise Exception('exp_info directory not found')
    if not os.path.isfile(os.path.join(exp_info_path, 'participants.tsv')):
        raise Exception('exp_info/participants.tsv not found')

    # Determine target path with the name of dataset
    dataset_name, target_root_path = get_bids_default_path(
        data_root_path, dataset_name)

    # Create dataset directories and files if necessary
    bids_init_dataset(data_root_path, dataset_name)

    # Manage the report and download information
    download_report = ('download_report_' +
                       time.strftime("%d-%b-%Y-%H:%M:%S", time.gmtime()) +
                       '.csv')
    report_path = os.path.join(data_root_path, 'report')
    if not os.path.exists(report_path):
        os.makedirs(report_path)
    download_report = open(os.path.join(report_path, download_report), 'w')
    #report_line = '%s,%s,%s\n' % ('subject_id', 'session_id', 'download_file')
    #download_report.write(report_line)
    list_imported = []
    list_already_imported = []
    list_warning = []

    # Create a dataFrame to store participant information
    #df_participant = pd.DataFrame()
    #Dict for info participant
    #list_all_participants = {}
    dic_info_participants = OrderedDict()

    # List for the batch file for the dcm2niibatch command
    infiles_dcm2nii = []

    # List for data to deface
    files_for_pydeface = []

    #Dict of descriptors to be added
    dict_descriptors = {}

    ### GETTING THE INFORMATION TO DOWNLOAD

    # Download command for each subject/session
    # one line has the following information
    # participant_id / NIP / infos_participant / session_label / acq_date / location / to_import

    # Read the participants.tsv file for getting subjects/sessions to download
    pop = pd.read_csv(os.path.join(exp_info_path, 'participants.tsv'),
                      dtype=str,
                      sep='\t',
                      index_col=False)

    #print(df_participant)

    for row_idx, subject_info in pop.iterrows():
        # Fill in the participant information for the participants.tsv
        if subject_info[0] in dic_info_participants:
            existing_items = dic_info_participants[subject_info[0]]
            dico_add = {}
            info_participant = json.loads(subject_info['infos_participant'])
            for k, v in info_participant.items():
                if k not in existing_items:
                    dico_add[k] = v
            #fusion dicos
            existing_items.update(dico_add)
            dic_info_participants[subject_info[0]] = existing_items
        else:
            dic_info_participants[subject_info[0]] = json.loads(
                subject_info['infos_participant'])

        # Determine path to files in NeuroSpin server
        download_database = subject_info['location']
        if download_database in NEUROSPIN_DATABASES:
            db_path = NEUROSPIN_DATABASES[download_database]
        else:
            db_path = download_database

        # The first column gives either the participant_label or the participant_id
        subject_id = subject_info[0]

        # sub_path = target_root_path + subject_id + ses_path
        # Mange the optional filters
        # optional_filters = [('sub', subject_id)]
        # if session_id is not None:
        #  optional_filters += [('ses', session_id)]
        if ('session_label' in subject_info.index
                and not pd.isna(subject_info['session_label'])):
            session_id = subject_info['session_label']
        else:
            session_id = None
        if session_id is None:
            ses_path = ''
        else:
            ses_path = 'ses-' + session_id
        try:
            int(subject_id)
            subject_id = 'sub-{0}'.format(subject_id)
        except (ValueError, TypeError):
            if 'sub-' not in subject_id:
                print('****  BIDS IMPORTATION WARNING: SUBJECT ID PROBABLY '
                      'NOT BIDS-CONFORMANT')
        sub_path = os.path.join(target_root_path, subject_id, ses_path)
        if not os.path.exists(sub_path):
            os.makedirs(sub_path)

        # Avoid redownloading subjects/sessions
        if not force_download:
            check_file = os.path.join(sub_path, 'downloaded')
            if os.path.isfile(check_file):
                continue

        # The date has to be transformed from the BIDS standard (YYYY-MM-DD)
        # to the NeuroSpin server standard (yyyymmdd)
        acq_date = subject_info['acq_date'].replace('-', '').replace('\n', '')

        #acq_label
        acq_label = subject_info['acq_label']

        #dir_label
        #dir_label = subject_info['dir_label']

        # nip number
        nip = subject_info['NIP']

        # Get appropriate download file. As specific as possible
        # specs_path = file_manager_default_file(exp_info_path,
        #                                optional_filters, 'download',
        #                               file_type='tsv',
        #                               allow_other_fields=False)
        #report_line = '%s,%s,%s\n' % (subject_id, session_id, specs_path)
        #download_report.write(report_line)

        #specs = pd.read_csv(specs_path, dtype=str, sep='\t', index_col=False)

        # Retrieve the list of sequences to import
        # Each tuple is configured as (file_to_import, acq_folder, acq_name):
        # value[0] : sequence number
        # value[1] : modality
        # value[2] : part of the file name
        print("Scans for ", subject_info['NIP'])
        print(subject_info['to_import'])
        seqs_to_retrieve = literal_eval(subject_info['to_import'])
        # Wrap a single sequence in a list, otherwise the loop below would
        # iterate over its elements as str instead of tuple
        if isinstance(seqs_to_retrieve[0], str):
            seqs_to_retrieve = [seqs_to_retrieve]
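        # Hypothetical example: to_import = "[('2', 'anat', 'T1w'), ('11', 'func', 'task-rest_bold')]"
        # imports sequence 2 as an anatomical T1w image and sequence 11 as a
        # resting-state functional run (task/run/dir are parsed from value[2] below)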

        # download data, store information in batch files for anat/fmri
        # download data for meg data
        for value in seqs_to_retrieve:
            #print(seqs_to_retrieve)
            def get_value(key, text):
                m = re.search(key + '-(.+?)_', text)
                if m:
                    return m.group(1)
                else:
                    return None

            run_task = get_value('task', value[2])
            run_id = get_value('run', value[2])
            run_dir = get_value('dir', value[2])
            run_session = session_id

            tag = value[2].split('_')[-1]

            target_path = os.path.join(sub_path, value[1])
            if not os.path.exists(target_path):
                os.makedirs(target_path)

            # MEG CASE
            if value[1] == 'meg':
                # Create subject path if necessary
                meg_path = os.path.join(sub_path, 'meg')
                if not os.path.exists(meg_path):
                    os.makedirs(meg_path)

                # Create the sub-emptyroom
                #sub-emptyroom_path = os.path.join(data_root_path, 'sub_emptyroom')
                #if not os.path.exists(sub-emptyroom_path):
                #    os.makedirs(sub-emptyroom_path)

                meg_file = os.path.join(db_path, nip, acq_date, value[0])
                print(meg_file)
                filename = get_bids_file_descriptor(subject_id,
                                                    task_id=run_task,
                                                    run_id=run_id,
                                                    run_dir=run_dir,
                                                    session_id=run_session,
                                                    file_tag=tag,
                                                    acq_label=acq_label,
                                                    file_type='tif')
                #output_path = os.path.join(target_path, filename)
                #print(output_path)
                #shutil.copyfile(meg_file, output_path)
                raw = mne.io.read_raw_fif(meg_file, allow_maxshield=True)

                write_raw_bids(raw, filename, target_path, overwrite=True)
                # add event
                # create json file
                #copy the subject emptyroom

            # ANAT and FUNC case
            elif value[1] in ('anat', 'func', 'fmap'):
                download = True
                dicom_paths = []
                path_file_glob = ""
                nip_dirs = glob.glob(
                    os.path.join(db_path, str(acq_date),
                                 str(nip) + '*'))
                #print(os.path.join(db_path, str(acq_date), str(nip) + '*'))
                if len(nip_dirs) < 1:
                    list_warning.append(
                        f"\n WARNING: No directory found for given NIP {nip} and SESSION {session_id}"
                    )
                    #print(message)
                    #download_report.write(message)
                    download = False
                elif len(nip_dirs) > 1:
                    list_warning.append(
                        f"\n  WARNING: Multiple paths found for given NIP {nip} and "
                        f"SESSION {session_id} - please mention the session of the "
                        "subject for this date; 2 sessions for the same subject on "
                        "the same day are possible")
                    #print(message)
                    #download_report.write(message)
                    download = False
                else:
                    path_file_glob = os.path.join(
                        nip_dirs[0], '{0:06d}_*'.format(int(value[0])))
                    #print(path_file_glob)
                    dicom_paths = glob.glob(path_file_glob)

                if not dicom_paths and download:
                    list_warning.append("\n WARNING: file not found " +
                                        path_file_glob)
                    #print(message)
                    #download_report.write(message)
                elif download:
                    dicom_path = dicom_paths[0]
                    list_imported.append("\n IMPORTATION OF " + dicom_path)
                    #print(message)
                    #download_report.write(message)
                    # Expecting a file name following the BIDS specification (p. 10)
                    filename = get_bids_file_descriptor(subject_id,
                                                        task_id=run_task,
                                                        run_id=run_id,
                                                        run_dir=run_dir,
                                                        session_id=run_session,
                                                        file_tag=tag,
                                                        acq_label=acq_label,
                                                        file_type='nii')

                    if value[1] == 'anat' and deface:
                        print("\n Deface with pydeface")
                        files_for_pydeface.append(
                            os.path.join(target_path, filename))

                    # append list for preparing the batch importation
                    file_to_convert = {
                        'in_dir': dicom_path,
                        'out_dir': target_path,
                        'filename': os.path.splitext(filename)[0]
                    }
                    is_file_to_import = os.path.join(
                        os.getcwd(), target_path, filename)

                    if (os.path.isfile(is_file_to_import)):
                        list_already_imported.append(
                            f" ALREADY IMPORTED: {is_file_to_import}")
                    else:
                        infiles_dcm2nii.append(file_to_convert)

                    # Add descriptor into the json file
                    if run_task:
                        filename_json = os.path.join(target_path,
                                                     filename[:-3] + 'json')
                        dict_descriptors.update(
                            {filename_json: {
                                'TaskName': run_task
                            }})

                    if len(value) == 4:
                        #print('value[3]', value[3] )
                        filename_json = os.path.join(target_path,
                                                     filename[:-3] + 'json')
                        dict_descriptors.update({filename_json: value[3]})

    # Importation and conversion of the dicom files (batch built from all subjects above)
    dcm2nii_batch = dict(Options=dict(isGz='false',
                                      isFlipY='false',
                                      isVerbose='false',
                                      isCreateBIDS='true',
                                      isOnlySingleFile='false'),
                         Files=infiles_dcm2nii)

    dcm2nii_batch_file = os.path.join(exp_info_path, 'batch_dcm2nii.yaml')
    with open(dcm2nii_batch_file, 'w') as f:
        yaml.dump(dcm2nii_batch, f)

    print(
        "\n------------------------------------------------------------------------------------"
    )
    print(
        "-------------------    SUMMARY OF IMPORTATION   --------------------------------------"
    )
    print(
        "--------------------------------------------------------------------------------------\n"
    )
    for i in list_already_imported:
        print(i)
        download_report.write(i)
    print(
        "\n------------------------------------------------------------------------------------"
    )
    for i in list_imported:
        print(i)
        download_report.write(i)
    print(
        "\n------------------------------------------------------------------------------------"
    )
    for i in list_warning:
        print(i)
        download_report.write(i)
    print(
        "\n------------------------------------------------------------------------------------"
    )
    print(
        "------------------------------------------------------------------------------------\n"
    )
    download_report.close()

    if dry_run:
        print("\n NO IMPORTATION, DRY-RUN OPTION IS TRUE \n")
    else:
        print('\n')
        cmd = "dcm2niibatch %s" % (dcm2nii_batch_file)
        subprocess.call(cmd, shell=True)

        # loop for checking if downloaded are ok and create the downloaded files
        #    done_file = open(os.path.join(sub_path, 'downloaded'), 'w')
        #    done_file.close()

        #Data to deface
        #print(files_for_pydeface)
        if files_for_pydeface:
            # Use the NeuroSpin deface template if present, otherwise fall back
            # to the template packaged with unicog
            template = ("/neurospin/unicog/protocols/IRMf/Unicogfmri/BIDS/"
                        "unicog-dev/bids/template_deface/mean_reg2mean.nii.gz")
            facemask = ("/neurospin/unicog/protocols/IRMf/Unicogfmri/BIDS/"
                        "unicog-dev/bids/template_deface/facemask.nii.gz")
            if not (os.path.isfile(template) and os.path.isfile(facemask)):
                template = resource_filename(
                    Requirement.parse("unicog"),
                    "bids/template_deface/mean_reg2mean.nii.gz")
                facemask = resource_filename(
                    Requirement.parse("unicog"),
                    "bids/template_deface/facemask.nii.gz")
            os.environ['FSLDIR'] = "/i2bm/local/fsl/bin/"
            os.environ['FSLOUTPUTTYPE'] = "NIFTI_PAIR"
            os.environ['PATH'] = os.environ['FSLDIR'] + ":" + os.environ['PATH']

            for file_to_deface in files_for_pydeface:
                print(f"\nDeface with pydeface {file_to_deface}")
                pdu.deface_image(infile=file_to_deface,
                                 outfile=file_to_deface,
                                 facemask=facemask,
                                 template=template,
                                 force=True)

        # Create participants.tsv in dataset folder (take out NIP column)
        participants_path = os.path.join(target_root_path, 'participants.tsv')
        df_participant = pd.DataFrame.from_dict(dic_info_participants,
                                                orient="index")
        df_participant.to_csv(participants_path, sep='\t')

        if dict_descriptors:
            #print(dict_descriptors)
            # Add new key/value pairs (such as TaskName) to the json sidecar files
            for k, v in dict_descriptors.items():
                with open(k, 'r+') as json_file:
                    temp_json = json.load(json_file)
                    temp_json.update(v)
                    json_file.seek(0)
                    json.dump(temp_json, json_file)
                    json_file.truncate()

        # Copy recorded event files
        if copy_events == "y":
            bids_copy_events(behav_path, data_root_path, dataset_name)

        #Validate paths with BIDSValidator
        #see also http://bids-standard.github.io/bids-validator/
        validation_bids = yes_no(
            '\nDo you want to use a bids validator? (y/n)')
        if validation_bids:
            bids_validation_report = os.path.join(
                report_path, "report_bids_validation.txt")
            if shutil.which('bids-validator'):
                cmd = f"bids-validator {target_root_path} > {bids_validation_report}"
                subprocess.call(cmd, shell=True)
                cmd = f"cat < {bids_validation_report}"
                subprocess.call(cmd, shell=True)
                print(
                    f'\n\nSee the summary of the bids validator at {bids_validation_report}'
                )
            else:
                validator = BIDSValidator()
                os.chdir(target_root_path)
                for file_to_test in Path('.').glob('./**/*'):
                    if file_to_test.is_file():
                        file_to_test = '/' + str(file_to_test)
                        print('\nTesting the following file name with '
                              'BIDSValidator: {name}'.format(name=file_to_test))
                        print(validator.is_bids(file_to_test))

    print('\n')
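
# A minimal usage sketch (hypothetical study path; the remaining arguments keep their defaults):
#
#   bids_acquisition_download(data_root_path='/neurospin/unicog/protocols/IRMf/my_study',
#                             dataset_name='my_study_bids', dry_run=True)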
Example #3
def main():
    """Command line call argument parsing."""
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', metavar='path', help="Path to input nifti.")

    parser.add_argument("--outfile",
                        metavar='path',
                        required=False,
                        help="If not provided adds '_defaced' suffix.")

    parser.add_argument("--force",
                        action='store_true',
                        help="Force to rewrite the output even if it exists.")

    parser.add_argument(
        '--applyto',
        nargs='+',
        required=False,
        metavar='',
        help="Apply the created face mask to other images. Can take multiple "
        "arguments.")

    parser.add_argument(
        "--cost",
        metavar='mutualinfo',
        required=False,
        default='mutualinfo',
        help="FSL-FLIRT cost function. Default is 'mutualinfo'.")

    parser.add_argument(
        "--template",
        metavar='path',
        required=False,
        help=("Optional template image that will be used as the registration "
              "target instead of the default."))

    parser.add_argument(
        "--facemask",
        metavar='path',
        required=False,
        help="Optional face mask image that will be used instead of the "
        "default.")

    parser.add_argument("--nocleanup",
                        action='store_true',
                        help="Do not cleanup temporary files. Off by default.")

    parser.add_argument("--verbose",
                        action='store_true',
                        help="Show additional status prints. Off by default.")

    parser.add_argument('--debug',
                        action='store_true',
                        dest='debug',
                        help='Do not catch exceptions and show exception '
                        'traceback (Drop into pdb debugger).')

    welcome_str = 'pydeface ' + require("pydeface")[0].version
    welcome_decor = '-' * len(welcome_str)
    print(welcome_decor + '\n' + welcome_str + '\n' + welcome_decor)

    args = parser.parse_args()
    if args.debug:
        setup_exceptionhook()

    warped_mask_img, warped_mask, template_reg, template_reg_mat =\
        pdu.deface_image(**vars(args))

    # apply mask to other given images
    if args.applyto is not None:
        print("Defacing mask also applied to:")
        for applyfile in args.applyto:
            applyfile_img = load(applyfile)
            outdata = applyfile_img.get_data() * warped_mask_img.get_data()
            applyfile_img = Nifti1Image(outdata, applyfile_img.get_affine(),
                                        applyfile_img.get_header())
            outfile = pdu.output_checks(applyfile)
            applyfile_img.to_filename(outfile)
            print('  %s' % applyfile)

    if not args.nocleanup:
        pdu.cleanup_files(warped_mask, template_reg, template_reg_mat)

    print('Finished.')
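
# Example invocation of the CLI defined above (hypothetical file names; the flags
# are those declared in the argument parser in main()):
#
#   pydeface sub-01_T1w.nii.gz --outfile sub-01_T1w_defaced.nii.gz \
#       --applyto sub-01_T2w.nii.gz --cost mutualinfo --verbose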
Example #4
def deface(bidsdir: str, pattern: str, subjects: list, output: str, cluster: bool, nativespec: str, kwargs: dict):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike search pattern (relative to the subject/session folder) to select the images that need to be defaced, e.g. 'anat/*_T1w*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the defaced images are saved. It can be the name of a BIDS modality folder, such as 'anat', or of the derivatives folder, i.e. 'derivatives'. If output is left empty then the original images are replaced by the defaced images
    :param cluster:     Flag to submit the deface jobs to the high-performance compute (HPC) cluster
    :param nativespec:  DRMAA native specifications for submitting deface jobs to the HPC cluster
    :param kwargs:      Additional arguments (in dict/json-style) that are passed to pydeface. See examples for usage
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bids.setup_logging(bidsdir/'code'/'bidscoin'/'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START deface ------------')
    LOGGER.info(f">>> deface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
                f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bids.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = [subject if subject.startswith('sub-') else 'sub-' + subject for subject in subjects]    # Make sure there is a "sub-" prefix
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Prepare the HPC job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt                     = pbatch.createJobTemplate()
            jt.jobEnvironment      = os.environ
            jt.remoteCommand       = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles           = True

        # Loop over bids subject/session-directories
        for n, subject in enumerate(subjects, 1):

            sessions = bids.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                sub_id, ses_id = bids.get_subid_sesid(session/'dum.my')

                # Search for images that need to be defaced
                for match in sorted([match for match in session.glob(pattern) if '.nii' in match.suffixes]):

                    # Construct the output filename and relative path name (used in BIDS)
                    match_rel = str(match.relative_to(session))
                    if not output:
                        outputfile     = match
                        outputfile_rel = match_rel
                    elif output == 'derivatives':
                        outputfile     = bidsdir/'derivatives'/'deface'/sub_id/ses_id/match.parent.name/match.name
                        outputfile_rel = str(outputfile.relative_to(bidsdir))
                    else:
                        outputfile     = session/output/match.name
                        outputfile_rel = str(outputfile.relative_to(session))
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Deface the image
                    LOGGER.info(f"Defacing: {match_rel} -> {outputfile_rel}")
                    if cluster:
                        jt.args    = [str(match), '--outfile', str(outputfile), '--force'] + [item for pair in [[f"--{key}",val] for key,val in kwargs.items()] for item in pair]
                        jt.jobName = f"pydeface_{sub_id}_{ses_id}"
                        jobid      = pbatch.runJob(jt)
                        LOGGER.info(f"Your deface job has been submitted with ID: {jobid}")
                    else:
                        pdu.deface_image(str(match), str(outputfile), force=True, forcecleanup=True, **kwargs)

                    # Overwrite or add a json sidecar-file
                    inputjson  = match.with_suffix('').with_suffix('.json')
                    outputjson = outputfile.with_suffix('').with_suffix('.json')
                    if inputjson.is_file() and inputjson != outputjson:
                        if outputjson.is_file():
                            LOGGER.info(f"Overwriting the json sidecar-file: {outputjson}")
                            outputjson.unlink()
                        else:
                            LOGGER.info(f"Adding a json sidecar-file: {outputjson}")
                        shutil.copyfile(inputjson, outputjson)

                    # Add a custom "Defaced" field to the json sidecar-file (if there is one)
                    if outputjson.is_file():
                        with outputjson.open('r') as output_fid:
                            data = json.load(output_fid)
                        data['Defaced'] = True
                        with outputjson.open('w') as output_fid:
                            json.dump(data, output_fid, indent=4)

                    # Update the IntendedFor fields in the fieldmap sidecar-files
                    if output and output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data['IntendedFor']
                            if type(intendedfor)==str:
                                intendedfor = [intendedfor]
                            if match_rel in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' to {outputfile_rel} in {fmap}")
                                fmap_data['IntendedFor'] = intendedfor + [outputfile_rel]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir/'.bidsignore').is_file():
                        with (bidsdir/'.bidsignore').open('r') as fid:
                            bidsignore = fid.read().splitlines()
                    else:
                        bidsignore = [bids.unknownmodality + '/']
                    bidsignore.append('derivatives/')
                    scans_tsv = session/f"{sub_id}{bids.add_prefix('_',ses_id)}_scans.tsv"
                    if output and output+'/' not in bidsignore and scans_tsv.is_file():
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table                     = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[match_rel]
                        scans_table.sort_values(by=['acq_time','filename'], inplace=True)
                        scans_table.to_csv(scans_tsv, sep='\t', encoding='utf-8')

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL], timeout=pbatch.TIMEOUT_WAIT_FOREVER, dispose=True)
            pbatch.deleteJobTemplate(jt)


    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
Example #5
def main():
    parser = ArgumentParser(
        description="Defaces anatomical data in a BIDS dataset",
        formatter_class=ArgumentDefaultsHelpFormatter,
    )

    g_required = parser.add_mutually_exclusive_group(required=True)
    g_required.add_argument("--study",
                            action="store",
                            help="Nickname of the study to process")
    g_required.add_argument(
        "--bids-dir",
        action="store",
        metavar="DIR",
        type=lambda x: _is_dir(x, parser),
        help="The root directory of the BIDS dataset to process",
    )

    g_bids = parser.add_argument_group("Options for filtering BIDS queries")
    g_bids.add_argument(
        "-s",
        "--suffix-id",
        action="store",
        nargs="+",
        default=["T1w"],
        help="Select a specific BIDS suffix to be processed",
    )
    g_bids.add_argument(
        "--skip-bids-validation",
        action="store_true",
        default=False,
        help="Assume the input dataset is BIDS compatible and skip validation",
    )
    g_bids.add_argument(
        "--bids-database-dir",
        action="store",
        type=lambda x: _is_dir(x, parser),
        help="Path to a PyBIDS database directory for faster indexing",
    )

    g_perfm = parser.add_argument_group("Options for logging and debugging")
    g_perfm.add_argument("--quiet",
                         action="store_true",
                         default=False,
                         help="Minimal logging")
    g_perfm.add_argument("--verbose",
                         action="store_true",
                         default=False,
                         help="Maximal logging")
    g_perfm.add_argument("--dry-run",
                         action="store_true",
                         default=False,
                         help="Do nothing")

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.INFO)
    if args.quiet:
        logger.setLevel(logging.ERROR)

    if args.study:
        config = datman.config.config(study=args.study)
        bids_dir = config.get_path("bids")
    else:
        bids_dir = args.bids_dir

    layout = BIDSLayout(
        bids_dir,
        validate=not args.skip_bids_validation,
        database_path=args.bids_database_dir,
    )

    anat_list = layout.get(suffix=args.suffix_id,
                           extension=[".nii", ".nii.gz"])
    keys_to_extract = [
        "subject",
        "session",
        "acquisition",
        "ceagent",
        "reconstruction",
        "run",
        "suffix",
    ]

    for anat in anat_list:

        entities = {
            key: anat.entities.get(key, None)
            for key in keys_to_extract
        }
        if (entities["acquisition"] is not None
                and "defaced" in entities["acquisition"]):
            continue
        if entities["acquisition"] is not None:
            entities["acquisition"] = entities["acquisition"] + "defaced"
        else:
            entities["acquisition"] = "defaced"

        output_file = Path(bids_dir, layout.build_path(entities))

        if not output_file.exists():
            if args.dry_run:
                logger.info(
                    f"DRYRUN would have executed defacing on <{anat.path}> "
                    f"and output to <{output_file}>")
                continue

            try:
                deface_image(infile=anat.path, outfile=str(output_file))
            except Exception as e:
                logger.error(f"Defacing failed to run on <{anat.path}> for "
                             f"reason {e}")
                return

            anat_metadata = anat.get_metadata()
            anat_metadata["DefaceSoftware"] = "pydeface"
            # Derive the sidecar path for both .nii and .nii.gz outputs
            sidecar = str(output_file).replace(".nii.gz", ".nii").replace(".nii", ".json")
            with open(sidecar, "w+") as f:
                json.dump(anat_metadata, f, indent=4)