    @staticmethod
    def get_processed_images(caps_directory, subjects, sessions):
        import os
        import re

        from clinica.utils.input_files import T1_FS_T_DESTRIEUX
        from clinica.utils.inputs import clinica_file_reader
        from clinica.utils.longitudinal import get_long_id
        from clinica.utils.participant import get_unique_subjects

        [list_participant_id, list_list_session_ids] = get_unique_subjects(
            subjects, sessions
        )
        list_long_id = [
            get_long_id(list_session_ids) for list_session_ids in list_list_session_ids
        ]
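        # Illustration (assumed Clinica ID convention): for a participant scanned at
        # sessions ses-M00 and ses-M18, get_long_id builds a longitudinal ID such as
        # "long-M00M18".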

        image_ids = []
        if os.path.isdir(caps_directory):
            t1_freesurfer_files = clinica_file_reader(
                list_participant_id,
                list_long_id,
                caps_directory,
                T1_FS_T_DESTRIEUX,
                False,
            )
            image_ids = [
                re.search(r"(sub-[a-zA-Z0-9]+)_(long-[a-zA-Z0-9]+)", file).group()
                for file in t1_freesurfer_files
            ]
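            # Hypothetical example: a template already present in the CAPS folder would
            # yield an ID such as "sub-CLNC01_long-M00M18" (participant and longitudinal
            # IDs joined by "_", as captured by the regex above).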
        return image_ids
    def build_input_node(self):
        """Build and connect an input node to the pipeline."""
        import os

        import nipype.interfaces.utility as nutil
        import nipype.pipeline.engine as npe

        from clinica.utils.exceptions import ClinicaCAPSError, ClinicaException
        from clinica.utils.filemanip import extract_subjects_sessions_from_filename
        from clinica.utils.input_files import T1_FS_DESTRIEUX
        from clinica.utils.inputs import clinica_file_reader
        from clinica.utils.longitudinal import (
            get_long_id,
            get_participants_long_id,
            read_sessions,
        )
        from clinica.utils.participant import (
            get_unique_subjects,
            unique_subjects_sessions_to_subjects_sessions,
        )
        from clinica.utils.stream import cprint

        from .longitudinal_utils import (
            extract_participant_long_ids_from_filename,
            save_part_sess_long_ids_to_tsv,
        )

        # Display image(s) already present in CAPS folder
        # ===============================================
        output_ids = self.get_processed_images(
            self.caps_directory, self.subjects, self.sessions
        )
        (
            processed_participants,
            processed_long_sessions,
        ) = extract_participant_long_ids_from_filename(output_ids)
        if len(processed_participants) > 0:
            cprint(
                msg=(
                    f"Clinica found {len(processed_participants)} participant(s) "
                    "already processed in CAPS directory:"
                ),
                lvl="warning",
            )
            for p_id, l_id in zip(processed_participants, processed_long_sessions):
                cprint(f"{p_id} | {l_id}", lvl="warning")
            if self.overwrite_caps:
                output_folder = "<CAPS>/subjects/<participant_id>/<long_id>/freesurfer_unbiased_template/"
                cprint(f"Output folders in {output_folder} will be recreated.", lvl="warning")
            else:
                cprint("Participant(s) will be ignored by Clinica.", lvl="warning")
                input_ids = [
                    p_id + "_" + s_id
                    for p_id, s_id in zip(self.subjects, self.sessions)
                ]
                processed_sessions_per_participant = [
                    read_sessions(self.caps_directory, p_id, l_id)
                    for (p_id, l_id) in zip(
                        processed_participants, processed_long_sessions
                    )
                ]
                participants, sessions = unique_subjects_sessions_to_subjects_sessions(
                    processed_participants, processed_sessions_per_participant
                )
                processed_ids = [
                    p_id + "_" + s_id for p_id, s_id in zip(participants, sessions)
                ]
                to_process_ids = list(set(input_ids) - set(processed_ids))
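                # Hypothetical example: if input_ids contains sub-CLNC01_ses-M00,
                # sub-CLNC01_ses-M18 and sub-CLNC02_ses-M00, and the first two have
                # already been processed, only sub-CLNC02_ses-M00 remains to be run
                # (note that the set difference does not preserve ordering).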
                self.subjects, self.sessions = extract_subjects_sessions_from_filename(
                    to_process_ids
                )

        # Check that t1-freesurfer has run on the CAPS directory
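        # If any (participant, session) pair is missing its cross-sectional t1-freesurfer
        # output, the reader raises a ClinicaException, which is re-wrapped below as a
        # ClinicaCAPSError.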
        try:
            clinica_file_reader(
                self.subjects, self.sessions, self.caps_directory, T1_FS_DESTRIEUX
            )
        except ClinicaException as e:
            err_msg = (
                "Clinica faced error(s) while trying to read files in your CAPS directory.\n"
                + str(e)
            )
            raise ClinicaCAPSError(err_msg)

        # Save subjects to process in <WD>/<Pipeline.name>/participants.tsv
        folder_participants_tsv = os.path.join(self.base_dir, self.name)
        long_ids = get_participants_long_id(self.subjects, self.sessions)
        save_part_sess_long_ids_to_tsv(
            self.subjects, self.sessions, long_ids, folder_participants_tsv
        )

        [list_participant_id, list_list_session_ids] = get_unique_subjects(
            self.subjects, self.sessions
        )
        list_long_id = [
            get_long_id(list_session_ids) for list_session_ids in list_list_session_ids
        ]

        def print_images_to_process(
            unique_part_list, per_part_session_list, list_part_long_id
        ):
            cprint(
                f"The pipeline will be run on the following {len(unique_part_list)} participant(s):"
            )
            for (part_id, list_sess_id, list_id) in zip(
                unique_part_list, per_part_session_list, list_part_long_id
            ):
                sessions_participant = ", ".join(s_id for s_id in list_sess_id)
                cprint(f"\t{part_id} | {sessions_participant} | {list_id}")

        if len(self.subjects):
            # TODO: Generalize long IDs to the message display
            print_images_to_process(
                list_participant_id, list_list_session_ids, list_long_id
            )
            cprint(
                f"List available in {os.path.join(folder_participants_tsv, 'participants.tsv')}"
            )
            cprint("The pipeline will last approximately 10 hours per participant.")

        read_node = npe.Node(
            name="ReadingFiles",
            iterables=[
                ("participant_id", list_participant_id),
                ("list_session_ids", list_list_session_ids),
            ],
            synchronize=True,
            interface=nutil.IdentityInterface(fields=self.get_input_fields()),
        )
        # fmt: off
        self.connect(
            [
                (read_node, self.input_node, [("participant_id", "participant_id")]),
                (read_node, self.input_node, [("list_session_ids", "list_session_ids")]),
            ]
        )
        # fmt: on
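        # Note: with synchronize=True, the two iterables above are iterated in lock-step
        # (zipped) rather than as a cross product, so each participant_id stays paired
        # with its own list of session IDs.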
    def build_input_node(self):
        """Build and connect an input node to the pipeline."""
        import os
        from colorama import Fore

        import nipype.interfaces.utility as nutil
        import nipype.pipeline.engine as npe

        from clinica.utils.exceptions import ClinicaException, ClinicaCAPSError
        from clinica.utils.filemanip import extract_subjects_sessions_from_filename
        from clinica.utils.inputs import clinica_file_reader
        from clinica.utils.input_files import T1_FS_DESTRIEUX
        from clinica.utils.longitudinal import get_long_id, read_sessions, get_participants_long_id
        from clinica.utils.participant import get_unique_subjects, unique_subjects_sessions_to_subjects_sessions
        from clinica.utils.stream import cprint
        from .longitudinal_utils import extract_participant_long_ids_from_filename, save_part_sess_long_ids_to_tsv

        # Display image(s) already present in CAPS folder
        # ===============================================
        output_ids = self.get_processed_images(self.caps_directory,
                                               self.subjects, self.sessions)
        processed_participants, processed_long_sessions = extract_participant_long_ids_from_filename(
            output_ids)
        if len(processed_participants) > 0:
            cprint(
                "%sClinica found %s participant(s) already processed in CAPS directory:%s"
                % (Fore.YELLOW, len(processed_participants), Fore.RESET))
            for p_id, l_id in zip(processed_participants,
                                  processed_long_sessions):
                cprint("%s\t%s | %s%s" % (Fore.YELLOW, p_id, l_id, Fore.RESET))
            if self.overwrite_caps:
                output_folder = "<CAPS>/subjects/<participant_id>/<long_id>/freesurfer_unbiased_template/"
                cprint("%s\nOutput folders in %s will be recreated.\n%s" %
                       (Fore.YELLOW, output_folder, Fore.RESET))
            else:
                cprint("%s\nParticipant(s) will be ignored by Clinica.\n%s" %
                       (Fore.YELLOW, Fore.RESET))
                input_ids = [
                    p_id + '_' + s_id
                    for p_id, s_id in zip(self.subjects, self.sessions)
                ]
                processed_sessions_per_participant = [
                    read_sessions(self.caps_directory, p_id, l_id)
                    for (p_id, l_id) in zip(processed_participants,
                                            processed_long_sessions)
                ]
                participants, sessions = unique_subjects_sessions_to_subjects_sessions(
                    processed_participants, processed_sessions_per_participant)
                processed_ids = [
                    p_id + '_' + s_id
                    for p_id, s_id in zip(participants, sessions)
                ]
                to_process_ids = list(set(input_ids) - set(processed_ids))
                self.subjects, self.sessions = extract_subjects_sessions_from_filename(
                    to_process_ids)

        # Check that t1-freesurfer has run on the CAPS directory
        try:
            clinica_file_reader(self.subjects, self.sessions,
                                self.caps_directory, T1_FS_DESTRIEUX)
        except ClinicaException as e:
            err_msg = 'Clinica faced error(s) while trying to read files in your CAPS directory.\n' + str(
                e)
            raise ClinicaCAPSError(err_msg)

        # Save subjects to process in <WD>/<Pipeline.name>/participants.tsv
        folder_participants_tsv = os.path.join(self.base_dir, self.name)
        long_ids = get_participants_long_id(self.subjects, self.sessions)
        save_part_sess_long_ids_to_tsv(self.subjects, self.sessions, long_ids,
                                       folder_participants_tsv)

        [list_participant_id,
         list_list_session_ids] = get_unique_subjects(self.subjects,
                                                      self.sessions)
        list_long_id = [
            get_long_id(list_session_ids)
            for list_session_ids in list_list_session_ids
        ]

        def print_images_to_process(unique_part_list, per_part_session_list,
                                    list_part_long_id):
            cprint(
                'The pipeline will be run on the following %s participant(s):'
                % len(unique_part_list))
            for (part_id, list_sess_id,
                 list_id) in zip(unique_part_list, per_part_session_list,
                                 list_part_long_id):
                sessions_participant = ', '.join(s_id for s_id in list_sess_id)
                cprint("\t%s | %s | %s" %
                       (part_id, sessions_participant, list_id))

        if len(self.subjects):
            # TODO: Generalize long IDs to the message display
            print_images_to_process(list_participant_id, list_list_session_ids,
                                    list_long_id)
            cprint('List available in %s' %
                   os.path.join(folder_participants_tsv, 'participants.tsv'))
            cprint(
                'The pipeline will last approximately 10 hours per participant.'
            )

        read_node = npe.Node(
            name="ReadingFiles",
            iterables=[
                ('participant_id', list_participant_id),
                ('list_session_ids', list_list_session_ids),
            ],
            synchronize=True,
            interface=nutil.IdentityInterface(fields=self.get_input_fields()))
        self.connect([
            (read_node, self.input_node, [('participant_id', 'participant_id')
                                          ]),
            (read_node, self.input_node, [('list_session_ids',
                                           'list_session_ids')]),
        ])
def init_input_node(caps_dir, participant_id, list_session_ids, output_dir):
    """Initialize the pipeline.

    This function creates folders and symbolic links in the FreeSurfer SUBJECTS_DIR directory for the upcoming
    run of recon-all.

    Note (@alexis-g-icm):
        There is currently (as of 22 Feb 2019) a bug in FreeSurfer recon-all -base which, in some cases (e.g., only
        one time point), crashes because it tries to write lines that are too long for the shell to handle. This is
        caused by the path to the FreeSurfer SUBJECTS_DIR being too long itself.

    The current function works around this issue by checking whether only one session is associated with a subject
    and, in that case, putting the SUBJECTS_DIR inside the system temporary folder so that its path is as short as
    possible.
    """
    import os
    import errno
    import datetime
    from tempfile import mkdtemp
    from colorama import Fore
    from clinica.utils.stream import cprint
    from clinica.utils.longitudinal import get_long_id
    from clinica.utils.ux import print_begin_image

    # Extract <image_id>
    long_id = get_long_id(list_session_ids)
    image_id = participant_id + '_' + long_id

    # Create SUBJECTS_DIR for recon-all (otherwise, the command won't run)
    if len(list_session_ids) == 1:
        # Special case: when only one time point is used, 'recon-all -base' can fail
        # if $SUBJECTS_DIR is too long ('Word too long.' error).
        # To circumvent this issue, we create $SUBJECTS_DIR inside the system temporary
        # folder so that its path is as short as possible.
        subjects_dir = mkdtemp()
        now = datetime.datetime.now().strftime('%H:%M:%S')
        cprint(
            '%s[%s] %s has only one time point. A $SUBJECTS_DIR folder will be created in %s%s'
            % (Fore.YELLOW, now, image_id.replace(
                '_', ' | '), subjects_dir, Fore.RESET))
    else:
        subjects_dir = os.path.join(output_dir, image_id)
    try:
        os.makedirs(subjects_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:  # EEXIST: folder already exists
            raise e

    # Create symbolic links containing cross-sectional segmentation(s) in SUBJECTS_DIR so that recon-all can run
    for session_id in list_session_ids:
        cross_sectional_path = os.path.join(caps_dir, 'subjects',
                                            participant_id, session_id, 't1',
                                            'freesurfer_cross_sectional',
                                            participant_id + '_' + session_id)
        try:
            os.symlink(
                cross_sectional_path,
                os.path.join(subjects_dir, participant_id + '_' + session_id))
        except FileExistsError as e:
            if e.errno != errno.EEXIST:  # EEXIST: folder already exists
                raise e

    # Prepare arguments for recon-all.
    flags = ""
    for session_id in list_session_ids:
        flags += " -tp " + participant_id + "_" + session_id

    print_begin_image(image_id)

    return image_id, subjects_dir, flags
def init_input_node(caps_dir, participant_id, list_session_ids, output_dir):
    """Initialize the pipeline.

    This function creates folders and symbolic links in the FreeSurfer SUBJECTS_DIR directory for the upcoming
    run of recon-all.

    Note (@alexis-g-icm):
        There is currently (as of 22 Feb 2019) a bug in FreeSurfer recon-all -base which, in some cases (e.g., only
        one time point), crashes because it tries to write lines that are too long for the shell to handle. This is
        caused by the path to the FreeSurfer SUBJECTS_DIR being too long itself.

    The current function works around this issue by checking whether only one session is associated with a subject
    and, in that case, putting the SUBJECTS_DIR inside the system temporary folder so that its path is as short as
    possible.
    """
    import errno
    import os
    from tempfile import mkdtemp

    from clinica.utils.longitudinal import get_long_id
    from clinica.utils.stream import cprint
    from clinica.utils.ux import print_begin_image

    # Extract <image_id>
    long_id = get_long_id(list_session_ids)
    image_id = f"{participant_id}_{long_id}"

    # Create SUBJECTS_DIR for recon-all (otherwise, the command won't run)
    if len(list_session_ids) == 1:
        # Special case: when only one time point is used, 'recon-all -base' can fail
        # if $SUBJECTS_DIR is too long ('Word too long.' error).
        # To circumvent this issue, we create $SUBJECTS_DIR inside the system temporary
        # folder so that its path is as short as possible.
        subjects_dir = mkdtemp()
        cprint(
            msg=(
                f"{image_id.replace('_', ' | ')} has only one time point. "
                f"Needs to create a $SUBJECTS_DIR folder in {subjects_dir}"
            ),
            lvl="warning",
        )
    else:
        subjects_dir = os.path.join(output_dir, image_id)

    os.makedirs(subjects_dir, exist_ok=True)

    # Create symbolic links containing cross-sectional segmentation(s) in SUBJECTS_DIR so that recon-all can run
    for session_id in list_session_ids:
        cross_sectional_path = os.path.join(
            caps_dir,
            "subjects",
            participant_id,
            session_id,
            "t1",
            "freesurfer_cross_sectional",
            f"{participant_id}_{session_id}",
        )
        try:
            os.symlink(
                cross_sectional_path,
                os.path.join(subjects_dir, f"{participant_id}_{session_id}"),
            )
        except FileExistsError as e:
            if e.errno != errno.EEXIST:  # EEXIST: folder already exists
                raise e
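    # Each symlink exposes an existing cross-sectional segmentation, e.g.
    # <caps_dir>/subjects/sub-CLNC01/ses-M00/t1/freesurfer_cross_sectional/sub-CLNC01_ses-M00
    # (hypothetical IDs), under SUBJECTS_DIR so that recon-all can find it by name.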

    # Prepare arguments for recon-all.
    flags = ""
    for session_id in list_session_ids:
        flags += f" -tp {participant_id}_{session_id}"

    print_begin_image(image_id)

    return image_id, subjects_dir, flags
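
# Minimal usage sketch (hypothetical paths and IDs, assuming a CAPS tree laid out as the
# function above expects; not part of the original pipeline code):
#
#   image_id, subjects_dir, flags = init_input_node(
#       caps_dir="/data/CAPS",
#       participant_id="sub-CLNC01",
#       list_session_ids=["ses-M00", "ses-M18"],
#       output_dir="/tmp/t1_freesurfer_template_wd",
#   )
#   # image_id     -> "sub-CLNC01_long-M00M18"  (exact long ID depends on get_long_id)
#   # subjects_dir -> "/tmp/t1_freesurfer_template_wd/sub-CLNC01_long-M00M18"
#   # flags        -> " -tp sub-CLNC01_ses-M00 -tp sub-CLNC01_ses-M18"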