Example #1
 def copyRawFile(self, destination: str) -> None:
     # Copy the current raw file to the destination folder, warning
     # (but not aborting) if a file of the same name is already there.
     if os.path.isfile(os.path.join(destination, self.currentFile(True))):
         logger.warning("{}: File {} exists at destination".format(
             self.recIdentity(), self.currentFile(True)))
     shutil.copy2(self.currentFile(), destination)
     # For two-file NIfTI ("ni1") the header is paired with a separate
     # .img data file, which must be copied as well.
     if self._nii_type == "ni1":
         data_file = tools.change_ext(self.currentFile(), "img")
         shutil.copy2(data_file, destination)
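
For orientation, a minimal usage sketch of copyRawFile, assuming a hypothetical recording instance and an assumed prepared-dataset layout (neither is taken from the project):

# Hypothetical call: copy the current raw file (and, for two-file "ni1"
# NIfTI, its .img pair) into an assumed prepared session folder.
import os

dest = os.path.join("prepared", "sub-001", "ses-01", "MRI")  # assumed layout
os.makedirs(dest, exist_ok=True)
recording.copyRawFile(dest)  # 'recording' is a hypothetical instance
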
Example #2
File: MRI.py Project: Remi-Gau/bidsme
    def _copy_bidsified(self, directory: str, bidsname: str, ext: str) -> None:
        """
        Copies bidsified data files to their destination.

        Additionally, if the modality is dwi (diffusion MRI),
        looks for files of the same name with the extensions
        bvec and bval and copies them. A warning is logged if
        such files are not found.

        Parameters
        ----------
        directory: str
            destination directory where files should be copied,
            including the modality folder. Guaranteed to exist.
        bidsname: str
            bidsified name without extension
        ext: str
            extension of the data file
        """
        bids_base = os.path.join(directory, bidsname)
        shutil.copy2(self.currentFile(), bids_base + ext)

        if self.Modality() == "dwi":
            bvec = tools.change_ext(self.currentFile(), "bvec")
            if os.path.isfile(bvec):
                shutil.copy2(bvec, bids_base + ".bvec")
            else:
                logger.warning(
                    "{} missing bvec file for diffusion recording".format(
                        self.recIdentity()))
            bval = tools.change_ext(self.currentFile(), "bval")
            if os.path.isfile(bval):
                shutil.copy2(bval, bids_base + ".bval")
            else:
                logger.warning(
                    "{} missing bval file for diffusion recording".format(
                        self.recIdentity()))
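
For context, a hedged sketch of how such a copy step might be driven during bidsification; the recording instance, destination layout, and bidsified name below are assumptions for illustration only:

# Hypothetical call site: the modality folder is created first (the method
# expects it to exist), then the data file plus bvec/bval are copied.
import os

out_dir = os.path.join("bids", "sub-001", "ses-01", "dwi")  # assumed layout
os.makedirs(out_dir, exist_ok=True)
recording._copy_bidsified(out_dir, "sub-001_ses-01_dwi", ".nii")
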
Example #3
def process(source: str,
            destination: str,
            plugin_file: str = "",
            plugin_opt: dict = {},
            sub_list: list = [],
            sub_skip_tsv: bool = False,
            sub_skip_dir: bool = False,
            ses_skip_dir: bool = False,
            part_template: str = "",
            bidsmapfile: str = "bidsmap.yaml",
            dry_run: bool = False) -> None:
    """
    Process the prepared dataset before bidsification.
    Can be used to produce derivatives, conversion or
    anonymisation, with the advantage of recording
    identification by bidsmap.yaml.

    Essentially, it is identical to bidsification but
    without the bidsification itself.

    Only subjects listed in source/participants.tsv are treated;
    this list can be narrowed using the sub_list, sub_skip_tsv
    and sub_skip_dir options.

    Parameters
    ----------
    source: str
        folder containing source dataset
    destination: str
        folder for the prepared dataset
    plugin_file: str
        path to the plugin file to use
    plugin_opt: dict
        named options passed to plugin
    sub_list: list
        list of subjects to process. Subjects
        are checked after the plugin and must
        start with 'sub-', as in the destination
        folder
    sub_skip_tsv: bool
        if set to True, subjects found in
        destination/participants.tsv will be
        ignored
    sub_skip_dir: bool
        if set to True, subjects with already
        created directories will be ignored.
        Can conflict with sub_no_dir
    ses_skip_dir: bool
        if set to True, sessions with already
        created directories will be ignored.
        Can conflict with ses_no_dir
    part_template: str
        path to a template json file from which
        participants.tsv will be modeled. If unset,
        the default one "source/participants.json"
        is used. Setting this variable may break
        the workflow
    bidsmapfile: str
        name of the bidsmap file; it is searched for
        in the destination/code/bidsme directory,
        unless the path is absolute
    dry_run: bool
        if set to True, no disk writing operations
        will be performed
    """

    logger.info("-------------- Processing data -------------")
    logger.info("Source directory: {}".format(source))
    logger.info("Destination directory: {}".format(destination))

    # Input checking
    # source = os.path.abspath(source)
    if not os.path.isdir(source):
        logger.critical("Source directory {} don't exists".format(source))
        raise NotADirectoryError(source)
    if not os.path.isdir(destination):
        logger.critical(
            "Destination directory {} does not exist".format(destination))
        raise NotADirectoryError(destination)

    # Input checking & defaults
    bidscodefolder = os.path.join(destination, 'code', 'bidsme')

    # Create a code/bidsme subfolder
    os.makedirs(bidscodefolder, exist_ok=True)

    # Check for dataset description file
    dataset_file = os.path.join(destination, 'dataset_description.json')
    if not os.path.isfile(dataset_file):
        logger.warning("Dataset description file 'dataset_description.json' "
                       "not found in '{}'".format(destination))

    # Check for README file
    readme_file = os.path.join(destination, 'README')
    if not os.path.isfile(readme_file):
        logger.warning("Dataset readme file 'README' "
                       "not found in '{}'".format(destination))

    # Get the bidsmap heuristics from the bidsmap YAML-file
    fname = paths.findFile(bidsmapfile, bidscodefolder, paths.local,
                           paths.config)
    if not fname:
        logger.critical('Bidsmap file {} not found.'.format(bidsmapfile))
        raise FileNotFoundError(bidsmapfile)
    else:
        bidsmapfile = fname
    logger.info("loading bidsmap {}".format(bidsmapfile))
    bidsmap = Bidsmap(bidsmapfile)

    ntotal, ntemplate, nunchecked = bidsmap.countRuns()
    logger.debug("Map contains {} runs".format(ntotal))
    if ntemplate != 0:
        logger.warning("Map contains {} template runs".format(ntemplate))
    if nunchecked != 0:
        logger.critical("Map contains {} unchecked runs".format(nunchecked))
        raise Exception("Unchecked runs present")

    ###############
    # Plugin setup
    ###############
    if plugin_file:
        plugins.ImportPlugins(plugin_file)
        plugins.InitPlugin(source=source,
                           destination=destination,
                           dry=dry_run,
                           **plugin_opt)

    ###############################
    # Checking participants list
    ###############################
    if not part_template:
        part_template = os.path.join(source, "participants.json")
    else:
        logger.warning(
            "Loading exterior participant template {}".format(part_template))
    BidsSession.loadSubjectFields(part_template)

    new_sub_file = os.path.join(source, "participants.tsv")
    df_sub = pandas.read_csv(new_sub_file, sep="\t", header=0,
                             na_values="n/a").drop_duplicates()
    df_dupl = df_sub.duplicated("participant_id")
    if df_dupl.any():
        logger.critical("Participant list contains one or several duplicated "
                        "entries: {}".format(", ".join(
                            df_sub[df_dupl]["participant_id"])))
        raise Exception("Duplicated subjects")

    dupl_file = os.path.join(source, "__duplicated.tsv")
    if os.path.isfile(dupl_file):
        logger.critical("Found unmerged file with duplicated subjects")
        raise FileExistsError(dupl_file)

    new_sub_json = os.path.join(source, "participants.json")
    if not tools.checkTsvDefinitions(df_sub, new_sub_json):
        raise Exception("Incompatible sidecar json")

    old_sub_file = os.path.join(destination, "participants.tsv")
    old_sub = None
    if os.path.isfile(old_sub_file):
        old_sub = pandas.read_csv(old_sub_file,
                                  sep="\t",
                                  header=0,
                                  na_values="n/a")
        if not old_sub.columns.equals(df_sub.columns):
            logger.warning("Source participant.tsv has different columns "
                           "from destination dataset")
        old_sub = old_sub["participant_id"]

    ##############################
    # Subjects loop
    ##############################
    n_subjects = len(df_sub["participant_id"])
    for index, sub_row in df_sub.iterrows():
        sub_no = index + 1
        sub_id = sub_row["participant_id"]
        sub_dir = os.path.join(source, sub_id)
        if not os.path.isdir(sub_dir):
            logger.error("{}: Not found in {}".format(sub_id, source))
            continue

        scan = BidsSession()
        scan.in_path = sub_dir
        scan.subject = sub_id

        #################################################
        # Cloning df_sub row values in scans sub_values
        #################################################
        for column in df_sub.columns:
            scan.sub_values[column] = sub_row[column]

        # locking subjects here forbids renaming during processing,
        # as it would be unclear how to manage folders with data
        scan.lock_subject()
        if plugins.RunPlugin("SubjectEP", scan) < 0:
            logger.warning("Subject {} discarded by {}".format(
                scan.subject, "SubjectEP"))
            continue

        if not scan.isSubValid():
            logger.error("{}: Subject id '{}' is not valid".format(
                sub_id, scan.subject))
            continue

        if tools.skipEntity(scan.subject, sub_list,
                            old_sub if sub_skip_tsv else None,
                            destination if sub_skip_dir else ""):
            logger.info("Skipping subject '{}'".format(scan.subject))
            continue

        ses_dirs = tools.lsdirs(sub_dir, 'ses-*')
        if not ses_dirs:
            logger.error("{}: No sessions found in: {}".format(
                scan.subject, sub_dir))
            continue

        for ses_dir in ses_dirs:
            scan.in_path = ses_dir
            logger.info("{} ({}/{}): Scanning folder {}".format(
                scan.subject, sub_no, n_subjects, ses_dir))
            scan.unlock_session()
            scan.session = os.path.basename(ses_dir)
            if plugins.RunPlugin("SessionEP", scan) < 0:
                logger.warning("Session {} discarded by {}".format(
                    scan.session, "SessionEP"))
                continue

            scan.lock()

            if ses_skip_dir and tools.skipEntity(
                    scan.session, [], None,
                    os.path.join(destination, scan.subject)):
                logger.info("Skipping session '{}'".format(scan.session))
                continue

            for module in Modules.selector.types_list:
                mod_dir = os.path.join(ses_dir, module)
                if not os.path.isdir(mod_dir):
                    logger.debug("Module {} not found in {}".format(
                        module, ses_dir))
                    continue
                for run in tools.lsdirs(mod_dir):
                    scan.in_path = run
                    cls = Modules.select(run, module)
                    if cls is None:
                        logger.error(
                            "Failed to identify data in {}".format(run))
                        continue
                    recording = cls(rec_path=run)
                    if not recording or len(recording.files) == 0:
                        logger.error(
                            "unable to load data in folder {}".format(run))
                        continue
                    recording.setBidsSession(scan)
                    coin(destination, recording, bidsmap, dry_run)
            plugins.RunPlugin("SessionEndEP", scan)

        scan.in_path = sub_dir
        plugins.RunPlugin("SubjectEndEP", scan)

    ##################################
    # Merging the participants table
    ##################################
    df_processed = BidsSession.exportAsDataFrame()

    col_mismatch = False
    if not df_processed.columns.equals(df_sub.columns):
        col_mismatch = True
        logger.warning("Modified participant table do not match "
                       "original table. This is discouraged and can "
                       "break future preparation and process steps")
        for col in df_processed.columns.difference(df_sub.columns):
            df_sub[col] = None
        df_sub = df_sub[BidsSession.getSubjectColumns()]
        df_sub.drop_duplicates(inplace=True)

    df_res = pandas.concat([df_sub, df_processed],
                           join="inner",
                           keys=("original", "processed"),
                           names=("stage", "ID"))
    df_res = df_res.drop_duplicates()

    df_dupl = df_res.duplicated("participant_id", keep=False)

    if df_dupl.any():
        logger.info("Updating participants values")
        df_dupl = df_dupl.drop(["processed"])
        df_res.drop(df_dupl[df_dupl].index, inplace=True)

    df_dupl = df_res.duplicated("participant_id")
    if df_dupl.any():
        logger.error("Participant list contains one or several duplicated "
                     "entries: {}".format(", ".join(
                         df_res[df_dupl]["participant_id"])))

    ##################################
    # Saving the participants table
    ##################################
    if not dry_run:
        df_res[~df_dupl].to_csv(new_sub_file,
                                sep='\t',
                                na_rep="n/a",
                                index=False,
                                header=True)
        if df_dupl.any():
            logger.info("Saving the list to be merged manually to {}".format(
                dupl_file))
            df_res[df_dupl].to_csv(dupl_file,
                                   sep='\t',
                                   na_rep="n/a",
                                   index=False,
                                   header=True)
        json_file = tools.change_ext(new_sub_file, "json")
        if col_mismatch or not os.path.isfile(json_file):
            BidsSession.exportDefinitions(json_file)

    plugins.RunPlugin("FinaliseEP")
Example #4
 def _copy_bidsified(self, directory: str, bidsname: str, ext: str) -> None:
     # Copy the current file to the destination under its bidsified name.
     shutil.copy2(self.currentFile(), os.path.join(directory,
                                                   bidsname + ext))
     # For two-file NIfTI ("ni1"), also copy the paired .img data file.
     if self._nii_type == "ni1":
         data_file = tools.change_ext(self.currentFile(), "img")
         shutil.copy2(data_file, os.path.join(directory, bidsname + ".img"))
Example #5
 def copyRawFile(self, destination: str) -> None:
     # Copy the current raw file to the destination folder, warning
     # if a file of the same name is already there.
     if os.path.isfile(os.path.join(destination, self.currentFile(True))):
         logger.warning("{}: File {} exists at destination".format(
             self.recIdentity(), self.currentFile(True)))
     shutil.copy2(self.currentFile(), destination)
     # Copy the accompanying json sidecar alongside the data file.
     shutil.copy2(tools.change_ext(self.currentFile(), "json"), destination)