Example No. 1
def evaluate(args):
    device = torch.device("cuda" if args.cuda else "cpu")

    preprocessor = torch.load(args.preprocessor_file)
    loader_config = dict(
        preprocessor=preprocessor,
        batch_size=args.batch_size,
        device=device,
    )
    eval_dataloader = create_dataloader(args.eval_file, **loader_config, shuffle=False)

    checkpoint = torch.load(args.checkpoint_file)
    model = build_model(
        word_vocab_size=len(preprocessor.vocabs["word"]),
        pretrained_word_vocab_size=len(preprocessor.vocabs["pretrained_word"]),
        postag_vocab_size=len(preprocessor.vocabs["postag"]),
        n_deprels=len(preprocessor.vocabs["deprel"]),
    )
    model.load_state_dict(checkpoint["model"])
    model.to(device)

    trainer = create_trainer(model)
    trainer.add_callback(utils.training.PrintCallback(printer=logger.info))
    deprel_map = {v: k for k, v in preprocessor.vocabs["deprel"].mapping.items()}
    trainer.add_callback(EvaluateCallback(args.eval_file, deprel_map, args.verbose), priority=0)
    with logging_redirect_tqdm(loggers=[logger]):
        trainer.evaluate(eval_dataloader)
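
All of the examples on this page follow the same basic pattern: code that drives a tqdm progress bar is wrapped in logging_redirect_tqdm() so that log records are routed through tqdm.write() instead of breaking the bar, and the original handlers are restored on exit. A minimal, self-contained sketch of that pattern (names here are illustrative only):

import logging

from tqdm import trange
from tqdm.contrib.logging import logging_redirect_tqdm

LOG = logging.getLogger(__name__)

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    with logging_redirect_tqdm():
        for i in trange(9):
            if i == 4:
                # Printed via tqdm.write(), so the progress bar is not disturbed
                LOG.info("console logging redirected to tqdm.write()")
    # Outside the with-block the original logging handlers are active again.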
Example No. 2
def build(ctx: click.Context, directory: str, zenodo: bool, no_strict: bool, force: bool):
    """Build all databases."""
    # if no_strict and zenodo:
    #    click.secho("Must be strict before uploading", fg="red")
    #    sys.exit(1)
    with logging_redirect_tqdm():
        click.secho("Collecting metadata and building", fg="cyan", bold=True)
        # note that this is the only one that needs a force=force
        ctx.invoke(metadata, directory=directory, no_strict=no_strict, force=force)
        click.secho("Alternate Identifiers", fg="cyan", bold=True)
        ctx.invoke(alts, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Synonyms", fg="cyan", bold=True)
        ctx.invoke(synonyms, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Xrefs", fg="cyan", bold=True)
        ctx.invoke(xrefs, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Names", fg="cyan", bold=True)
        ctx.invoke(names, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Definitions", fg="cyan", bold=True)
        ctx.invoke(definitions, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Properties", fg="cyan", bold=True)
        ctx.invoke(properties, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Relations", fg="cyan", bold=True)
        ctx.invoke(relations, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Typedefs", fg="cyan", bold=True)
        ctx.invoke(typedefs, directory=directory, zenodo=zenodo, no_strict=no_strict)
        click.secho("Species", fg="cyan", bold=True)
        ctx.invoke(species, directory=directory, zenodo=zenodo, no_strict=no_strict)
Example No. 3
    def test_should_inherit_console_logger_formatter(self):
        logger = logging.Logger('test')
        formatter = logging.Formatter('custom: %(message)s')
        console_handler = logging.StreamHandler(sys.stderr)
        console_handler.setFormatter(formatter)
        logger.handlers = [console_handler]
        with logging_redirect_tqdm(loggers=[logger]):
            assert logger.handlers[0].formatter == formatter
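
A small interactive counterpart to the test above, relying on the behaviour that the test asserts (the temporary tqdm handler reuses the console handler's formatter); the logger name and message are illustrative:

import logging
import sys

from tqdm.contrib.logging import logging_redirect_tqdm

logger = logging.getLogger("demo")
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(logging.Formatter("custom: %(message)s"))
logger.addHandler(handler)

with logging_redirect_tqdm(loggers=[logger]):
    # Emitted through the temporary tqdm handler, still using the "custom:" format
    logger.warning("hello")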
Example No. 4
    def test_should_not_remove_stream_handlers_not_for_stdout_or_stderr(self):
        logger = logging.Logger('test')
        stream_handler = logging.StreamHandler(StringIO())
        logger.addHandler(stream_handler)
        with logging_redirect_tqdm(loggers=[logger]):
            assert len(logger.handlers) == 2
            assert logger.handlers[0] == stream_handler
            assert isinstance(logger.handlers[1], TqdmLoggingHandler)
        assert logger.handlers == [stream_handler]
Example No. 5
    def test_should_remove_and_restore_console_handlers(self):
        logger = logging.Logger('test')
        stderr_console_handler = logging.StreamHandler(sys.stderr)
        stdout_console_handler = logging.StreamHandler(sys.stdout)
        logger.handlers = [stderr_console_handler, stdout_console_handler]
        with logging_redirect_tqdm(loggers=[logger]):
            assert len(logger.handlers) == 1
            assert isinstance(logger.handlers[0], TqdmLoggingHandler)
        assert logger.handlers == [
            stderr_console_handler, stdout_console_handler
        ]
Example No. 6
def species(directory: str, zenodo: bool, no_strict: bool, force: bool):
    """Make the prefix-identifier-species dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_species,
            "species",
            ("prefix", "identifier", "species"),
            strict=not no_strict,
            force=force,
            directory=directory,
        )
    if zenodo:
        # see https://zenodo.org/record/5334738
        update_zenodo(SPECIES_RECORD, paths)
Example No. 7
    async def run(self, py_path, wait=False):
        response = await self.send_command_and_get_response(
            "program_modechange", {"mode": "download"}
        )

        with open(py_path, "rb") as demo:
            program = demo.read()

        chunk_size = 512
        chunks = [
            program[i : i + chunk_size] for i in range(0, len(program), chunk_size)
        ]

        while response is None or "transferid" not in response:
            response = await self.send_command_and_get_response(
                "start_write_program",
                {
                    "meta": {
                        "created": 0,
                        "modified": 0,
                        "project_id": "Pybricksdev_",
                        "project_id": "Pybricksdev_",
                        "name": "Pybricksdev_____",
                        "type": "python",
                    },
                    "size": len(program),
                    "slotid": 0,
                },
            )
        transferid = response["transferid"]

        with logging_redirect_tqdm(), tqdm(
            total=len(program), unit="B", unit_scale=True
        ) as pbar:
            for chunk in chunks:
                response = await self.send_command_and_get_response(
                    "write_package",
                    {
                        "data": base64.b64encode(chunk).decode("ascii"),
                        "transferid": transferid,
                    },
                )
                pbar.update(len(chunk))

        await asyncio.sleep(0.5)
        response = await self.send_command_and_get_response(
            "program_execute", {"slotid": 0}
        )
        print(response)
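
The chunking and base64 encoding used above, isolated as a standalone snippet with a dummy payload (the sizes are illustrative; the hub protocol details are unchanged from the example):

import base64

program = bytes(1300)  # dummy payload standing in for the .py file contents
chunk_size = 512
chunks = [program[i : i + chunk_size] for i in range(0, len(program), chunk_size)]
payloads = [base64.b64encode(chunk).decode("ascii") for chunk in chunks]
print([len(chunk) for chunk in chunks])  # [512, 512, 276]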
Example No. 8
def synonyms(directory: str, zenodo: bool, force: bool, no_strict: bool):
    """Make the prefix-identifier-synonym dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_synonyms,
            "synonyms",
            ("prefix", "identifier", "synonym"),
            directory=directory,
            force=force,
            strict=not no_strict,
            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome"},
        )
    if zenodo:
        # see https://zenodo.org/record/4021482
        update_zenodo(SYNONYMS_RECORD, paths)
Example No. 9
def properties(directory: str, zenodo: bool, force: bool, no_strict: bool):
    """Make the properties dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_properties,
            "properties",
            ("prefix", "identifier", "property", "value"),
            directory=directory,
            force=force,
            strict=not no_strict,
            summary_detailed=(0, 2),  # column index 2 corresponds to the property type
        )
    if zenodo:
        # see https://zenodo.org/record/4625172
        update_zenodo(PROPERTIES_RECORD, paths)
Example No. 10
def alts(directory: str, zenodo: bool, force: bool, no_strict: bool):
    """Make the prefix-alt-id dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_alts,
            "alts",
            ("prefix", "identifier", "alt"),
            directory=directory,
            force=force,
            strict=not no_strict,
            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome", "umls"},
        )
    if zenodo:
        # see https://zenodo.org/record/4021476
        update_zenodo(ALTS_DATA_RECORD, paths)
Example No. 11
def definitions(directory: str, zenodo: bool, no_strict: bool, force: bool):
    """Make the prefix-identifier-definition dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_definitions,
            "definitions",
            ("prefix", "identifier", "definition"),
            strict=not no_strict,
            force=force,
            directory=directory,
            skip_set={"kegg.pathway", "kegg.genes", "kegg.genome", "umls"},
        )
    if zenodo:
        # see https://zenodo.org/record/4637061
        update_zenodo(DEFINITIONS_RECORD, paths)
Example No. 12
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):  # noqa: D202
    """Make the prefix-identifier-xref dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_xrefs,
            "xrefs",
            ("prefix", "identifier", "xref_prefix", "xref_identifier", "provenance"),
            directory=directory,
            force=force,
            strict=not no_strict,
            summary_detailed=(0, 2),  # column index 2 corresponds to the xref prefix
        )
    if zenodo:
        # see https://zenodo.org/record/4021477
        update_zenodo(JAVERT_RECORD, paths)
Example No. 13
def typedefs(directory: str, zenodo: bool, no_strict: bool, force: bool):
    """Make the typedef prefix-identifier-name dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_typedefs,
            "typedefs",
            ("prefix", "typedef_prefix", "identifier", "name"),
            strict=not no_strict,
            force=force,
            directory=directory,
            use_gzip=False,
            skip_set={"ncbigene", "kegg.pathway", "kegg.genes", "kegg.genome"},
        )
    if zenodo:
        # see https://zenodo.org/record/4644013
        update_zenodo(TYPEDEFS_RECORD, paths)
Example No. 14
def train(args):
    if args.seed is not None:
        utils.random.seed_everything(args.seed)
    device = torch.device("cuda" if args.cuda else "cpu")

    preprocessor = Preprocessor()
    preprocessor.build_vocab(args.train_file, cache_dir=args.cache_dir)
    if args.embed_file:
        preprocessor.load_embeddings(args.embed_file, cache_dir=args.cache_dir)
    loader_config = dict(
        preprocessor=preprocessor,
        batch_size=args.batch_size,
        device=device,
        cache_dir=args.cache_dir,
    )
    train_dataloader = create_dataloader(args.train_file, **loader_config, shuffle=True)
    eval_dataloader = None
    if args.eval_file:
        eval_dataloader = create_dataloader(args.eval_file, **loader_config, shuffle=False)

    model = build_model(
        word_vocab_size=len(preprocessor.vocabs["word"]),
        pretrained_word_vocab_size=len(preprocessor.vocabs["pretrained_word"]),
        postag_vocab_size=len(preprocessor.vocabs["postag"]),
        pretrained_word_embeddings=preprocessor.pretrained_word_embeddings,
        n_deprels=len(preprocessor.vocabs["deprel"]),
    )
    model.to(device)

    trainer = create_trainer(
        model, lr=args.learning_rate, max_steps=args.max_steps, eval_interval=args.eval_interval
    )
    trainer.add_callback(utils.training.PrintCallback(printer=logger.info))
    if eval_dataloader:
        deprel_map = {v: k for k, v in preprocessor.vocabs["deprel"].mapping.items()}
        trainer.add_callback(EvaluateCallback(args.eval_file, deprel_map), priority=0)
        if args.save_dir:
            torch.save(preprocessor, os.path.join(args.save_dir, "preprocessor.pt"))
            trainer.add_callback(
                utils.training.SaveCallback(args.save_dir, monitor="eval/UAS", mode="max")
            )
    with logging_redirect_tqdm(loggers=[logger]):
        trainer.fit(train_dataloader, eval_dataloader)
Example No. 15
    async def run(self, py_path, wait=True, print_output=True):
        """Run a Pybricks MicroPython script on the hub and print output.

        Arguments:
            py_path (str):
                Path to MicroPython script.
            wait (bool):
                Whether to wait for any output until the program completes.
            print_output (bool):
                Whether to print the standard output.
        """

        # Reset output buffer
        self.log_file = None
        self.output = []
        self.print_output = print_output

        # Compile the script to mpy format
        mpy = await compile_file(py_path)

        # Get length of file and send it as bytes to hub
        length = len(mpy).to_bytes(4, byteorder='little')
        await self.send_message(length)

        # Divide script in chunks of bytes
        n = 100
        chunks = [mpy[i:i + n] for i in range(0, len(mpy), n)]

        # Send the data chunk by chunk
        with logging_redirect_tqdm(), tqdm(total=len(mpy),
                                           unit='B',
                                           unit_scale=True) as pbar:
            for chunk in chunks:
                await self.send_message(chunk)
                pbar.update(len(chunk))

        # Optionally wait for the program to finish
        if wait:
            await asyncio.sleep(0.2)
            await self.wait_until_state_is_not(self.RUNNING)
Example No. 16
def relations(directory: str, zenodo: bool, force: bool, no_strict: bool):
    """Make the relation dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_relations,
            "relations",
            (
                "source_prefix",
                "source_identifier",
                "relation_prefix",
                "relation_identifier",
                "target_prefix",
                "target_identifier",
            ),
            directory=directory,
            force=force,
            strict=not no_strict,
            summary_detailed=(0, 2, 3),  # column indices 2 and 3 identify the relation type
        )
    if zenodo:
        # see https://zenodo.org/record/4625167
        update_zenodo(RELATIONS_RECORD, paths)
Example No. 17
async def _download_queue(
    queue: asyncio.Queue,
    session: RetryClient,
    stats: dict,
    params: DownloadParams,
    progressbar: tqdm_asyncio = None,
    logger: logging.Logger = None,
):
    """Consumes items from download queue

    Args:
        queue (asyncio.Queue): Queue of items
        session (RetryClient): RetryClient aiohttp session object
        params (DownloadParams): Download parameter dict
        logger (logging.Logger): Logger object
    """
    while True:
        batch = await queue.get()
        for sample in batch:
            failed = False
            try:
                success = await download_single(sample, session, params)
            except Exception as e:
                with logging_redirect_tqdm(loggers=[logger]):
                    logger.error(e.request_info.url,
                                 extra={"status": e.status})
                    failed = True

            if failed:
                stats["failed"] += 1
            elif not success:
                stats["skipped"] += 1
            else:
                stats["success"] += 1

            progressbar.set_postfix(stats=stats, refresh=True)
            progressbar.update(1)

        queue.task_done()
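
A minimal, self-contained sketch of the same consumer pattern with dummy work in place of the HTTP downloads; every name below is illustrative and not part of the project above:

import asyncio
import logging
import sys

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

log = logging.getLogger("downloader")  # illustrative logger with its own console handler
log.addHandler(logging.StreamHandler(sys.stderr))
log.propagate = False


async def consume(queue: asyncio.Queue, stats: dict, pbar: tqdm) -> None:
    while True:
        batch = await queue.get()
        for item in batch:
            try:
                if item == 3:
                    raise RuntimeError("simulated download error")
                await asyncio.sleep(0)  # stand-in for the real download call
                stats["success"] += 1
            except Exception as exc:
                # Route the error through tqdm so it does not break the progress bar
                with logging_redirect_tqdm(loggers=[log]):
                    log.error("download failed: %s", exc)
                stats["failed"] += 1
            pbar.set_postfix(stats=stats, refresh=True)
            pbar.update(1)
        queue.task_done()


async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    stats = {"success": 0, "failed": 0}
    batches = [[1, 2, 3], [4, 5]]
    for batch in batches:
        queue.put_nowait(batch)
    with tqdm(total=sum(len(b) for b in batches)) as pbar:
        worker = asyncio.create_task(consume(queue, stats, pbar))
        await queue.join()
        worker.cancel()
        try:
            await worker
        except asyncio.CancelledError:
            pass


asyncio.run(main())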
Example No. 18
def names(
    directory: str,
    zenodo: bool,
    no_strict: bool,
    force: bool,
    skip_below: Optional[str],
    skip_below_exclusive: bool,
):
    """Make the prefix-identifier-name dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_names,
            "names",
            ("prefix", "identifier", "name"),
            strict=not no_strict,
            force=force,
            directory=directory,
            skip_below=skip_below,
            skip_below_inclusive=not skip_below_exclusive,
        )
    if zenodo:
        # see https://zenodo.org/record/4020486
        update_zenodo(OOH_NA_NA_RECORD, paths)
Example No. 19
    async def run(self, py_path, wait=True, print_output=True):

        # Reset output buffer
        self.log_file = None
        self.output = []
        self.print_output = print_output

        # Compile the script to mpy format
        self.script_dir, _ = os.path.split(py_path)
        mpy = await compile_file(py_path)

        try:
            self.loading = True
            self.user_program_stopped.clear()

            # Get length of file and send it as bytes to hub
            length = len(mpy).to_bytes(4, byteorder='little')
            await self.send_block(length)

            # Divide script in chunks of bytes
            n = 100
            chunks = [mpy[i:i + n] for i in range(0, len(mpy), n)]

            # Send the data chunk by chunk
            with logging_redirect_tqdm(), tqdm(total=len(mpy),
                                               unit='B',
                                               unit_scale=True) as pbar:
                for chunk in chunks:
                    await self.send_block(chunk)
                    pbar.update(len(chunk))
        finally:
            self.loading = False

        if wait:
            await self.user_program_stopped.wait()
            await asyncio.sleep(0.3)
Example No. 20
def update_database():
    with logging_redirect_tqdm():
        install_mp_handler()

        latest_update = Update.get_latest_update(success=False)

        if latest_update.status not in [
                Update.Status.ERROR, Update.Status.SUCCESS
        ]:
            print(
                'The last update (revision=%s) has not finished yet (it may have hung or crashed). '
                'Would you like to start a new update (y) or continue the previous one (N)?'
                % latest_update.id)
            response = input()
            if response.lower() == 'y':
                latest_update.status = 'error'
                latest_update.save()

                latest_update = Update.objects.create(
                    status=Update.Status.IN_PROGRESS)
        else:
            latest_update = Update.objects.create(
                status=Update.Status.IN_PROGRESS)

        # scraping level 3 indexes first
        level_3_koatuu = list(
            set([koatuu for koatuu in _get_indexes() if koatuu.level <= 2]))
        # sort unique ids then (stable sort, so level is still sorted)
        level_3_koatuu.sort(key=attrgetter('unique_id'))
        # sort level from 1 to 3 keeping stable unique_id
        level_3_koatuu.sort(key=attrgetter('level'))

        if latest_update.latest_koatuu:
            logging.info("Searching for the latest koatuu scraped")
            latest_koatuu_obj = next(
                (koatuu for koatuu in level_3_koatuu
                 if koatuu.unique_id == latest_update.latest_koatuu), None)
            if latest_koatuu_obj is None:
                level_3_koatuu = []
            else:
                logging.info("Found latest koatuu scraped %s",
                             latest_koatuu_obj)
                level_3_koatuu = level_3_koatuu[level_3_koatuu.
                                                index(latest_koatuu_obj):]
                logging.info('Koatuu left to scrape: %s', len(level_3_koatuu))

        if level_3_koatuu:
            _download_and_insert(latest_update, level_3_koatuu)
        logging.info('All insert l1 operations ended')
        # process level 4 indexes only for regions where parcels
        # number is more than 100000

        annotated = Landuse.objects.all().values('koatuu').filter(
            revision=latest_update.id).annotate(
                total=Count('koatuu')).order_by('-total')

        level_4_koatuu = []
        all_koatuu = list(set([koatuu for koatuu in _get_indexes()]))
        for result in annotated:
            if result['total'] < 100000:
                continue
            koatuu_obj = next(koatuu for koatuu in all_koatuu
                              if koatuu.unique_id == str(result['koatuu']))

            if koatuu_obj.level == 2:
                level_3_koatuus = [
                    *set([
                        koatuu for koatuu in all_koatuu if koatuu.level == 3
                        and str(koatuu.parent) == koatuu_obj.unique_id
                    ])
                ]
                level_4_koatuu.extend(level_3_koatuus)
                for level_3_koatuu in level_3_koatuus:
                    level_4_koatuu.extend([
                        *set([
                            koatuu
                            for koatuu in all_koatuu if koatuu.level == 4
                            and str(koatuu.parent) == level_3_koatuu.unique_id
                        ])
                    ])

            if koatuu_obj.level == 3:
                level_4_koatuu.extend([
                    *set([
                        koatuu for koatuu in all_koatuu if koatuu.level == 4
                        and str(koatuu.parent) == koatuu_obj.unique_id
                    ])
                ])

        level_4_koatuu.sort(key=attrgetter('unique_id'))
        _download_and_insert(latest_update, level_4_koatuu)

    # detecting changes to create analysis table
    create_changeset(
        revision=Update.objects.get(id=latest_update.id),
        previous=Update.objects.get(id=Update.get_latest_update().id),
    )

    # everything is ok => success status
    Update.objects.filter(id=latest_update.id).update(
        status=Update.Status.SUCCESS)
Example No. 21
def bidsparticipants(rawfolder: str, bidsfolder: str, keys: str, bidsmapfile: str='bidsmap.yaml', dryrun: bool=False) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder to (re)generate the participants.tsv file in the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param keys:            The keys that are extracted from the source data when populating the participants.tsv file
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :param dryrun:          Boolean to just display the participants info
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder  = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()

    # Start logging
    if dryrun:
        bidscoin.setup_logging()
    else:
        bidscoin.setup_logging(bidsfolder/'code'/'bidscoin'/'bidsparticipants.log')
    LOGGER.info('')
    LOGGER.info(f"-------------- START bidsparticipants {bidscoin.version()} ------------")
    LOGGER.info(f">>> bidsparticipants sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile}")

    # Get the bidsmap sub-/ses-prefix from the bidsmap YAML-file
    bidsmap,_ = bids.load_bidsmap(Path(bidsmapfile), bidsfolder/'code'/'bidscoin')
    subprefix = bidsmap['Options']['bidscoin']['subprefix']
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix']

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv  = bidsfolder/'participants.tsv'
    participants_json = participants_tsv.with_suffix('.json')
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'], verify_integrity=True, inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    else:
        participants_dict = {'participant_id': {'Description': 'Unique participant identifier'}}

    # Get the list of subjects
    subjects = bidscoin.lsdirs(bidsfolder, 'sub-*')
    if not subjects:
        LOGGER.warning(f"No subjects found in: {bidsfolder}")

    # Remove obsolete participants from the participants table
    for participant in participants_table.index:
        if participant not in [sub.name for sub in subjects]:
            participants_table = participants_table.drop(participant)

    # Loop over all subjects in the bids-folder and add them to the participants table
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(f"------------------- Subject {n}/{len(subjects)} -------------------")
            personals = dict()
            subject   = rawfolder/subject.name.replace('sub-', subprefix.replace('*',''))     # TODO: This assumes e.g. that the subject-ids in the rawfolder did not contain BIDS-invalid characters (such as '_')
            sessions  = bidscoin.lsdirs(subject, (sesprefix if sesprefix!='*' else '') + '*')
            if not subject.is_dir():
                LOGGER.error(f"Could not find source-folder: {subject}")
                continue
            if not sessions:
                sessions = [subject]
            for session in sessions:

                # Only take data from the first session -> BIDS specification
                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()
                if sesprefix and sesid and 'session_id' not in personals:
                    personals['session_id']         = sesid
                    participants_dict['session_id'] = {'Description': 'Session identifier'}

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Update / append the personal source data
                    LOGGER.info(f"Scanning session: {sesfolder}")
                    success = scanpersonals(bidsmap, sesfolder, personals)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

                    if success:
                        break

            # Store the collected personals in the participant_table. TODO: Check that only values that are consistent over sessions go in the participants.tsv file, otherwise put them in a sessions.tsv file
            for key in keys:
                if key not in participants_dict:
                    participants_dict[key] = dict(LongName    = 'Long (unabbreviated) name of the column',
                                                  Description = 'Description of the column',
                                                  Levels      = dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                                                  Units       = 'Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED')

                participants_table.loc[subid, key] = personals.get(key)

    # Write the collected data to the participant files
    LOGGER.info(f"Writing subject data to: {participants_tsv}")
    if not dryrun:
        participants_table.replace('','n/a').to_csv(participants_tsv, sep='\t', encoding='utf-8', na_rep='n/a')

    LOGGER.info(f"Writing subject data dictionary to: {participants_json}")
    if not dryrun:
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    print(participants_table)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
Example No. 22
    def analyze(
        self,
        detector,  # fer.FER instance
        display: bool = False,
        output: str = "csv",
        frequency: Optional[int] = None,
        max_results: Optional[int] = None,
        save_fps: Optional[int] = None,
        video_id: Optional[str] = None,
        save_frames: bool = True,
        save_video: bool = True,
        annotate_frames: bool = True,
        zip_images: bool = True,
        detection_box: Optional[dict] = None,
    ) -> list:
        """Recognize facial expressions in video using `detector`.

        Args:

            detector (fer.FER): facial expression recognizer
            display (bool): show images with cv2.imshow
            output (str): csv or pandas
            frequency (int): inference on every nth frame (higher number is faster)
            max_results (int): number of frames to run inference before stopping
            save_fps (int): inference frequency = video fps // save_fps
            video_id (str): filename for saving
            save_frames (bool): saves frames to directory
            save_video (bool): saves output video
            annotate_frames (bool): add emotion labels
            zip_images (bool): compress output
            detection_box (dict): dict with bounding box for subimage (xmin, xmax, ymin, ymax)

        Returns:

            data (list): list of results

        """
        frames_emotions = []
        if frequency is None:
            frequency = 1
        else:
            frequency = int(frequency)

        self.display = display
        self.save_frames = save_frames
        self.save_video = save_video
        self.annotate_frames = annotate_frames

        results_nr = 0

        # Open video
        assert self.cap.open(self.filepath), "Video capture not opening"
        self.__emotions = detector._get_labels().items()
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
        pos_frames = self.cap.get(cv2.CAP_PROP_POS_FRAMES)
        assert int(pos_frames) == 0, "Video not at index 0"

        self.frameCount = 0
        height, width = (
            int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        )

        fps = self.cap.get(cv2.CAP_PROP_FPS)
        length = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        assert fps and length, "File {} not loaded".format(self.filepath)

        if save_fps is not None:
            frequency = fps // save_fps
            log.info("Saving every {} frames".format(frequency))

        log.info("{:.2f} fps, {} frames, {:.2f} seconds".format(
            fps, length, length / fps))

        if self.save_frames:
            os.makedirs(self.outdir, exist_ok=True)
            log.info(f"Making directories at {self.outdir}")
        root, ext = os.path.splitext(os.path.basename(self.filepath))
        outfile = os.path.join(self.outdir, f"{root}_output{ext}")

        if save_video:
            self.videowriter = self._save_video(outfile, fps, width, height)

        # Keep the logging redirection active while the progress bar is live so that
        # log messages are routed through tqdm.write() instead of breaking the bar.
        with logging_redirect_tqdm(), tqdm(total=length, unit="frames") as pbar:
            while self.cap.isOpened():
                ret, frame = self.cap.read()
                if not ret:  # end of video
                    break

                if frame is None:
                    log.warning("Empty frame")
                    continue

                if self.frameCount % frequency != 0:
                    self.frameCount += 1
                    continue

                if detection_box is not None:
                    frame = self._crop(frame, detection_box)

                # Get faces and detect emotions; coordinates are for unpadded frame
                try:
                    faces = detector.detect_emotions(frame)
                except Exception as e:
                    log.error(e)
                    break

                # Offset detection_box to include padding
                if detection_box is not None:
                    faces = self._offset_detection_box(faces, detection_box)

                self._increment_frames(frame, faces, video_id, root)

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

                if faces:
                    frames_emotions.append(faces)

                results_nr += 1
                if max_results and results_nr > max_results:
                    break

                pbar.update(1)
        self._close_video(outfile, save_frames, zip_images)
        return self.to_format(frames_emotions, output)
Example No. 23
def bidsmapper(rawfolder: str, bidsfolder: str, bidsmapfile: str, templatefile: str, plugins: list, subprefix: str, sesprefix: str, store: bool=False, noedit: bool=False, force: bool=False) -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder
    and that generates a maximally filled-in bidsmap.yaml file in bidsfolder/code/bidscoin.
    Folders in sourcefolder are assumed to contain a single dataset.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param bidsmapfile:     The name of the bidsmap YAML-file
    :param templatefile:    The name of the bidsmap template YAML-file
    :param plugins:         Optional list of plugins that should be used (overrules the list in the study/template bidsmaps)
    :param subprefix:       The prefix common for all source subject-folders
    :param sesprefix:       The prefix common for all source session-folders
    :param store:           If True, the provenance samples will be stored
    :param noedit:          The bidseditor will not be launched if True
    :param force:           If True, the previous bidsmap and logfiles will be deleted
    :return:
    """

    # Input checking
    rawfolder      = Path(rawfolder).resolve()
    bidsfolder     = Path(bidsfolder).resolve()
    bidsmapfile    = Path(bidsmapfile)
    templatefile   = Path(templatefile)
    bidscoinfolder = bidsfolder/'code'/'bidscoin'

    # Start logging
    if force:
        (bidscoinfolder/'bidsmapper.log').unlink(missing_ok=True)
    bidscoin.setup_logging(bidscoinfolder/'bidsmapper.log')
    LOGGER.info('')
    LOGGER.info('-------------- START BIDSmapper ------------')
    LOGGER.info(f">>> bidsmapper sourcefolder={rawfolder} bidsfolder={bidsfolder} bidsmap={bidsmapfile} "
                f"template={templatefile} plugins={plugins} subprefix={subprefix} sesprefix={sesprefix} store={store} force={force}")

    # Get the heuristics for filling the new bidsmap
    bidsmap_old, bidsmapfile = bids.load_bidsmap(bidsmapfile,  bidscoinfolder, plugins)
    template, _              = bids.load_bidsmap(templatefile, bidscoinfolder, plugins)

    # Create the new bidsmap as a copy / bidsmap skeleton with no datatype entries (i.e. bidsmap with empty lists)
    if force:
        bidsmapfile.unlink(missing_ok=True)
        bidsmap_old = {}
    if bidsmap_old:
        bidsmap_new = copy.deepcopy(bidsmap_old)
    else:
        bidsmap_new = copy.deepcopy(template)
    template['Options'] = bidsmap_new['Options']                # Always use the options of the new bidsmap
    bidscoindatatypes   = bidsmap_new['Options']['bidscoin'].get('datatypes',[])
    unknowndatatypes    = bidsmap_new['Options']['bidscoin'].get('unknowntypes',[])
    ignoredatatypes     = bidsmap_new['Options']['bidscoin'].get('ignoretypes',[])
    for dataformat in bidsmap_new:
        if dataformat in ('Options','PlugIns'): continue        # Handle legacy bidsmaps (-> 'PlugIns')
        for datatype in bidscoindatatypes + unknowndatatypes + ignoredatatypes:
            if bidsmap_new[dataformat].get(datatype):
                bidsmap_new[dataformat][datatype] = None

    # Store/retrieve the empty or user-defined sub-/ses-prefix
    subprefix, sesprefix = setprefix(bidsmap_new, subprefix, sesprefix)

    # Start with an empty skeleton if we didn't have an old bidsmap
    if not bidsmap_old:
        bidsmap_old = copy.deepcopy(bidsmap_new)
        bidsmapfile = bidscoinfolder/'bidsmap.yaml'

    # Import the data scanning plugins
    plugins = [bidscoin.import_plugin(plugin, ('bidsmapper_plugin',)) for plugin in bidsmap_new['Options']['plugins']]
    plugins = [plugin for plugin in plugins if plugin]          # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidsmapper_plugin` function, nothing to do")
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Loop over all subjects and sessions and build up the bidsmap entries
    subjects = bidscoin.lsdirs(rawfolder, (subprefix if subprefix!='*' else '') + '*')
    if not subjects:
        LOGGER.warning(f'No subjects found in: {rawfolder/subprefix}*')
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, (sesprefix if sesprefix!='*' else '') + '*')
            if not sessions or (subject/'DICOMDIR').is_file():
                sessions = [subject]
            for session in sessions:

                LOGGER.info(f"Mapping: {session} (subject {n}/{len(subjects)})")

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:
                    if store:
                        store = {'source': sesfolder.parent.parent.parent.parent if unpacked else rawfolder.parent, 'target': bidscoinfolder/'provenance'}
                    else:
                        store = {}

                    # Run the bidsmapper plugins
                    for module in plugins:
                        LOGGER.info(f"Executing plugin: {Path(module.__file__).name} -> {sesfolder}")
                        module.bidsmapper_plugin(sesfolder, bidsmap_new, bidsmap_old, template, store)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Save the new study bidsmap in the bidscoinfolder or launch the bidseditor UI_MainWindow
    if noedit:
        bids.save_bidsmap(bidsmapfile, bidsmap_new)

    else:
        LOGGER.info('Opening the bidseditor')
        app = QApplication(sys.argv)
        app.setApplicationName(f"{bidsmapfile} - BIDS editor {localversion}")

        mainwin = bidseditor.MainWindow(bidsfolder, bidsmap_new, template)
        mainwin.show()

        messagebox = QMessageBox(mainwin)
        messagebox.setText(f"The bidsmapper has finished scanning {rawfolder}\n\n"
                           f"Please carefully check all the different BIDS output names "
                           f"and BIDScoin options and (re)edit them to your needs.\n\n"
                           f"You can always redo this step later by re-running the "
                           f"bidsmapper or by just running the bidseditor tool\n\n"
                           f"{versionmessage}")
        messagebox.setWindowTitle('About the BIDS-mapping workflow')
        messagebox.setIconPixmap(QtGui.QPixmap(str(bidseditor.BIDSCOIN_LOGO)).scaled(150, 150, QtCore.Qt.KeepAspectRatio, QtCore.Qt.SmoothTransformation))
        messagebox.setWindowFlags(messagebox.windowFlags() & ~QtCore.Qt.WindowMinMaxButtonsHint)
        messagebox.show()

        app.exec()

    LOGGER.info('-------------- FINISHED! -------------------')
    LOGGER.info('')

    bidscoin.reporterrors()
Example No. 24
def update(force: bool):
    """Update the data file."""
    with logging_redirect_tqdm():
        _update(force=force)
Example No. 25
def medeface(bidsdir: str, pattern: str, maskpattern: str, subjects: list,
             force: bool, output: str, cluster: bool, nativespec: str,
             kwargs: dict):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike search pattern (relative to the subject/session folder) to select the echo-images that need to be defaced, e.g. 'anat/*_T1w*'
    :param maskpattern: Globlike search pattern (relative to the subject/session folder) to select the images from which the defacemask is computed, e.g. 'anat/*_part-mag_*_T2starw*'. If not given then 'pattern' is used
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param force:       If True then images will be processed, regardless if images have already been defaced (i.e. if {"Defaced": True} in the json sidecar file)
    :param output:      Determines where the defaced images are saved. It can be the name of a BIDS datatype folder, such as 'anat', or of the derivatives folder, i.e. 'derivatives'. If output is left empty then the original images are replaced by the defaced images
    :param cluster:     Flag to submit the deface jobs to the high-performance compute (HPC) cluster
    :param nativespec:  DRMAA native specifications for submitting deface jobs to the HPC cluster
    :param kwargs:      Additional arguments (in dict/json-style) that are passed to pydeface. See examples for usage
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()
    if not maskpattern:
        maskpattern = pattern

    # Start logging
    bidscoin.setup_logging(bidsdir / 'code' / 'bidscoin' / 'deface.log')
    LOGGER.info('')
    LOGGER.info('------------ START multi-echo deface ----------')
    LOGGER.info(
        f">>> medeface bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
        f" cluster={cluster} nativespec={nativespec} {kwargs}")

    # Get the list of subjects
    if not subjects:
        subjects = bidscoin.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = [
            'sub-' + subject.replace('sub-', '') for subject in subjects
        ]  # Make sure there is a "sub-" prefix
        subjects = [
            bidsdir / subject for subject in subjects
            if (bidsdir / subject).is_dir()
        ]

    # Prepare the HPC pydeface job submission
    with drmaa.Session() as pbatch:
        if cluster:
            jt = pbatch.createJobTemplate()
            jt.jobEnvironment = os.environ
            jt.remoteCommand = shutil.which('pydeface')
            jt.nativeSpecification = nativespec
            jt.joinFiles = True

        # Loop over bids subject/session-directories to first get all the echo-combined deface masks
        for n, subject in enumerate(subjects, 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                datasource = bids.DataSource(session / 'dum.my',
                                             subprefix='sub-',
                                             sesprefix='ses-')
                subid, sesid = datasource.subid_sesid()

                # Read the echo-images that will be combined to compute the deface mask
                echofiles = sorted([
                    match for match in session.glob(maskpattern)
                    if '.nii' in match.suffixes
                ])
                if not echofiles:
                    LOGGER.info(
                        f'No mask files found for: {session}/{maskpattern}')
                    continue

                # Check the json "Defaced" field to see if it has already been defaced
                if not force:
                    with echofiles[0].with_suffix('').with_suffix(
                            '.json').open('r') as fid:
                        jsondata = json.load(fid)
                    if jsondata.get('Defaced'):
                        LOGGER.info(
                            f"Skipping already defaced images: {[str(echofile) for echofile in echofiles]}"
                        )
                        continue

                LOGGER.info(
                    f'Loading mask files: {[str(echofile) for echofile in echofiles]}'
                )
                echos = [nib.load(echofile) for echofile in echofiles]

                # Create a temporary echo-combined image
                tmpfile = session / 'tmp_echocombined_deface.nii'
                combined = nib.Nifti1Image(
                    np.mean([echo.get_fdata() for echo in echos], axis=0),
                    echos[0].affine, echos[0].header)
                combined.to_filename(tmpfile)

                # Deface the echo-combined image
                LOGGER.info(
                    f"Creating a deface-mask from the echo-combined image: {tmpfile}"
                )
                if cluster:
                    jt.args = [
                        str(tmpfile), '--outfile',
                        str(tmpfile), '--force'
                    ] + [
                        item for pair in [[f"--{key}", val]
                                          for key, val in kwargs.items()]
                        for item in pair
                    ]
                    jt.jobName = f"pydeface_{subid}_{sesid}"
                    jobid = pbatch.runJob(jt)
                    LOGGER.info(
                        f"Your deface job has been submitted with ID: {jobid}")
                else:
                    pdu.deface_image(str(tmpfile),
                                     str(tmpfile),
                                     force=True,
                                     forcecleanup=True,
                                     **kwargs)

        if cluster:
            LOGGER.info('Waiting for the deface jobs to finish...')
            pbatch.synchronize(jobIds=[pbatch.JOB_IDS_SESSION_ALL],
                               timeout=pbatch.TIMEOUT_WAIT_FOREVER,
                               dispose=True)
            pbatch.deleteJobTemplate(jt)

    # Loop again over bids subject/session-directories to apply the deface masks and write meta-data
    with logging_redirect_tqdm():
        for n, subject in enumerate(
                tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('--------------------------------------')
                LOGGER.info(f"Processing ({n}/{len(subjects)}): {session}")

                datasource = bids.DataSource(session / 'dum.my',
                                             subprefix='sub-',
                                             sesprefix='ses-')
                subid, sesid = datasource.subid_sesid()

                # Read the temporary defacemask
                tmpfile = session / 'tmp_echocombined_deface.nii'
                if not tmpfile.is_file():
                    LOGGER.info(f'No {tmpfile} file found')
                    continue
                defacemask = nib.load(tmpfile).get_fdata(
                ) != 0  # The original defacemask is saved in a temporary folder so it may be deleted -> use the defaced image to infer the mask
                tmpfile.unlink()

                # Process the echo-images that need to be defaced
                for echofile in sorted([
                        match for match in session.glob(pattern)
                        if '.nii' in match.suffixes
                ]):

                    # Construct the output filename and relative path name (used in BIDS)
                    echofile_rel = echofile.relative_to(session).as_posix()
                    if not output:
                        outputfile = echofile
                        outputfile_rel = echofile_rel
                    elif output == 'derivatives':
                        outputfile = bidsdir / 'derivatives' / 'deface' / subid / sesid / echofile.parent.name / echofile.name
                        outputfile_rel = outputfile.relative_to(
                            bidsdir).as_posix()
                    else:
                        outputfile = session / output / echofile.name
                        outputfile_rel = outputfile.relative_to(
                            session).as_posix()
                    outputfile.parent.mkdir(parents=True, exist_ok=True)

                    # Apply the defacemask
                    LOGGER.info(
                        f'Applying deface mask on: {echofile} -> {outputfile_rel}'
                    )
                    echoimg = nib.load(echofile)
                    outputimg = nib.Nifti1Image(
                        echoimg.get_fdata() * defacemask, echoimg.affine,
                        echoimg.header)
                    outputimg.to_filename(outputfile)

                    # Overwrite or add a json sidecar-file
                    inputjson = echofile.with_suffix('').with_suffix('.json')
                    outputjson = outputfile.with_suffix('').with_suffix(
                        '.json')
                    if inputjson.is_file() and inputjson != outputjson:
                        if outputjson.is_file():
                            LOGGER.info(
                                f"Overwriting the json sidecar-file: {outputjson}"
                            )
                            outputjson.unlink()
                        else:
                            LOGGER.info(
                                f"Adding a json sidecar-file: {outputjson}")
                        shutil.copyfile(inputjson, outputjson)

                    # Add a custom "Defaced" field to the json sidecar-file
                    with outputjson.open('r') as output_fid:
                        data = json.load(output_fid)
                    data['Defaced'] = True
                    with outputjson.open('w') as output_fid:
                        json.dump(data, output_fid, indent=4)

                    # Update the IntendedFor fields in the fieldmap sidecar-files NB: IntendedFor must be relative to the subject folder
                    if output and output != 'derivatives' and (
                            session / 'fmap').is_dir():
                        for fmap in (session / 'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                fmap_data = json.load(fmap_fid)
                            intendedfor = fmap_data['IntendedFor']
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if (Path(sesid) /
                                    echofile_rel).as_posix() in intendedfor:
                                LOGGER.info(
                                    f"Updating 'IntendedFor' to {Path(sesid)/outputfile_rel} in {fmap}"
                                )
                                fmap_data['IntendedFor'] = intendedfor + [
                                    (Path(sesid) / outputfile_rel).as_posix()
                                ]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(fmap_data, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir / '.bidsignore').is_file():
                        bidsignore = (bidsdir /
                                      '.bidsignore').read_text().splitlines()
                    else:
                        bidsignore = []
                    bidsignore.append('derivatives/')
                    scans_tsv = session / f"{subid}{bids.add_prefix('_',sesid)}_scans.tsv"
                    if output and output + '/' not in bidsignore and scans_tsv.is_file(
                    ):
                        LOGGER.info(f"Adding {outputfile_rel} to {scans_tsv}")
                        scans_table = pd.read_csv(scans_tsv,
                                                  sep='\t',
                                                  index_col='filename')
                        scans_table.loc[outputfile_rel] = scans_table.loc[
                            echofile_rel]
                        scans_table.sort_values(by=['acq_time', 'filename'],
                                                inplace=True)
                        scans_table.to_csv(scans_tsv,
                                           sep='\t',
                                           encoding='utf-8')

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
Example No. 26
@main.command()
@verbose_option
@directory_option
@zenodo_option
@force_option
@no_strict_option
def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool):  # noqa: D202
    """Make the prefix-identifier-xref dump."""
    with logging_redirect_tqdm():
        paths = db_output_helper(
            _iter_xrefs,
            "xrefs",
            ("prefix", "identifier", "xref_prefix", "xref_identifier", "provenance"),
            directory=directory,
            force=force,
            strict=not no_strict,
            summary_detailed=(0, 2),  # column index 2 corresponds to the xref prefix
        )
    if zenodo:
        # see https://zenodo.org/record/4021477
        update_zenodo(JAVERT_RECORD, paths)


if __name__ == "__main__":
    logging.captureWarnings(True)
    with logging_redirect_tqdm():
        main()
Example No. 27
def echocombine(bidsdir: str, pattern: str, subjects: list, output: str, algorithm: str, weights: list, force: bool=False):
    """

    :param bidsdir:     The bids-directory with the (multi-echo) subject data
    :param pattern:     Globlike recursive search pattern (relative to the subject/session folder) to select the first echo of the images that need to be combined, e.g. '*task-*echo-1*'
    :param subjects:    List of sub-# identifiers to be processed (the sub- prefix can be left out). If not specified then all sub-folders in the bidsfolder will be processed
    :param output:      Determines where the output is saved. It can be the name of a BIDS datatype folder, such as 'func', or of the derivatives folder, i.e. 'derivatives'. If output = [the name of the input datatype folder] then the original echo images are replaced by one combined image. If output is left empty then the combined image is saved in the input datatype folder and the original echo images are moved to the {unknowndatatype} folder
    :param algorithm:   Combination algorithm, either 'PAID', 'TE' or 'average'
    :param weights:     Weights for each echo
    :param force:       Boolean to overwrite existing ME target files
    :return:
    """

    # Input checking
    bidsdir = Path(bidsdir).resolve()

    # Start logging
    bidscoin.setup_logging(bidsdir/'code'/'bidscoin'/'echocombine.log')
    LOGGER.info('')
    LOGGER.info(f"--------- START echocombine ---------")
    LOGGER.info(f">>> echocombine bidsfolder={bidsdir} pattern={pattern} subjects={subjects} output={output}"
                f" algorithm={algorithm} weights={weights}")

    # Get the list of subjects
    if not subjects:
        subjects = bidscoin.lsdirs(bidsdir, 'sub-*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {bidsdir/'sub-*'}")
    else:
        subjects = ['sub-' + subject.replace('sub-', '') for subject in subjects]              # Make sure there is a "sub-" prefix
        subjects = [bidsdir/subject for subject in subjects if (bidsdir/subject).is_dir()]

    # Loop over bids subject/session-directories
    with logging_redirect_tqdm():
        for n, subject in enumerate(tqdm(subjects, unit='subject', leave=False), 1):

            sessions = bidscoin.lsdirs(subject, 'ses-*')
            if not sessions:
                sessions = [subject]
            for session in sessions:

                LOGGER.info('-------------------------------------')
                LOGGER.info(f"Combining echos for ({n}/{len(subjects)}): {session}")

                subid, sesid = bids.DataSource(session/'dum.my', subprefix='sub-', sesprefix='ses-').subid_sesid()

                # Search for multi-echo matches
                for match in sorted([match for match in session.rglob(pattern) if '.nii' in match.suffixes]):

                    # Check if it is normal/BIDS multi-echo data or that the echo-number is appended to the acquisition label (as done in BIDScoin)
                    if '_echo-' in match.name:
                        echonr      = bids.get_bidsvalue(match, 'echo')
                        mepattern   = bids.get_bidsvalue(match, 'echo', '*')                        # The pattern that selects all echos
                        cename      = match.name.replace(f"_echo-{echonr}", '')                     # The combined-echo output filename
                    elif '_acq-' in match.name and bids.get_bidsvalue(match, 'acq').split('e')[-1].isnumeric():
                        acq, echonr = bids.get_bidsvalue(match, 'acq').rsplit('e',1)
                        mepattern   = bids.get_bidsvalue(match, 'acq', acq + 'e*')                  # The pattern that selects all echos
                        cename      = match.name.replace(f"_acq-{acq}e{echonr}", f"_acq-{acq}")     # The combined-echo output filename
                        LOGGER.info(f"No 'echo' key-value pair found in the filename, using the 'acq-{acq}e{echonr}' pair instead (BIDScoin-style)")
                    else:
                        LOGGER.warning(f"No 'echo' encoding found in the filename, skipping: {match}")
                        continue
                    echos     = sorted(match.parent.glob(mepattern.name))
                    newechos  = [echo.parents[1]/unknowndatatype/echo.name for echo in echos]
                    if len(echos) == 1:
                        LOGGER.warning(f"Only one echo image found, nothing to do for: {match}")
                        continue

                    # Construct the combined-echo output filename and check if that file already exists
                    datatype = match.parent.name
                    if not output:
                        cefile = session/datatype/cename
                    elif output == 'derivatives':
                        cefile = bidsdir/'derivatives'/'multiecho'/subid/sesid/datatype/cename
                    else:
                        cefile = session/output/cename
                    cefile.parent.mkdir(parents=True, exist_ok=True)
                    if cefile.is_file() and not force:
                        LOGGER.warning(f"Outputfile {cefile} already exists, skipping: {match}")
                        continue

                    # Combine the multi-echo images
                    me.me_combine(mepattern, cefile, algorithm, weights, saveweights=False)

                    # (Re)move the original multi-echo images
                    if not output:
                        for echo, newecho in zip(echos, newechos):
                            LOGGER.info(f"Moving original echo image: {echo} -> {newecho}")
                            newecho.parent.mkdir(parents=True, exist_ok=True)
                            echo.replace(newecho)
                            echo.with_suffix('').with_suffix('.json').replace(newecho.with_suffix('').with_suffix('.json'))
                    elif output == datatype:
                        for echo in echos:
                            LOGGER.info(f"Removing original echo image: {echo}")
                            echo.unlink()
                            echo.with_suffix('').with_suffix('.json').unlink()

                    # Construct the path names relative to the session folder (as in the scans.tsv file)
                    oldechos_rel = [echo.relative_to(session).as_posix() for echo in echos]
                    newechos_rel = [echo.relative_to(session).as_posix() for echo in echos + newechos if echo.is_file()]
                    if output == 'derivatives':
                        cefile_rel = ''                 # A remote folder cannot be specified as IntendedFor :-(
                    else:
                        cefile_rel = cefile.relative_to(session).as_posix()

                    # Update the IntendedFor fields of the fieldmaps (i.e. remove the old echos, add the echo-combined image and, optionally, the new echos)
                    if output != 'derivatives' and (session/'fmap').is_dir():
                        for fmap in (session/'fmap').glob('*.json'):
                            with fmap.open('r') as fmap_fid:
                                metadata = json.load(fmap_fid)
                            intendedfor = metadata.get('IntendedFor', [])
                            if isinstance(intendedfor, str):
                                intendedfor = [intendedfor]
                            if sesid:                   # NB: IntendedFor is relative to the subject folder
                                intendedfor = [file.split(sesid+'/',1)[1] for file in intendedfor]
                            if oldechos_rel[0] in intendedfor:
                                LOGGER.info(f"Updating 'IntendedFor' in {fmap}")
                                relfiles                = [file for file in intendedfor if file not in oldechos_rel] + newechos_rel + [cefile_rel]
                                metadata['IntendedFor'] = [(Path(sesid)/relfile).as_posix() for relfile in relfiles]
                                with fmap.open('w') as fmap_fid:
                                    json.dump(metadata, fmap_fid, indent=4)

                    # Update the scans.tsv file
                    if (bidsdir/'.bidsignore').is_file():
                        bidsignore = (bidsdir/'.bidsignore').read_text().splitlines()
                    else:
                        bidsignore = [unknowndatatype + '/']
                    scans_tsv = session/f"{subid}{bids.add_prefix('_', sesid)}_scans.tsv"
                    if scans_tsv.is_file():

                        scans_table = pd.read_csv(scans_tsv, sep='\t', index_col='filename')
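                        # A temporary 'oldrow' acts as a template row: it is taken from the first
                        # original echo when that file is still listed, otherwise reconstructed from
                        # the combined image's AcquisitionTime; the rows added below copy it and the
                        # placeholder is dropped again before the table is saved.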
                        if oldechos_rel[0] in scans_table.index:
                            scans_table.loc['oldrow'] = scans_table.loc[oldechos_rel[0]]
                        elif 'acq_time' in scans_table:
                            with cefile.with_suffix('').with_suffix('.json').open('r') as fid:
                                metadata = json.load(fid)
                            date = scans_table.iloc[0]['acq_time'].split('T')[0]
                            scans_table.loc['oldrow', 'acq_time'] = f"{date}T{metadata.get('AcquisitionTime')}"
                        else:
                            scans_table.loc['oldrow'] = None

                        if output+'/' not in bidsignore + ['derivatives/'] and cefile.parent.name in bids.bidsdatatypes:
                            LOGGER.info(f"Adding '{cefile_rel}' to '{scans_tsv}'")
                            scans_table.loc[cefile_rel] = scans_table.loc['oldrow']

                        for echo in oldechos_rel + newechos_rel:
                            if echo in scans_table.index and not (session/echo).is_file():
                                LOGGER.info(f"Removing '{echo}' from '{scans_tsv}'")
                                scans_table.drop(echo, inplace=True)
                            elif echo not in scans_table.index and (session/echo).is_file() and echo.split('/')[0] in bids.bidsdatatypes:
                                LOGGER.info(f"Adding '{echo}' to '{scans_tsv}'")
                                scans_table.loc[echo] = scans_table.loc['oldrow']       # NB: Assuming that the echo-rows are all identical

                        scans_table.drop('oldrow', inplace=True)
                        scans_table.sort_values(by=['acq_time','filename'], inplace=True)
                        scans_table.replace('','n/a').to_csv(scans_tsv, sep='\t', encoding='utf-8', na_rep='n/a')
                        for scan in scans_table.index:
                            if not (session/scan).is_file():
                                LOGGER.warning(f"Found non-existent file '{scan}' in '{scans_tsv}'")

    LOGGER.info('-------------- FINISHED! -------------')
    LOGGER.info('')
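
The echo handling above relies on bids.get_bidsvalue to read and rewrite the 'echo' entity in the filename. A simplified stand-in built on plain regular expressions (the function name and regex are illustrative, not the BIDScoin implementation) could look like this:

import re

def echo_pattern_and_cename(filename: str):
    """Derive the glob pattern that selects all echoes and the combined-echo
    output name from a BIDS-style multi-echo filename (illustrative sketch,
    not the bids.get_bidsvalue machinery used above)."""
    found = re.search(r"_echo-(\d+)", filename)
    if not found:
        raise ValueError(f"No 'echo' entity found in: {filename}")
    echonr    = found.group(1)
    mepattern = filename.replace(f"_echo-{echonr}", "_echo-*")  # selects all echoes
    cename    = filename.replace(f"_echo-{echonr}", "")         # combined-echo filename
    return mepattern, cename

# echo_pattern_and_cename("sub-01_task-rest_echo-2_bold.nii.gz")
# -> ("sub-01_task-rest_echo-*_bold.nii.gz", "sub-01_task-rest_bold.nii.gz")
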
Ejemplo n.º 28
0
def cli(
    input_csvs: List[TextIOWrapper],
    out_dir: Path,
    csdap_api_url: str,
    username: str,
    password: str,
    verbosity: int,
    concurrency: int,
    scene_ids: List[str],
    asset_types: List[str],
):
    """
    The CSDAP Bulk Download tool intends to make it easy to download many
    assets from an order placed within the CSDAP system.

    \b
    The Assets CSV must contain a header row with the following columns:
      - collection_id
      - scene_id
      - asset_type

    A user has the option to filter the csv file and only download a subset
    of files based on scene_id or asset_type.

    Note that a user is only granted access to download each file once.

    For more information on CSDAP, please visit https://csdap.earthdata.nasa.gov.
    For support, contact [email protected].
    """

    setup_logger(verbosity)

    csdap = CsdapClient(csdap_api_url)
    token = csdap.get_auth_token(username, password)

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=concurrency, thread_name_prefix="CsdapDownload"
    ) as executor, logging_redirect_tqdm():

        logger.debug(
            "Creating threadpool with max_workers of %s", executor._max_workers
        )
        future_to_path = {}

        def log_results(future):
            path = future_to_path.pop(future)
            try:
                logger.info("%s: %s", path, future.result())
            except Exception as exc:
                if verbosity:
                    logger.exception("%s generated an exception: %s", path, exc)
                else:
                    logger.warning("%s: Failed to download", path)

        for input_csv in input_csvs:
            api_version = 2
            for row in csv.DictReader(input_csv):
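                # Legacy (v1) CSVs identify downloads by 'order_id'; the current
                # (v2) format uses 'collection_id' instead.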
                if "order_id" in row and api_version == 2:
                    logger.warn("Detected legacy CSV.")
                    api_version = 1

                base = Path(
                    row["order_id"] if api_version == 1 else row["collection_id"]
                )
                path = base / row["scene_id"] / row["asset_type"]

                # Filter rows
                if scene_ids and row["scene_id"].lower() not in scene_ids:
                    logger.debug("Skipping %s, does not pass scene_id filter", path)
                    continue
                if asset_types and row["asset_type"].lower() not in asset_types:
                    logger.debug("Skipping %s, does not pass asset_type filter", path)
                    continue

                # Schedule work
                future = executor.submit(
                    csdap.download_file,
                    path=path,
                    out_dir=out_dir,
                    token=token,
                    endpoint_version=api_version,
                )
                future_to_path[future] = path
                future.add_done_callback(log_results)

                # To avoid the memory overhead of scheduling the entire CSV as futures,
                # we wait for some futures to complete before scheduling more
                if len(future_to_path) >= 2 * executor._max_workers:
                    logger.debug(
                        "Waiting for some downloads to finish before continuing to "
                        "process CSV rows..."
                    )
                    concurrent.futures.wait(
                        future_to_path, return_when=concurrent.futures.FIRST_COMPLETED
                    )

        # Log outstanding futures
        logger.debug(
            "All CSVs processed, waiting for remaining %s downloads to complete",
            len(future_to_path),
        )
        concurrent.futures.wait(
            future_to_path, return_when=concurrent.futures.ALL_COMPLETED
        )

        click.echo("Complete.")
Ejemplo n.º 29
0
def find_decoding_function_features(
        vw,
        functions,
        disable_progress=False) -> Tuple[Dict[int, Dict], Dict[int, str]]:
    decoding_candidate_functions: DefaultDict[
        int, Dict] = collections.defaultdict(dict)

    library_functions: Dict[int, str] = dict()

    pbar = tqdm.tqdm
    if disable_progress:
        logger.info("identifying decoding function features...")
        # do not use tqdm to avoid unnecessary side effects when caller intends
        # to disable progress completely
        pbar = lambda s, *args, **kwargs: s

    functions = sorted(functions)
    n_funcs = len(functions)

    pb = pbar(functions,
              desc="finding decoding function features",
              unit=" functions",
              postfix="skipped 0 library functions")
    with logging_redirect_tqdm(), redirecting_print_to_tqdm():
        for f in pb:
            function_address = int(f)

            if is_thunk_function(vw, function_address):
                continue

            if viv_utils.flirt.is_library_function(vw, function_address):
                # TODO handle j_j_j__free_base (lib function wrappers), e.g. 0x140035AF0 in d2ca76...
                # TODO ignore function called to by library functions
                function_name = viv_utils.get_function_name(
                    vw, function_address)
                logger.debug("skipping library function 0x%x (%s)",
                             function_address, function_name)
                library_functions[function_address] = function_name
                n_libs = len(library_functions)
                percentage = 100 * (n_libs / n_funcs)
                if isinstance(pb, tqdm.tqdm):
                    pb.set_postfix_str("skipped %d library functions (%d%%)" %
                                       (n_libs, percentage))
                continue

            f = viv_utils.Function(vw, function_address)

            function_data = {"meta": get_function_meta(f), "features": list()}

            # meta data features
            function_data["features"].append(
                BlockCount(function_data["meta"].get("block_count")))
            function_data["features"].append(
                InstructionCount(
                    function_data["meta"].get("instruction_count")))
            function_data["features"].append(
                Arguments(function_data["meta"].get("api",
                                                    []).get("arguments")))

            for feature in extract_function_features(f):
                function_data["features"].append(feature)

            for bb in f.basic_blocks:
                for feature in extract_basic_block_features(f, bb):
                    function_data["features"].append(feature)

                for insn in bb.instructions:
                    for feature in extract_insn_features(f, bb, insn):
                        function_data["features"].append(feature)

            for feature in abstract_features(function_data["features"]):
                function_data["features"].append(feature)

            function_data["score"] = get_function_score_weighted(
                function_data["features"])

            logger.debug("analyzed function 0x%x - total score: %f",
                         function_address, function_data["score"])
            for feat in function_data["features"]:
                logger.trace("  %s", feat)

            decoding_candidate_functions[function_address] = function_data

        return decoding_candidate_functions, library_functions
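
redirecting_print_to_tqdm() comes from the surrounding project rather than from tqdm itself; a generic equivalent can be built by temporarily swapping builtins.print for tqdm.write. A sketch under that assumption (not the project's actual implementation):

import builtins
import contextlib

import tqdm

@contextlib.contextmanager
def redirecting_print_to_tqdm():
    # Route bare print() calls through tqdm.write() so they do not clobber an
    # active progress bar; restore the original print() on exit.
    old_print = builtins.print

    def tqdm_print(*args, **kwargs):
        try:
            tqdm.tqdm.write(" ".join(str(arg) for arg in args))
        except Exception:
            old_print(*args, **kwargs)

    builtins.print = tqdm_print
    try:
        yield
    finally:
        builtins.print = old_print
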
Ejemplo n.º 30
0
def bidscoiner(rawfolder: str,
               bidsfolder: str,
               subjects: list = (),
               force: bool = False,
               participants: bool = False,
               bidsmapfile: str = 'bidsmap.yaml') -> None:
    """
    Main function that processes all the subjects and sessions in the sourcefolder and uses the
    bidsmap.yaml file in bidsfolder/code/bidscoin to cast the data into the BIDS folder.

    :param rawfolder:       The root folder-name of the sub/ses/data/file tree containing the source data files
    :param bidsfolder:      The name of the BIDS root folder
    :param subjects:        List of selected subjects / participants (i.e. sub-# names / folders) to be processed (the sub- prefix can be removed). Otherwise all subjects in the sourcefolder will be selected
    :param force:           If True, subjects will be processed, regardless of existing folders in the bidsfolder. Otherwise existing folders will be skipped
    :param participants:    If True, subjects in participants.tsv will not be processed (this could be used e.g. to protect these subjects from being reprocessed), even when force=True
    :param bidsmapfile:     The name of the bidsmap YAML-file. If the bidsmap pathname is relative (i.e. no "/" in the name) then it is assumed to be located in bidsfolder/code/bidscoin
    :return:                Nothing
    """

    # Input checking & defaults
    rawfolder = Path(rawfolder).resolve()
    bidsfolder = Path(bidsfolder).resolve()
    bidsmapfile = Path(bidsmapfile)

    # Start logging
    bidscoin.setup_logging(bidsfolder / 'code' / 'bidscoin' / 'bidscoiner.log')
    LOGGER.info('')
    LOGGER.info(
        f"-------------- START BIDScoiner {localversion}: BIDS {bidscoin.bidsversion()} ------------"
    )
    LOGGER.info(
        f">>> bidscoiner sourcefolder={rawfolder} bidsfolder={bidsfolder} subjects={subjects} force={force} participants={participants} bidsmap={bidsmapfile}"
    )

    # Create a code/bidscoin subfolder
    (bidsfolder / 'code' / 'bidscoin').mkdir(parents=True, exist_ok=True)

    # Create a dataset description file if it does not exist
    dataset_file = bidsfolder / 'dataset_description.json'
    generatedby = [{
        "Name": "BIDScoin",
        "Version": localversion,
        "CodeURL": "https://github.com/Donders-Institute/bidscoin"
    }]
    if not dataset_file.is_file():
        LOGGER.info(f"Creating dataset description file: {dataset_file}")
        dataset_description = {
            "Name": "REQUIRED. Name of the dataset",
            "GeneratedBy": generatedby,
            "BIDSVersion": str(bidscoin.bidsversion()),
            "DatasetType": "raw",
            "License": "RECOMMENDED. The license for the dataset. The use of license name abbreviations is RECOMMENDED for specifying a license. The corresponding full license text MAY be specified in an additional LICENSE file",
            "Authors": ["OPTIONAL. List of individuals who contributed to the creation/curation of the dataset"],
            "Acknowledgements": "OPTIONAL. Text acknowledging contributions of individuals or institutions beyond those listed in Authors or Funding",
            "HowToAcknowledge": "OPTIONAL. Instructions how researchers using this dataset should acknowledge the original authors. This field can also be used to define a publication that should be cited in publications that use the dataset",
            "Funding": ["OPTIONAL. List of sources of funding (grant numbers)"],
            "EthicsApprovals": ["OPTIONAL. List of ethics committee approvals of the research protocols and/or protocol identifiers"],
            "ReferencesAndLinks": [
                "OPTIONAL. List of references to publication that contain information on the dataset, or links",
                "https://github.com/Donders-Institute/bidscoin"
            ],
            "DatasetDOI": "OPTIONAL. The Document Object Identifier of the dataset (not the corresponding paper)"
        }
    else:
        with dataset_file.open('r') as fid:
            dataset_description = json.load(fid)
        if 'BIDScoin' not in [
                generatedby_['Name']
                for generatedby_ in dataset_description.get('GeneratedBy', [])
        ]:
            LOGGER.info(f"Adding {generatedby} to {dataset_file}")
            dataset_description['GeneratedBy'] = dataset_description.get(
                'GeneratedBy', []) + generatedby
    with dataset_file.open('w') as fid:
        json.dump(dataset_description, fid, indent=4)

    # Create a README file if it does not exist
    readme_file = bidsfolder / 'README'
    if not readme_file.is_file():
        LOGGER.info(f"Creating README file: {readme_file}")
        readme_file.write_text(
            f"A free form text ( README ) describing the dataset in more details that SHOULD be provided\n\n"
            f"The raw BIDS data was created using BIDScoin {localversion}\n"
            f"All provenance information and settings can be found in ./code/bidscoin\n"
            f"For more information see: https://github.com/Donders-Institute/bidscoin\n"
        )

    # Get the bidsmap heuristics from the bidsmap YAML-file
    bidsmap, _ = bids.load_bidsmap(bidsmapfile,
                                   bidsfolder / 'code' / 'bidscoin')
    dataformats = [
        dataformat for dataformat in bidsmap
        if dataformat and dataformat not in ('Options', 'PlugIns')
    ]  # Handle legacy bidsmaps (-> 'PlugIns')
    if not bidsmap:
        LOGGER.error(
            f"No bidsmap file found in {bidsfolder}. Please run the bidsmapper first and/or use the correct bidsfolder"
        )
        return

    # Load the data conversion plugins
    plugins = [
        bidscoin.import_plugin(plugin, ('bidscoiner_plugin', ))
        for plugin, options in bidsmap['Options']['plugins'].items()
    ]
    plugins = [plugin for plugin in plugins
               if plugin]  # Filter the empty items from the list
    if not plugins:
        LOGGER.warning(
            f"The plugins listed in your bidsmap['Options'] did not have a usable `bidscoiner_plugin` function, nothing to do"
        )
        LOGGER.info('-------------- FINISHED! ------------')
        LOGGER.info('')
        return

    # Append options to the .bidsignore file
    bidsignore_items = [
        item.strip()
        for item in bidsmap['Options']['bidscoin']['bidsignore'].split(';')
    ]
    bidsignore_file = bidsfolder / '.bidsignore'
    if bidsignore_items:
        LOGGER.info(f"Writing {bidsignore_items} entries to {bidsignore_file}")
        if bidsignore_file.is_file():
            bidsignore_items += bidsignore_file.read_text().splitlines()
        with bidsignore_file.open('w') as bidsignore:
            for item in set(bidsignore_items):
                bidsignore.write(item + '\n')

    # Get the table & dictionary of the subjects that have been processed
    participants_tsv = bidsfolder / 'participants.tsv'
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    else:
        participants_table = pd.DataFrame()
        participants_table.index.name = 'participant_id'

    # Get the list of subjects
    subprefix = bidsmap['Options']['bidscoin']['subprefix'].replace('*', '')
    sesprefix = bidsmap['Options']['bidscoin']['sesprefix'].replace('*', '')
    if not subjects:
        subjects = bidscoin.lsdirs(
            rawfolder, (subprefix if subprefix != '*' else '') + '*')
        if not subjects:
            LOGGER.warning(f"No subjects found in: {rawfolder/subprefix}*")
    else:
        subjects = [
            rawfolder / (subprefix + re.sub(f"^{subprefix}", '', subject))
            for subject in subjects
        ]  # Make sure there is a sub-prefix

    # Loop over all subjects and sessions and convert them using the bidsmap entries
    with logging_redirect_tqdm():
        for n, subject in enumerate(
                tqdm(subjects, unit='subject', leave=False), 1):

            LOGGER.info(
                f"------------------- Subject {n}/{len(subjects)} -------------------"
            )
            if participants and subject.name in list(participants_table.index):
                LOGGER.info(
                    f"Skipping subject: {subject} ({n}/{len(subjects)})")
                continue
            if not subject.is_dir():
                LOGGER.warning(
                    f"The '{subject}' subject folder does not exist")
                continue

            sessions = bidscoin.lsdirs(
                subject, (sesprefix if sesprefix != '*' else '') + '*')
            if not sessions or (subject / 'DICOMDIR').is_file():
                sessions = [subject]
            for session in sessions:

                # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file
                sesfolders, unpacked = bids.unpack(session)
                for sesfolder in sesfolders:

                    # Check if we should skip the session-folder
                    datasource = bids.get_datasource(
                        sesfolder, bidsmap['Options']['plugins'])
                    if not datasource.dataformat:
                        LOGGER.info(
                            f"No coinable datasources found in '{sesfolder}'")
                        continue
                    subid = bidsmap[datasource.dataformat]['subject']
                    sesid = bidsmap[datasource.dataformat]['session']
                    subid, sesid = datasource.subid_sesid(
                        subid, sesid if sesid else '')
                    bidssession = bidsfolder / subid / sesid  # TODO: Support DICOMDIR with multiple subjects (as in PYDICOMDIR)
                    if not force and bidssession.is_dir():
                        datatypes = []
                        for dataformat in dataformats:
                            # See which datatypes we already have in the bids session-folder
                            for datatype in bidscoin.lsdirs(bidssession):
                                # See if we are going to add data for this datatype (iterdir() is
                                # wrapped in any() so that empty datatype folders do not count)
                                if any(datatype.iterdir()) and bidsmap[dataformat].get(datatype.name):
                                    datatypes.append(datatype.name)
                        if datatypes:
                            LOGGER.info(
                                f"Skipping processed session: {bidssession} already has {datatypes} data (you can carefully use the -f option to overrule)"
                            )
                            continue

                    LOGGER.info(f"Coining datasources in: {sesfolder}")
                    if bidssession.is_dir():
                        LOGGER.warning(
                            f"Existing BIDS output-directory found, which may result in duplicate data (with increased run-index). Make sure {bidssession} was cleaned-up from old data before (re)running the bidscoiner"
                        )
                    bidssession.mkdir(parents=True, exist_ok=True)

                    # Run the bidscoiner plugins
                    for module in plugins:
                        LOGGER.info(
                            f"Executing plugin: {Path(module.__file__).name}")
                        module.bidscoiner_plugin(sesfolder, bidsmap,
                                                 bidssession)

                    # Add the special fieldmap metadata (IntendedFor, B0FieldIdentifier, TE, etc)
                    addmetadata(bidssession, subid, sesid)

                    # Clean-up the temporary unpacked data
                    if unpacked:
                        shutil.rmtree(sesfolder)

    # Re-read the participants_table and store the collected personals in the json sidecar-file
    if participants_tsv.is_file():
        participants_table = pd.read_csv(participants_tsv, sep='\t')
        participants_table.set_index(['participant_id'],
                                     verify_integrity=True,
                                     inplace=True)
    participants_json = participants_tsv.with_suffix('.json')
    participants_dict = {}
    if participants_json.is_file():
        with participants_json.open('r') as json_fid:
            participants_dict = json.load(json_fid)
    if not participants_dict.get('participant_id'):
        participants_dict['participant_id'] = {
            'Description': 'Unique participant identifier'
        }
    if not participants_dict.get(
            'session_id') and 'session_id' in participants_table.columns:
        participants_dict['session_id'] = {'Description': 'Session identifier'}
    newkey = False
    for col in participants_table.columns:
        if col not in participants_dict:
            newkey = True
            participants_dict[col] = dict(
                LongName='Long (unabbreviated) name of the column',
                Description='Description of the column',
                Levels=dict(Key='Value (This is for categorical variables: a dictionary of possible values (keys) and their descriptions (values))'),
                Units='Measurement units. [<prefix symbol>]<unit symbol> format following the SI standard is RECOMMENDED'
            )

    # Write the collected data to the participant files
    if newkey:
        LOGGER.info(f"Writing subject meta data to: {participants_json}")
        with participants_json.open('w') as json_fid:
            json.dump(participants_dict, json_fid, indent=4)

    LOGGER.info('-------------- FINISHED! ------------')
    LOGGER.info('')

    bidscoin.reporterrors()
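
The sidecar bookkeeping at the end of bidscoiner follows a pattern that is useful on its own: every column in participants.tsv should have a matching entry in participants.json. A self-contained sketch of just that step (the function name, paths and stub description are illustrative, not BIDScoin's implementation):

import json
from pathlib import Path

import pandas as pd

def sync_participants_sidecar(bidsfolder: Path) -> None:
    """Add a stub description for every participants.tsv column that is
    missing from participants.json (illustrative sketch)."""
    tsv_file  = bidsfolder / 'participants.tsv'
    json_file = bidsfolder / 'participants.json'

    table   = pd.read_csv(tsv_file, sep='\t')
    sidecar = json.loads(json_file.read_text()) if json_file.is_file() else {}

    changed = False
    for column in table.columns:
        if column not in sidecar:
            sidecar[column] = {'Description': f"TODO: describe the '{column}' column"}
            changed = True

    if changed:
        json_file.write_text(json.dumps(sidecar, indent=4))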