Example #1
def test_format_command_strip_leading_dashes(path):
    ds = Dataset(path).create()
    eq_(format_command(ds, ["--", "cmd", "--opt"]), "cmd --opt")
    eq_(format_command(ds, ["--"]), "")
    # Can repeat to escape.
    eq_(format_command(ds, ["--", "--", "ok"]), "-- ok")
    # String stays as is.
    eq_(format_command(ds, "--"), "--")
Example #3
def _datalad_format_command(ds, spec):
    """Adjust `spec` to use `datalad run`-style formatting.

    Create "*_array" keys and format commands with DataLad's `format_command`.
    """
    from datalad.interface.run import format_command
    # DataLad's GlobbedPaths _should_ be the same as ours, but let's use
    # DataLad's to avoid potential discrepancies with datalad-run's behavior.
    from datalad.interface.run import GlobbedPaths

    batch_parameters = spec.get("_resolved_batch_parameters") or [{}]
    spec["_command_array"] = []
    spec["_inputs_array"] = []
    spec["_outputs_array"] = []
    for cp in batch_parameters:
        fmt_kwds = {}
        for key in ["inputs", "outputs"]:
            if key in spec:
                parametrized = [io.format(p=cp) for io in spec[key]]
                gp = GlobbedPaths(parametrized)
                spec["_{}_array".format(key)].append(gp.expand(dot=False))
                fmt_kwds[key] = gp
        fmt_kwds["p"] = cp
        cmd_str = spec.get("_container_command_str",
                           spec["_resolved_command_str"])
        spec["_command_array"].append(format_command(ds, cmd_str, **fmt_kwds))

    exinputs = spec.get("_extra_inputs", [])
    spec["_extra_inputs_array"] = [exinputs] * len(batch_parameters)
Example #4
    def fn(dset, results):
        header = """\
#!/bin/sh
#
# This file was generated by running (the equivalent of)
#
#   datalad rerun --script={script}{since} {revision}
#
# in {ds}{path}\n"""
        ofh.write(
            header.format(
                script=script,
                since="" if since is None else " --since=" + since,
                revision=dset.repo.get_hexsha(revision),
                ds='dataset {} at '.format(dset.id) if dset.id else '',
                path=dset.path))

        for res in results:
            if res["status"] != "ok":
                yield res
                return

            if "run_info" not in res:
                continue

            run_info = res["run_info"]
            cmd = run_info["cmd"]

            expanded_cmd = format_command(
                dset, cmd,
                **dict(run_info,
                       dspath=dset.path,
                       pwd=op.join(dset.path, run_info["pwd"])))

            msg = res["run_message"]
            if msg == _format_cmd_shorty(expanded_cmd):
                msg = ''

            ofh.write("\n" + "".join("# " + ln
                                     for ln in msg.splitlines(True)) + "\n")
            commit_descr = dset.repo.describe(res["commit"])
            ofh.write('# (record: {})\n'.format(
                commit_descr if commit_descr else res["commit"]))

            ofh.write(expanded_cmd + "\n")
        if ofh is not sys.stdout:
            ofh.close()

        if ofh is sys.stdout:
            yield None
        else:
            yield get_status_dict("run",
                                  ds=dset,
                                  status="ok",
                                  path=script,
                                  message=("Script written to %s", script))
Example #5
def _datalad_format_command(ds, spec):
    """Adjust `spec` to use `datalad run`-style formatting.

    The "inputs", "outputs", and "command_str" keys in `spec` are replaced and
    the original are moved under the `*_unexpanded` key.
    """
    from datalad.interface.run import format_command
    from datalad.interface.run import GlobbedPaths

    fmt_kwds = {}
    for key in ["inputs", "outputs"]:
        if key in spec:
            spec["{}_unexpanded".format(key)] = spec[key]
            gp = GlobbedPaths(spec[key])
            spec[key] = gp.expand(dot=False)
            fmt_kwds[key] = gp

    cmd_expanded = format_command(ds, spec["command_str"], **fmt_kwds)
    spec["command_str_unexpanded"] = spec["command_str"]
    spec["command_str"] = cmd_expanded
Example #6
    def __call__(cmd=None,
                 dataset=None,
                 inputs=None,
                 outputs=None,
                 expand=None,
                 explicit=False,
                 message=None,
                 sidecar=None,
                 jobcfg='default',
                 submit=False):

        # TODO make sure a different rel_pwd is handled properly on the remote end
        pwd, rel_pwd = get_command_pwds(dataset)

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='preparing a remote command execution')

        try:
            cmd_expanded = format_command(ds,
                                          cmd,
                                          pwd=pwd,
                                          dspath=ds.path,
                                          inputs=inputs,
                                          outputs=outputs)
        except KeyError as exc:
            yield get_status_dict(
                'htcprepare',
                ds=ds,
                status='impossible',
                message=('command has an unrecognized placeholder: %s', exc))
            return

        transfer_files_list = ['pre.sh', 'post.sh']

        # where all the submission packs live
        subroot_dir = get_submissions_dir(ds)
        subroot_dir.mkdir(parents=True, exist_ok=True)

        # location of to-be-created submission
        submission_dir = ut.Path(
            tempfile.mkdtemp(prefix='submit_', dir=text_type(subroot_dir)))
        # submission ID: the temp dir name minus the 'submit_' prefix
        submission = submission_dir.name[7:]

        split_cmd = shlex.split(cmd_expanded)
        # is this a singularity job?
        singularity_job = get_singularity_jobspec(split_cmd)
        if not singularity_job:
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string('datalad_htcondor',
                                    'resources/scripts/runner_direct.sh'))
            job_args = split_cmd
        else:
            # link the container into the submission dir
            (submission_dir / 'singularity.simg').symlink_to(
                ut.Path(singularity_job[0]).resolve())
            transfer_files_list.append('singularity.simg')
            # arguments of the job
            job_args = singularity_job[1]
            job_args.insert(0, 'singularity.simg')

            # TODO conditional on run_as_user=false
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string(
                        'datalad_htcondor',
                        'resources/scripts/runner_singularity_anon.sh'))
        make_executable(submission_dir / 'runner.sh')

        # htcondor wants the log dir to exist at submit time
        # TODO ATM we only support a single job per cluster submission
        (submission_dir / 'job_0' / 'logs').mkdir(parents=True)

        # TODO make job pre/post script selection configurable
        with (submission_dir / 'pre.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/pre_posix_chirp.sh'))
        make_executable(submission_dir / 'pre.sh')

        with (submission_dir / 'post.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/post_posix.sh'))
        make_executable(submission_dir / 'post.sh')

        # API support selection (bound dataset methods and such)
        # internal import to avoid circularities
        from datalad.api import (
            rev_status as status, )

        inputs = GlobbedPaths(inputs, pwd=pwd)
        prepare_inputs(ds, inputs)

        # it could be that an input expression does not expand,
        # because it doesn't match anything. In such a case
        # we need to filter out such globs to not confuse
        # the status() call below that only takes real paths
        inputs = [p for p in inputs.expand(full=True) if op.lexists(p)]
        # now figure out what matches the remaining paths in the
        # entire repo and dump a list of files to transfer
        if inputs:
            with (submission_dir / 'input_files').open('w') as f:
                # TODO disable output renderer
                for p in ds.rev_status(
                        path=inputs,
                        # TODO do we really want that True? I doubt it
                        # this might pull in the world
                        recursive=False,
                        # we would have otherwise no idea
                        untracked='no',
                        result_renderer=None):
                    f.write(text_type(p['path']))
                    f.write(u'\0')
                transfer_files_list.append('input_files')

        if outputs:
            # write the output globs to a file for eval on the execute
            # side
            # XXX we may not want to eval them on the remote side
            # at all, however. This would make things different
            # than with local execute, where we also just write to
            # a dataset and do not have an additional filter
            (submission_dir / 'output_globs').write_text(
                # we need a final trailing delimiter as a terminator
                u'\0'.join(outputs) + u'\0')
            transfer_files_list.append('output_globs')

        (submission_dir /
         'source_dataset_location').write_text(text_type(ds.pathobj) + op.sep)
        transfer_files_list.append('source_dataset_location')

        with (submission_dir / 'cluster.submit').open('w') as f:
            f.write(
                submission_template.format(
                    executable='runner.sh',
                    # TODO if singularity_job else 'job.sh',
                    transfer_files_list=','.join(
                        op.join(op.pardir, f) for f in transfer_files_list),
                    **submission_defaults))

            f.write(u'\narguments = "{}"\nqueue\n'.format(
                # TODO deal with single quotes in the args
                ' '.join("'{}'".format(a) for a in job_args)))

        # dump the run command args into a file for re-use
        # when the result is merged
        # include even args that are already evaluated and
        # acted upon, to be able to convince `run` to create
        # a full run record that maybe could be re-run
        # locally
        json_py.dump(
            dict(
                cmd=cmd,
                inputs=inputs,
                outputs=outputs,
                expand=expand,
                explicit=explicit,
                message=message,
                sidecar=sidecar,
                # report the PWD too, to give `run` a chance
                # to be correct after the fact
                pwd=pwd,
            ),
            text_type(submission_dir / 'runargs.json'))

        # we use this file to inspect what state this submission is in
        (submission_dir / 'status').write_text(u'prepared')

        yield get_status_dict(action='htc_prepare',
                              status='ok',
                              refds=text_type(ds.pathobj),
                              submission=submission,
                              path=text_type(submission_dir),
                              logger=lgr)

        if submit:
            try:
                Runner(cwd=text_type(submission_dir)).run(
                    ['condor_submit', 'cluster.submit'],
                    log_stdout=False,
                    log_stderr=False,
                    expect_stderr=True,
                    expect_fail=True,
                )
                (submission_dir / 'status').write_text(u'submitted')
                yield get_status_dict(action='htc_submit',
                                      status='ok',
                                      submission=submission,
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
            except CommandError as e:
                yield get_status_dict(action='htc_submit',
                                      status='error',
                                      submission=submission,
                                      message=('condor_submit failed: %s',
                                               exc_str(e)),
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
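
The KeyError handling at the top of __call__ can be reproduced in isolation:
format_command() raises KeyError when a command references a placeholder it
cannot resolve. Throwaway dataset, made-up command:

import tempfile
from datalad.api import Dataset
from datalad.interface.run import format_command

ds = Dataset(tempfile.mkdtemp()).create()
try:
    format_command(ds, "echo {no_such_placeholder}",
                   pwd=ds.path, dspath=ds.path)
except KeyError as exc:
    print("command has an unrecognized placeholder:", exc)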
Example #7
    # checks on unrelated subtrees.
    subject_dir = op.join(dataset.path, "sub-{}".format(subject))
    participants = op.join(dataset.path, "participants.tsv")
    from datalad.interface.run import format_command
    # TODO: This pattern is likely incomplete. Also: run probably needs to break
    # down format_command into smaller pieces (needs mere substitutions)
    # TODO: Post-run issue. Globs in outputs need to be evaluated AFTER execution
    # (again). May not yet exist.

    outputs = [subject_dir, participants]
    task = dataset.config.get("datalad.run.substitutions.bids-task")
    if task and task != "None":
        outputs.append(
            op.join(
                dataset.path,
                format_command(dataset,
                               "task-{bids-task}_{bids-modality}.json")))
    # we expect location to be a directory (with DICOMS somewhere beneath)
    if not op.isdir(location):
        raise ValueError("%s is not a directory" % location)

    from datalad.utils import with_pathsep
    # append location with /* to specify inputs for containers-run
    # we need to get those files, but nothing from within a possible .datalad
    # for example
    inputs = [with_pathsep(location) + "*", rel_spec_path]

    run_results = list()
    with patch.dict('os.environ', {
            'HIRNI_STUDY_SPEC': rel_spec_path,
            'HIRNI_SPEC2BIDS_SUBJECT': subject
    }):
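
The format_command() call in this example passes no explicit placeholder
values; it relies on substitutions configured as datalad.run.substitutions.*,
which format_command() reads from the dataset's configuration. A hedged sketch
with made-up values, passing equivalent substitutions directly instead of
setting the configuration:

import tempfile
from datalad.api import Dataset
from datalad.interface.run import format_command

ds = Dataset(tempfile.mkdtemp()).create()
subs = {"bids-task": "rest", "bids-modality": "bold"}
print(format_command(ds, "task-{bids-task}_{bids-modality}.json", **subs))
# -> task-rest_bold.json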