def format_command(dset, command, **kwds):
    """Fill the placeholders of `command` and return the result.

    Parameters
    ----------
    dset : Dataset
      Its configuration is queried for "datalad.run.substitutions" items;
      each one is offered as a placeholder value unless a keyword of the
      same name was passed explicitly.
    command : str or list
      Passed through `normalize_command` before formatting.
    `kwds` is passed to the `format` call. `inputs` and `outputs` are
    converted to GlobbedPaths if necessary (using the `pwd` keyword, if
    any), expanded, and each resulting path is quoted with
    `quote_cmdlinearg`.

    Returns
    -------
    formatted command (str)
    """
    prefix = "datalad.run.substitutions."
    template = normalize_command(command)
    formatter = SequenceFormatter()

    # Configured substitutions only fill gaps; explicit keywords win.
    for cfg_key, cfg_value in dset.config.items("datalad.run.substitutions"):
        kwds.setdefault(cfg_key.replace(prefix, ""), cfg_value)

    for io_name in ("inputs", "outputs"):
        paths = kwds.pop(io_name, None)
        if not isinstance(paths, GlobbedPaths):
            paths = GlobbedPaths(paths, pwd=kwds.get("pwd"))
        kwds[io_name] = [quote_cmdlinearg(p) for p in paths.expand(dot=False)]

    return formatter.format(template, **kwds)
def format_command(dset, command, **kwds):
    """Substitute placeholders in `command`.

    Parameters
    ----------
    dset : Dataset
      Source of the "datalad.run.substitutions" configuration items, which
      serve as fallback placeholder values for names not given in `kwds`.
    command : str or list
      Normalized with `normalize_command` before being formatted.
    `kwds` is passed to the `format` call. `inputs` and `outputs` are
    converted to GlobbedPaths if necessary, expanded, and every expanded
    path is quoted with `shlex_quote`.

    Returns
    -------
    formatted command (str)
    """
    section = "datalad.run.substitutions"
    template = normalize_command(command)
    formatter = SequenceFormatter()

    # Values passed by the caller take precedence over configured ones.
    for full_key, value in dset.config.items(section):
        short_key = full_key.replace(section + ".", "")
        kwds.setdefault(short_key, value)

    for io_name in ("inputs", "outputs"):
        globbed = kwds.pop(io_name, None)
        if not isinstance(globbed, GlobbedPaths):
            globbed = GlobbedPaths(globbed, pwd=kwds.get("pwd"))
        kwds[io_name] = [shlex_quote(path)
                         for path in globbed.expand(dot=False)]

    return formatter.format(template, **kwds)
def format_command(command, **kwds):
    """Substitute placeholders in `command`.

    Parameters
    ----------
    command : str or list
      Normalized with `normalize_command` before being formatted.
    `kwds` is passed to the `format` call.

    Returns
    -------
    formatted command (str)
    """
    template = normalize_command(command)
    return SequenceFormatter().format(template, **kwds)
def format_command(command, **kwds):
    """Substitute placeholders in `command`.

    Parameters
    ----------
    command : str or list
      Normalized with `normalize_command` before being formatted.
    `kwds` is passed to the `format` call. `inputs` and `outputs` are
    converted to GlobbedPaths if necessary (using the `pwd` keyword, if
    any) and replaced by their expansion.

    Returns
    -------
    formatted command (str)
    """
    template = normalize_command(command)
    formatter = SequenceFormatter()

    def _as_globbed(value):
        # Leave ready-made GlobbedPaths alone; wrap anything else.
        if isinstance(value, GlobbedPaths):
            return value
        return GlobbedPaths(value, pwd=kwds.get("pwd"))

    for io_name in ("inputs", "outputs"):
        kwds[io_name] = _as_globbed(kwds.pop(io_name, None)).expand(dot=False)

    return formatter.format(template, **kwds)
def run_command(cmd, dataset=None, inputs=None, outputs=None, expand=None,
                message=None, rerun_info=None, rerun_outputs=None, sidecar=None):
    """Run `cmd` in a dataset and save the outcome with a run record.

    This is a generator. The working directory is determined, inputs are
    retrieved, outputs are handed to `_unlock_or_remove`, the command is
    formatted and executed, and finally the changes are added to the
    dataset with a commit message that embeds a JSON run record (or the ID
    of a sidecar file holding it).

    Parameters
    ----------
    cmd : str or list
      Command to execute; passed through `normalize_command`. Placeholders
      are filled with `pwd`, `dspath`, `inputs` and `outputs`.
    dataset : Dataset or None
      Passed to `require_dataset`. When `rerun_info` carries a 'pwd', the
      `path` attribute of `dataset` is read directly.
    inputs, outputs
      Path specifications wrapped in GlobbedPaths relative to the working
      directory. Non-empty inputs are passed (expanded, full paths) to
      `ds.get` with on_failure="ignore"; non-empty outputs are passed to
      `_unlock_or_remove`.
    expand : {None, "inputs", "outputs", "both"}
      Determines the `expand` flag given to the respective GlobbedPaths.
    message : str or None
      First line of the commit message; when None, the result of
      `_format_cmd_shorty(cmd)` is used.
    rerun_info : dict or None
      Record of a previous run; the keys 'pwd', 'exit' and 'chain' are
      read. When given, the dirty-dataset check is skipped.
    rerun_outputs : list or None
      Additional paths handed to `_unlock_or_remove` as-is.
    sidecar : bool or None
      Whether to store the record in a sidecar file. With None, the
      'datalad.run.record-sidecar' configuration decides.

    Yields
    ------
    An 'impossible' status dict if the dataset is dirty on a non-rerun
    (the generator then stops), followed otherwise by whatever `ds.get`,
    `_unlock_or_remove` and `ds.add` yield.

    Raises
    ------
    CommandError
      On a rerun, when the command fails with an exit code different from
      the recorded one. On a regular run, when the exit code is non-zero;
      in that case the commit message is written to COMMIT_EDITMSG first
      and nothing is saved.
    """
    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    else:
        pwd, rel_pwd = get_command_pwds(dataset)

    ds = require_dataset(
        dataset, check_installed=True,
        purpose='tracking outcomes of a command')

    # not needed ATM
    #refds_path = ds.path

    # delayed imports
    from datalad.cmd import Runner

    lgr.debug('tracking command output underneath %s', ds)

    if not rerun_info and ds.repo.dirty:  # Rerun already takes care of this.
        yield get_status_dict(
            'run',
            ds=ds,
            status='impossible',
            message=('unsaved modifications present, '
                     'cannot detect changes by command'))
        return

    cmd = normalize_command(cmd)

    inputs = GlobbedPaths(inputs, pwd=pwd,
                          expand=expand in ["inputs", "both"])
    if inputs:
        for res in ds.get(inputs.expand(full=True), on_failure="ignore"):
            yield res

    outputs = GlobbedPaths(outputs, pwd=pwd,
                           expand=expand in ["outputs", "both"],
                           warn=not rerun_info)
    if outputs:
        for res in _unlock_or_remove(ds, outputs.expand(full=True)):
            yield res

    if rerun_outputs is not None:
        # These are files we need to unlock/remove for a rerun that aren't
        # included in the explicit outputs. Unlike inputs/outputs, these are
        # full paths, so we can pass them directly to unlock.
        for res in _unlock_or_remove(ds, rerun_outputs):
            yield res

    sfmt = SequenceFormatter()
    cmd_expanded = sfmt.format(cmd,
                               pwd=pwd,
                               dspath=ds.path,
                               inputs=inputs.expand(dot=False),
                               outputs=outputs.expand(dot=False))

    # we have a clean dataset, let's run things
    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            cmd_expanded,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to exitcode in the same way
        exc = e
        cmd_exitcode = e.code

        # NOTE(review): this check is kept inside the handler so that `exc`
        # is always bound when re-raised -- confirm against upstream layout.
        if rerun_info and rerun_info.get("exit", 0) != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")

    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        'chain': rerun_info["chain"] if rerun_info else [],
        'inputs': inputs.paths,
        'outputs': outputs.paths,
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd
    if ds.id:
        run_info["dsid"] = ds.id

    record = json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False)

    # An explicit `sidecar` argument wins; only None defers to the config.
    use_sidecar = sidecar or (
        sidecar is None and
        ds.config.get('datalad.run.record-sidecar', default=False))

    if use_sidecar:
        # record ID is hash of record itself
        from hashlib import md5
        record_id = md5(record.encode('utf-8')).hexdigest()
        record_dir = ds.config.get('datalad.run.record-directory', default=op.join('.datalad', 'runinfo'))
        record_path = op.join(ds.path, record_dir, record_id)
        # The file name is derived from the content, so an existing file is
        # not rewritten.
        if not op.lexists(record_path):
            # go for compression, even for minimal records not much difference, despite offset cost
            # wrap in list -- there is just one record
            dump2stream([run_info], record_path, compressed=True)

    # compose commit message
    # NOTE(review): the line breaks inside this template were restored from a
    # whitespace-collapsed source (subject line, blank line, delimited
    # record) -- confirm against whatever parses these messages on rerun.
    msg = u"""\
[DATALAD RUNCMD] {}

=== Do not change lines below ===
{}
^^^ Do not change lines above ^^^
"""
    # With a sidecar only the quoted record ID is embedded, otherwise the
    # full JSON record.
    msg = msg.format(
        message if message is not None else _format_cmd_shorty(cmd),
        '"{}"'.format(record_id) if use_sidecar else record)
    msg = assure_bytes(msg)

    if not rerun_info and cmd_exitcode:
        # Do not save after a failed regular run; leave the prepared message
        # behind so the user can commit manually if the failure is expected.
        msg_path = opj(relpath(ds.repo.repo.git_dir), "COMMIT_EDITMSG")
        with open(msg_path, "wb") as ofh:
            ofh.write(msg)
        lgr.info("The command had a non-zero exit code. "
                 "If this is expected, you can save the changes with "
                 "'datalad save -r -F%s .'",
                 msg_path)
        raise exc
    else:
        for r in ds.add('.', recursive=True, message=msg):
            yield r