def test_globbedpaths_misses(path):
    # Exercise `GlobbedPaths.misses` and `expand(include_misses=True)` with
    # the non-matching pattern placed at different positions in the input.
    # `path` serves as the `pwd` for globbing; its content is presumably
    # provided by a fixture outside this function (assertions below expect
    # the "*.txt" and "*.dat" patterns to match 1.txt, 3.txt and 2.dat).
    hits = ["1.txt", "3.txt", "2.dat"]

    # A lone pattern that matches nothing.
    lone_miss = GlobbedPaths(["amiss"], pwd=path)
    eq_(lone_miss.expand_strict(), [])
    eq_(lone_miss.misses, ["amiss"])
    eq_(lone_miss.expand(include_misses=True), ["amiss"])

    # miss at beginning
    leading = GlobbedPaths(["amiss", "*.txt", "*.dat"], pwd=path)
    eq_(leading.expand_strict(), hits)
    eq_(leading.expand(include_misses=True), ["amiss"] + hits)

    # miss in middle
    inner = GlobbedPaths(["*.txt", "amiss", "*.dat"], pwd=path)
    eq_(inner.expand_strict(), hits)
    eq_(inner.misses, ["amiss"])
    eq_(inner.expand(include_misses=True),
        ["1.txt", "3.txt", "amiss", "2.dat"])

    # miss at end
    trailing = GlobbedPaths(["*.txt", "*.dat", "amiss"], pwd=path)
    eq_(trailing.expand_strict(), hits)
    eq_(trailing.misses, ["amiss"])
    eq_(trailing.expand(include_misses=True), hits + ["amiss"])

    # miss at beginning, middle, and end
    scattered_patterns = [
        "amiss1", "amiss2", "*.txt", "amiss3", "*.dat", "amiss4",
    ]
    scattered = GlobbedPaths(scattered_patterns, pwd=path)
    eq_(scattered.expand_strict(), hits)
    eq_(scattered.misses, ["amiss1", "amiss2", "amiss3", "amiss4"])
    eq_(scattered.expand(include_misses=True),
        ["amiss1", "amiss2", "1.txt", "3.txt", "amiss3", "2.dat", "amiss4"])

    # Property expands if needed: `misses` is read here without any
    # explicit expand call beforehand.
    unexpanded = GlobbedPaths(["amiss"], pwd=path)
    eq_(unexpanded.misses, ["amiss"])
def test_globbedpaths_partial_matches(path):
    # Exercise `GlobbedPaths.partial_hits` and `expand(include_partial=True)`
    # for a pattern whose leading directory component matches while the
    # full pattern does not. `path` serves as the `pwd` for globbing.
    nested_pattern = op.join("?dir", "*.txt")
    txt_hits = ["1.txt", "3.txt"]
    expected_partial = [dirname + op.sep for dirname in ("adir", "bdir")]

    globbed = GlobbedPaths([nested_pattern, "*.txt"], pwd=path)
    eq_(globbed.expand_strict(), txt_hits)
    eq_(globbed.partial_hits, expected_partial)
    eq_(globbed.expand(include_partial=True), expected_partial + txt_hits)

    # Property expands if needed: `partial_hits` is read here without any
    # explicit expand call beforehand.
    unexpanded = GlobbedPaths([nested_pattern], pwd=path)
    eq_(unexpanded.partial_hits, expected_partial)
def run_command(cmd, dataset=None, inputs=None, outputs=None, expand=None,
                assume_ready=None, explicit=False, message=None, sidecar=None,
                dry_run=False,
                extra_info=None,
                rerun_info=None,
                extra_inputs=None,
                rerun_outputs=None,
                inject=False):
    """Run `cmd` in `dataset` and record the results.

    `Run.__call__` is a simple wrapper over this function. Aside from backward
    compatibility kludges, the only difference is that `Run.__call__` doesn't
    expose all the parameters of this function. The unexposed parameters are
    listed below.

    Parameters
    ----------
    extra_info : dict, optional
        Additional information to dump with the json run record. Any value
        given here will take precedence over the standard run key. Warning: To
        avoid collisions with future keys added by `run`, callers should try to
        use fairly specific key names and are encouraged to nest fields under a
        top-level "namespace" key (e.g., the project or extension name).
    rerun_info : dict, optional
        Record from a previous run. This is used internally by `rerun`.
    extra_inputs : list, optional
        Inputs to use in addition to those specified by `inputs`. Unlike
        `inputs`, these will not be injected into the {inputs} format field.
    rerun_outputs : list, optional
        Outputs, in addition to those in `outputs`, determined automatically
        from a previous run. This is used internally by `rerun`.
    inject : bool, optional
        Record results as if a command was run, skipping input and output
        preparation and command execution. In this mode, the caller is
        responsible for ensuring that the state of the working tree is
        appropriate for recording the command's results.

    Yields
    ------
    Result records for the run.

    Raises
    ------
    Exception
        When the command finished with a non-zero exit code and `rerun_info`
        is not given, the exception object returned by `_execute_command` is
        re-raised. If there is something to save, the prepared commit message
        is first written to COMMIT_EDITMSG in the repository's git directory.
    """
    if not cmd:
        lgr.warning("No command given")
        return

    # A rerun record may carry the working directory of the original run;
    # prefer it over the current one when a dataset was given.
    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    else:
        pwd, rel_pwd = get_command_pwds(dataset)

    ds = require_dataset(
        dataset, check_installed=True,
        purpose='track command outcomes')
    ds_path = ds.path

    lgr.debug('tracking command output underneath %s', ds)

    if not (rerun_info or inject):  # Rerun already takes care of this.
        # For explicit=True, we probably want to check whether any inputs have
        # modifications. However, we can't just do is_dirty(..., path=inputs)
        # because we need to consider subdatasets and untracked files.
        # MIH: is_dirty() is gone, but status() can do all of the above!
        if not explicit and ds.repo.dirty:
            yield get_status_dict(
                'run',
                ds=ds,
                status='impossible',
                message=(
                    'clean dataset required to detect changes from command; '
                    'use `datalad status` to inspect unsaved changes'))
            return

    cmd = normalize_command(cmd)

    # From here on `inputs`, `extra_inputs` and `outputs` are GlobbedPaths
    # objects rather than the raw arguments passed by the caller.
    inputs = GlobbedPaths(inputs, pwd=pwd,
                          expand=expand in ["inputs", "both"])
    extra_inputs = GlobbedPaths(extra_inputs, pwd=pwd,
                                # Follow same expansion rules as `inputs`.
                                expand=expand in ["inputs", "both"])
    outputs = GlobbedPaths(outputs, pwd=pwd,
                           expand=expand in ["outputs", "both"])

    # ATTN: For correct path handling, all dataset commands call should be
    # unbound. They should (1) receive a string dataset argument, (2) receive
    # relative paths, and (3) happen within a chpwd(pwd) context.
    if not (inject or dry_run):
        with chpwd(pwd):
            for res in prepare_inputs(
                    ds_path,
                    [] if assume_ready in ["inputs", "both"] else inputs,
                    # Ignore --assume-ready for extra_inputs. It's an unexposed
                    # implementation detail that lets wrappers sneak in inputs.
                    extra_inputs):
                yield res

            if assume_ready not in ["outputs", "both"]:
                if outputs:
                    for res in _install_and_reglob(ds_path, outputs):
                        yield res
                    for res in _unlock_or_remove(ds_path,
                                                 outputs.expand_strict()):
                        yield res

                if rerun_outputs is not None:
                    for res in _unlock_or_remove(ds_path, rerun_outputs):
                        yield res
    else:
        # If an inject=True caller wants to override the exit code, they can do
        # so in extra_info.
        cmd_exitcode = 0
        exc = None

    try:
        cmd_expanded = format_command(
            ds, cmd,
            pwd=pwd,
            dspath=ds_path,
            # Check if the command contains "{tmpdir}" to avoid creating an
            # unnecessary temporary directory in most but not all cases.
            tmpdir=mkdtemp(prefix="datalad-run-") if "{tmpdir}" in cmd else "",
            inputs=inputs,
            outputs=outputs)
    except KeyError as exc:
        yield get_status_dict(
            'run',
            ds=ds,
            status='impossible',
            message=('command has an unrecognized placeholder: %s',
                     exc))
        return

    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'chain': rerun_info["chain"] if rerun_info else [],
        'inputs': inputs.paths,
        'extra_inputs': extra_inputs.paths,
        'outputs': outputs.paths,
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd
    if ds.id:
        run_info["dsid"] = ds.id
    if extra_info:
        # Applied last so that caller-provided keys override the standard ones.
        run_info.update(extra_info)

    if dry_run:
        # Report what would be run and stop; nothing is executed or saved.
        yield get_status_dict(
            "run [dry-run]", ds=ds, status="ok", message="Dry run",
            run_info=run_info,
            dry_run_info=dict(cmd_expanded=cmd_expanded,
                              pwd_full=pwd,
                              inputs=inputs.expand(),
                              outputs=outputs.expand()))
        return

    if not inject:
        cmd_exitcode, exc = _execute_command(
            cmd_expanded, pwd,
            expected_exit=rerun_info.get("exit", 0) if rerun_info else None)

    run_info['exit'] = cmd_exitcode

    # Re-glob to capture any new outputs.
    #
    # TODO: If a warning or error is desired when an --output pattern doesn't
    # have a match, this would be the spot to do it.
    if explicit or expand in ["outputs", "both"]:
        outputs.expand(refresh=True)
        run_info["outputs"] = outputs.paths

    record = json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False)

    # An explicit `sidecar` argument wins; otherwise fall back to the
    # dataset configuration.
    if sidecar is None:
        use_sidecar = ds.config.get('datalad.run.record-sidecar', default=False)
        use_sidecar = anything2bool(use_sidecar)
    else:
        use_sidecar = sidecar

    if use_sidecar:
        # record ID is hash of record itself
        from hashlib import md5
        record_id = md5(record.encode('utf-8')).hexdigest()
        record_dir = ds.config.get('datalad.run.record-directory', default=op.join('.datalad', 'runinfo'))
        record_path = op.join(ds_path, record_dir, record_id)
        if not op.lexists(record_path):
            # go for compression, even for minimal records not much difference, despite offset cost
            # wrap in list -- there is just one record
            dump2stream([run_info], record_path, compressed=True)

    # compose commit message
    # NOTE(review): the line breaks of this template were reconstructed
    # (blank line after the subject line) -- confirm against the file.
    msg = u"""\
[DATALAD RUNCMD] {}

=== Do not change lines below ===
{}
^^^ Do not change lines above ^^^
"""
    # With a sidecar the message only embeds the quoted record ID; otherwise
    # it embeds the full JSON record.
    msg = msg.format(
        message if message is not None else _format_cmd_shorty(cmd_expanded),
        '"{}"'.format(record_id) if use_sidecar else record)

    # `None` means no path restriction is passed to save; with explicit=True
    # only the matched outputs (plus the sidecar record, if any) are saved.
    outputs_to_save = outputs.expand_strict() if explicit else None
    if outputs_to_save is not None and use_sidecar:
        outputs_to_save.append(record_path)
    # Falsy only when explicit=True produced an empty list of paths.
    do_save = outputs_to_save is None or outputs_to_save
    if not rerun_info and cmd_exitcode:
        if do_save:
            # Keep the prepared message around so that the user can still
            # save the changes manually after inspecting the failure.
            repo = ds.repo
            msg_path = relpath(opj(str(repo.dot_git), "COMMIT_EDITMSG"))
            with open(msg_path, "wb") as ofh:
                ofh.write(ensure_bytes(msg))
            lgr.info("The command had a non-zero exit code. "
                     "If this is expected, you can save the changes with "
                     "'datalad save -d . -r -F %s'",
                     msg_path)
        raise exc
    elif do_save:
        with chpwd(pwd):
            for r in Save.__call__(
                    dataset=ds_path,
                    path=outputs_to_save,
                    recursive=True,
                    message=msg,
                    return_type='generator'):
                yield r