def load(repo, fname):
    """Build a ``Stage`` from the stage file ``fname`` inside ``repo``.

    The path is checked, the file is read through ``repo.tree`` and parsed
    with ``load_stage_file_fobj``, the parsed data is passed to
    ``Stage.validate``, and a ``Stage`` is created from it. Its working
    directory is the stage file's directory joined with the optional
    ``Stage.PARAM_WDIR`` entry (``"."`` when absent).

    Args:
        repo: repo object; its ``tree.open`` is used to read the file and
            it is handed to the checks and to ``Stage``.
        fname: path to the stage file.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from the file.
    """
    # it raises the proper exceptions by priority:
    # 1. when the file doesn't exist
    # 2. filename is not a dvc filename
    # 3. path doesn't represent a regular file
    Stage._check_file_exists(repo, fname)
    Stage._check_dvc_filename(fname)
    Stage._check_isfile(repo, fname)

    # Close the handle once parsing is done instead of leaving it open.
    with repo.tree.open(fname) as fobj:
        d = load_stage_file_fobj(fobj, fname)

    Stage.validate(d, fname=os.path.relpath(fname))
    path = os.path.abspath(fname)
    wdir = os.path.abspath(
        os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
    )

    stage = Stage(
        repo=repo,
        path=path,
        wdir=wdir,
        cmd=d.get(Stage.PARAM_CMD),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage
def load(project, fname):
    """Build a ``Stage`` from the YAML stage file ``fname``.

    The stage's ``cwd`` is the directory that contains the stage file.

    Args:
        project: project object handed to ``Stage``.
        fname: path to the stage file on the local filesystem.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from the file.

    Raises:
        StageFileIsNotDvcFileError: if ``Stage.is_stage_file(fname)`` is
            false. The preceding ``Stage._check_*`` helpers may raise too.
    """
    Stage._check_file_exists(fname)
    Stage._check_dvc_filename(fname)

    if not Stage.is_stage_file(fname):
        raise StageFileIsNotDvcFileError(fname)

    with open(fname, "r") as fd:
        # Fall back to an empty mapping when the parser returns a falsy
        # value (e.g. None for an empty document).
        d = yaml.safe_load(fd) or {}

    Stage.validate(d, fname=os.path.relpath(fname))

    # Resolve the absolute path once and derive the directory from it.
    path = os.path.abspath(fname)

    stage = Stage(
        project=project,
        path=path,
        cwd=os.path.dirname(path),
        cmd=d.get(Stage.PARAM_CMD),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage
def load(repo, fname):
    """Build a ``Stage`` from the stage file ``fname``.

    The file is parsed with ``load_stage_file`` and passed to
    ``Stage.validate``. The stage's working directory is the stage file's
    directory joined with the optional ``Stage.PARAM_WDIR`` entry
    (``"."`` when absent).

    Args:
        repo: repo object handed to ``Stage``.
        fname: path to the stage file.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from the file.

    Raises:
        StageFileIsNotDvcFileError: if ``Stage.is_stage_file(fname)`` is
            false. The preceding ``Stage._check_*`` helpers may raise too.
    """
    Stage._check_file_exists(fname)
    Stage._check_dvc_filename(fname)

    if not Stage.is_stage_file(fname):
        raise StageFileIsNotDvcFileError(fname)

    d = load_stage_file(fname)

    Stage.validate(d, fname=os.path.relpath(fname))
    path = os.path.abspath(fname)
    wdir = os.path.abspath(
        os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
    )

    stage = Stage(
        repo=repo,
        path=path,
        wdir=wdir,
        cmd=d.get(Stage.PARAM_CMD),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage
def _load_params(cls, stage, pipeline_params, lock_params=None):
    """Attach params dependencies to ``stage``.

    The pipeline file lists params like this:
        ```
        params:
            - lr
            - train.epochs
            - params2.yaml:  # notice the filename
                - process.threshold
                - process.bow
        ```
    while the lockfile stores them, with values, like this:
        ```
        params:
            params.yaml:
                lr: 0.0041
                train.epochs: 100
            params2.yaml:
                process.threshold: 0.98
                process.bow:
                    - 15000
                    - 123
        ```
    Both shapes are merged here into one ``{file: {param: value}}`` mapping.
    Only params named in the pipeline file are kept; entries that appear
    solely in the lockfile are ignored, and a param with no lockfile value
    gets ``None``.

    A plain string item in ``pipeline_params`` belongs to the default params
    file (``DEFAULT_PARAMS_FILE``). A dict-like item names a separate params
    file by its key, and the values under that key are the params of that
    file. In the example, `lr` belongs to `params.yaml` and `process.bow`
    to `params2.yaml`.
    """
    locked = lock_params or {}
    merged = defaultdict(lambda: defaultdict(dict))

    for item in pipeline_params:
        if isinstance(item, str):
            params_file, names = DEFAULT_PARAMS_FILE, [item]
        elif isinstance(item, dict):
            params_file = first(item)
            names = item[params_file]
        else:
            # Anything else is skipped.
            continue

        for name in names:
            merged[params_file][name] = locked.get(params_file, {}).get(name)

    entries = [
        {"path": params_file, "params": values}
        for params_file, values in merged.items()
    ]
    stage.deps += dependency.loadd_from(stage, entries)
def load_stage(cls, dvcfile, d, stage_text):
    """Create a ``Stage`` for ``dvcfile`` from the parsed stage data ``d``.

    The stage path and working directory come from ``resolve_paths``; the
    raw ``stage_text`` is stored on the stage, and ``deps``/``outs`` are
    loaded from ``d`` (a missing or empty entry becomes an empty list).
    """
    source_path = dvcfile.path
    stage_path, stage_wdir = resolve_paths(
        source_path, d.get(Stage.PARAM_WDIR)
    )

    loaded = loads_from(Stage, dvcfile.repo, stage_path, stage_wdir, d)
    loaded._stage_text = stage_text  # noqa, pylint:disable=protected-access

    dep_entries = d.get(Stage.PARAM_DEPS) or []
    loaded.deps = dependency.loadd_from(loaded, dep_entries)

    out_entries = d.get(Stage.PARAM_OUTS) or []
    loaded.outs = output.loadd_from(loaded, out_entries)

    return loaded
def test_loadd_from(dvc):
    """A lone ``params`` entry loads as one default-file ParamsDependency."""
    owner = Stage(dvc)
    loaded = loadd_from(owner, [{"params": PARAMS}])

    assert len(loaded) == 1

    dep = loaded[0]
    assert isinstance(dep, ParamsDependency)
    assert dep.def_path == ParamsDependency.DEFAULT_PARAMS_FILE
    assert dep.params == list(PARAMS.keys())
    assert dep.info == PARAMS
def load_stage(cls, dvcfile, d, stage_text):
    """Create a ``Stage`` for ``dvcfile`` from the parsed stage data ``d``.

    The stage path and working directory come from ``resolve_paths``; the
    raw ``stage_text`` is stored on the stage, and ``deps``/``outs`` are
    loaded from ``d`` (a missing or empty entry becomes an empty list).
    """
    from dvc.stage import Stage, loads_from

    source_path = dvcfile.path
    stage_path, stage_wdir = resolve_paths(
        source_path, d.get(Stage.PARAM_WDIR)
    )

    loaded = loads_from(Stage, dvcfile.repo, stage_path, stage_wdir, d)
    loaded._stage_text = stage_text

    dep_entries = d.get(Stage.PARAM_DEPS) or []
    loaded.deps = dependency.loadd_from(loaded, dep_entries)

    out_entries = d.get(Stage.PARAM_OUTS) or []
    loaded.outs = output.loadd_from(loaded, out_entries)

    return loaded
def _load_params(cls, stage, pipeline_params):
    """Attach params dependencies (names only) to ``stage``.

    The pipeline file lists params like this:
        ```
        params:
            - lr
            - train.epochs
            - params2.yaml:  # notice the filename
                - process.threshold
                - process.bow
        ```
    while the lockfile stores them, with values, like this:
        ```
        params:
            params.yaml:
                lr: 0.0041
                train.epochs: 100
            params2.yaml:
                process.threshold: 0.98
                process.bow:
                    - 15000
                    - 123
        ```
    A plain string item in ``pipeline_params`` belongs to the default params
    file (``DEFAULT_PARAMS_FILE``). A dict-like item names a separate params
    file by its key, and the values under that key are the params of that
    file. In the example, `lr` belongs to `params.yaml` and `process.bow`
    to `params2.yaml`.

    Only the param names are collected here. The values live in the
    lockfile and are compared against the actual params from the file in
    the workspace.
    """
    names_by_file = defaultdict(list)

    for item in pipeline_params:
        if isinstance(item, str):
            names_by_file[DEFAULT_PARAMS_FILE].append(item)
        elif isinstance(item, dict):
            params_file = first(item)
            names_by_file[params_file].extend(item[params_file])

    entries = [
        {"path": params_file, "params": names}
        for params_file, names in names_by_file.items()
    ]
    stage.deps.extend(dependency.loadd_from(stage, entries))
def loadd(project, d, path):
    """Build a ``Stage`` from already-parsed stage data.

    Args:
        project: project object handed to ``Stage``.
        d: parsed stage mapping; passed to ``Stage.validate`` first.
        path: path of the stage file. It is made absolute, and its
            directory becomes the stage's ``cwd``.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from ``d``.
    """
    Stage.validate(d)

    path = os.path.abspath(path)

    stage = Stage(
        project=project,
        path=path,
        cmd=d.get(Stage.PARAM_CMD),
        cwd=os.path.dirname(path),
        md5=d.get(Stage.PARAM_MD5),
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage
def _loadd(project, d, path):
    """Build a ``Stage`` from already-parsed stage data.

    Args:
        project: project object handed to ``Stage``.
        d: parsed stage mapping; passed to ``Stage.validate`` first, with
            the relative form of ``path`` as ``fname``.
        path: path of the stage file. It is made absolute, and its
            directory becomes the stage's ``cwd``.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from ``d``.
    """
    Stage.validate(d, fname=os.path.relpath(path))

    path = os.path.abspath(path)

    stage = Stage(
        project=project,
        path=path,
        cmd=d.get(Stage.PARAM_CMD),
        cwd=os.path.dirname(path),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage
def load(repo, fname):
    """Build a ``Stage`` from the DVC-file ``fname`` inside ``repo``.

    ``fname`` is first split into a path and a tag by
    ``Stage._get_path_tag``. The file is read through ``repo.tree``, parsed
    with ``parse_stage`` and passed to ``Stage.validate``. The raw text is
    kept on the stage, and ``deps``/``outs`` are loaded from the parsed
    data (a missing or empty entry becomes an empty list).
    """
    fname, tag = Stage._get_path_tag(fname)

    # The order of the checks sets the priority of the raised exception:
    #   1. the file doesn't exist
    #   2. the filename is not a DVC-file
    #   3. the path doesn't represent a regular file
    Stage._check_file_exists(repo, fname)
    Stage._check_dvc_filename(fname)
    Stage._check_isfile(repo, fname)

    with repo.tree.open(fname) as fobj:
        raw_text = fobj.read()
    data = parse_stage(raw_text, fname)

    Stage.validate(data, fname=relpath(fname))

    abs_path = os.path.abspath(fname)
    stage_dir = os.path.dirname(abs_path)
    workdir = os.path.abspath(
        os.path.join(stage_dir, data.get(Stage.PARAM_WDIR, "."))
    )

    loaded = Stage(
        repo=repo,
        path=abs_path,
        wdir=workdir,
        cmd=data.get(Stage.PARAM_CMD),
        md5=data.get(Stage.PARAM_MD5),
        locked=data.get(Stage.PARAM_LOCKED, False),
        tag=tag,
        always_changed=data.get(Stage.PARAM_ALWAYS_CHANGED, False),
        # The stage text is kept so updates apply to the same structure.
        stage_text=raw_text,
    )

    dep_entries = data.get(Stage.PARAM_DEPS) or []
    loaded.deps = dependency.loadd_from(loaded, dep_entries)

    out_entries = data.get(Stage.PARAM_OUTS) or []
    loaded.outs = output.loadd_from(loaded, out_entries)

    return loaded
def load(repo, fname):
    """Build a ``Stage`` from the DVC-file ``fname`` inside ``repo``.

    ``fname`` is first split into a path and a tag by
    ``Stage._get_path_tag``. The file is read through ``repo.tree``, parsed
    with ``load_stage_fd`` and passed to ``Stage.validate``. A deep copy of
    the parsed data is handed to the stage as ``state``.

    Args:
        repo: repo object; its ``tree.open`` is used to read the file and
            it is handed to the checks and to ``Stage``.
        fname: path to the DVC-file, in whatever form
            ``Stage._get_path_tag`` accepts.

    Returns:
        The ``Stage`` with ``deps`` and ``outs`` loaded from the file.
    """
    fname, tag = Stage._get_path_tag(fname)

    # it raises the proper exceptions by priority:
    # 1. when the file doesn't exist
    # 2. filename is not a DVC-file
    # 3. path doesn't represent a regular file
    Stage._check_file_exists(repo, fname)
    Stage._check_dvc_filename(fname)
    Stage._check_isfile(repo, fname)

    with repo.tree.open(fname) as fd:
        d = load_stage_fd(fd, fname)

    # Making a deepcopy since the original structure
    # loses keys in deps and outs load
    state = copy.deepcopy(d)

    Stage.validate(d, fname=relpath(fname))
    path = os.path.abspath(fname)
    wdir = os.path.abspath(
        os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
    )

    stage = Stage(
        repo=repo,
        path=path,
        wdir=wdir,
        cmd=d.get(Stage.PARAM_CMD),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
        tag=tag,
        always_changed=d.get(Stage.PARAM_ALWAYS_CHANGED, False),
        state=state,
    )

    # `or []` rather than a `.get` default: the default only applies when the
    # key is missing, so a key that is present with a None value would
    # otherwise be passed to `loadd_from` as None.
    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

    return stage