Ejemplo n.º 1
0
    def load(repo, fname):
        """Build a ``Stage`` from the stage file ``fname`` inside ``repo``.

        The file is checked, read through ``repo.tree``, validated, and the
        parsed mapping is used to construct the stage together with its
        dependencies and outputs.

        Args:
            repo: repository object; its ``tree`` is used to open ``fname``.
            fname: path to the stage file.

        Returns:
            The populated ``Stage`` instance.
        """
        # it raises the proper exceptions by priority:
        # 1. when the file doesn't exist
        # 2. filename is not a dvc filename
        # 3. path doesn't represent a regular file
        Stage._check_file_exists(repo, fname)
        Stage._check_dvc_filename(fname)
        Stage._check_isfile(repo, fname)

        # Use a context manager so the handle is closed even when parsing
        # raises, instead of leaving it to the garbage collector.
        with repo.tree.open(fname) as fobj:
            d = load_stage_file_fobj(fobj, fname)

        Stage.validate(d, fname=os.path.relpath(fname))
        path = os.path.abspath(fname)

        # The working directory is resolved relative to the stage file.
        wdir = os.path.abspath(
            os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
        )

        stage = Stage(
            repo=repo,
            path=path,
            wdir=wdir,
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage
Ejemplo n.º 2
0
    def load(project, fname):
        """Build a ``Stage`` from the YAML stage file ``fname``.

        Args:
            project: project object the stage belongs to.
            fname: path to the stage file on the local filesystem.

        Returns:
            The populated ``Stage`` instance.

        Raises:
            StageFileIsNotDvcFileError: if ``Stage.is_stage_file`` rejects
                ``fname``.
        """
        Stage._check_file_exists(fname)
        Stage._check_dvc_filename(fname)

        if not Stage.is_stage_file(fname):
            raise StageFileIsNotDvcFileError(fname)

        with open(fname, "r") as fd:
            # An empty file parses to None; treat it as an empty mapping.
            d = yaml.safe_load(fd) or {}

        Stage.validate(d, fname=os.path.relpath(fname))

        # Resolve the absolute path once and derive the cwd from it.
        path = os.path.abspath(fname)

        stage = Stage(
            project=project,
            path=path,
            cwd=os.path.dirname(path),
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage
Ejemplo n.º 3
0
    def load(repo, fname):
        """Build a ``Stage`` from the stage file ``fname``.

        Args:
            repo: repository object the stage belongs to.
            fname: path to the stage file.

        Returns:
            The populated ``Stage`` instance.

        Raises:
            StageFileIsNotDvcFileError: if ``Stage.is_stage_file`` rejects
                ``fname``.
        """
        Stage._check_file_exists(fname)
        Stage._check_dvc_filename(fname)

        if not Stage.is_stage_file(fname):
            raise StageFileIsNotDvcFileError(fname)

        d = load_stage_file(fname)

        Stage.validate(d, fname=os.path.relpath(fname))
        path = os.path.abspath(fname)

        # The working directory is resolved relative to the stage file.
        wdir = os.path.abspath(
            os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
        )

        stage = Stage(
            repo=repo,
            path=path,
            wdir=wdir,
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage
Ejemplo n.º 4
0
    def _load_params(cls, stage, pipeline_params, lock_params=None):
        """
        Merge the pipeline-file and lockfile views of `params` and append
        the resulting dependencies to `stage.deps`.

        The pipeline file lists parameter names:
        ```
        params:
            - lr
            - train.epochs
            - params2.yaml:  # notice the filename
                - process.threshold
                - process.bow
        ```

        while the lockfile stores their values grouped per params file:
        ```
        params:
          params.yaml:
            lr: 0.0041
            train.epochs: 100
          params2.yaml:
            process.threshold: 0.98
            process.bow:
            - 15000
            - 123
        ```

        Only names present in the pipeline file are kept; entries that
        exist solely in the lockfile are ignored. A name with no lockfile
        value is stored with `None`.

        A plain string item belongs to the default params file, whereas a
        dict-like item uses its first key as the params file and the value
        under that key as the list of names for that file.

        (In the example above `lr` belongs to `params.yaml` and
        `process.bow` belongs to `params2.yaml`.)
        """
        locked = lock_params or {}
        merged = defaultdict(lambda: defaultdict(dict))

        def entries():
            # Flatten the pipeline list into (params file, name) pairs,
            # keeping the order in which they are declared.
            for item in pipeline_params:
                if isinstance(item, str):
                    yield DEFAULT_PARAMS_FILE, item
                elif isinstance(item, dict):
                    params_file = first(item)
                    for name in item[params_file]:
                        yield params_file, name

        for params_file, name in entries():
            merged[params_file][name] = locked.get(params_file, {}).get(name)

        specs = [
            {"path": params_file, "params": values}
            for params_file, values in merged.items()
        ]
        stage.deps += dependency.loadd_from(stage, specs)
Ejemplo n.º 5
0
Archivo: loader.py Proyecto: urahua/dvc
 def load_stage(cls, dvcfile, d, stage_text):
     """Create a ``Stage`` from the parsed data ``d`` of ``dvcfile``.

     The stage path and working directory come from ``resolve_paths``,
     the stage itself from ``loads_from``; the raw ``stage_text`` is kept
     on the stage, and deps/outs are loaded from ``d`` (a missing or
     falsy entry is treated as an empty list).
     """
     stage_path, stage_wdir = resolve_paths(
         dvcfile.path, d.get(Stage.PARAM_WDIR)
     )
     loaded = loads_from(Stage, dvcfile.repo, stage_path, stage_wdir, d)
     # noqa, pylint:disable=protected-access
     loaded._stage_text = stage_text

     raw_deps = d.get(Stage.PARAM_DEPS) or []
     loaded.deps = dependency.loadd_from(loaded, raw_deps)
     raw_outs = d.get(Stage.PARAM_OUTS) or []
     loaded.outs = output.loadd_from(loaded, raw_outs)
     return loaded
Ejemplo n.º 6
0
def test_loadd_from(dvc):
    """A params-only entry loads as one dependency on the default file."""
    loaded = loadd_from(Stage(dvc), [{"params": PARAMS}])
    assert len(loaded) == 1

    dep = loaded[0]
    assert isinstance(dep, ParamsDependency)
    # No "path" was given, so the default params file is expected.
    assert dep.def_path == ParamsDependency.DEFAULT_PARAMS_FILE
    assert dep.params == list(PARAMS.keys())
    assert dep.info == PARAMS
Ejemplo n.º 7
0
    def load_stage(cls, dvcfile, d, stage_text):
        """Create a ``Stage`` from the parsed data ``d`` of ``dvcfile``.

        The stage path and working directory come from ``resolve_paths``,
        the stage itself from ``loads_from``; the raw ``stage_text`` is
        kept on the stage, and deps/outs are loaded from ``d`` (a missing
        or falsy entry is treated as an empty list).
        """
        from dvc.stage import Stage, loads_from

        stage_path, stage_wdir = resolve_paths(
            dvcfile.path, d.get(Stage.PARAM_WDIR)
        )
        loaded = loads_from(Stage, dvcfile.repo, stage_path, stage_wdir, d)
        loaded._stage_text = stage_text

        raw_deps = d.get(Stage.PARAM_DEPS) or []
        loaded.deps = dependency.loadd_from(loaded, raw_deps)
        raw_outs = d.get(Stage.PARAM_OUTS) or []
        loaded.outs = output.loadd_from(loaded, raw_outs)
        return loaded
Ejemplo n.º 8
0
    def _load_params(cls, stage, pipeline_params):
        """
        Group the `params` names of a pipeline file per params file and
        extend `stage.deps` with the dependencies loaded from them.

        The pipeline file lists parameter names:
        ```
        params:
            - lr
            - train.epochs
            - params2.yaml:  # notice the filename
                - process.threshold
                - process.bow
        ```

        while the lockfile stores their values grouped per params file:
        ```
        params:
          params.yaml:
            lr: 0.0041
            train.epochs: 100
          params2.yaml:
            process.threshold: 0.98
            process.bow:
            - 15000
            - 123
        ```

        A plain string item belongs to the default params file, whereas a
        dict-like item uses its first key as the params file and the value
        under that key as the list of names for that file.

        (In the example above `lr` belongs to `params.yaml` and
        `process.bow` belongs to `params2.yaml`.)

        Only the names are loaded here; the values live in the lockfile
        and are compared against the actual params in the workspace.
        """
        names_by_file = {}
        for item in pipeline_params:
            if isinstance(item, str):
                names_by_file.setdefault(DEFAULT_PARAMS_FILE, []).append(item)
            elif isinstance(item, dict):
                params_file = first(item)
                names_by_file.setdefault(params_file, []).extend(
                    item[params_file]
                )

        specs = [
            {"path": params_file, "params": names}
            for params_file, names in names_by_file.items()
        ]
        stage.deps.extend(dependency.loadd_from(stage, specs))
Ejemplo n.º 9
0
Archivo: stage.py Proyecto: k76853/dvc
    def loadd(project, d, path):
        """Build a ``Stage`` from the already-parsed mapping ``d``.

        Args:
            project: project object the stage belongs to.
            d: mapping with the stage data; validated by ``Stage.validate``.
            path: path of the stage file; made absolute, and its directory
                becomes the stage's ``cwd``.

        Returns:
            The populated ``Stage`` instance.
        """
        Stage.validate(d)

        path = os.path.abspath(path)

        stage = Stage(
            project=project,
            path=path,
            cmd=d.get(Stage.PARAM_CMD),
            cwd=os.path.dirname(path),
            md5=d.get(Stage.PARAM_MD5),
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage
Ejemplo n.º 10
0
    def _loadd(project, d, path):
        """Build a ``Stage`` from the already-parsed mapping ``d``.

        Args:
            project: project object the stage belongs to.
            d: mapping with the stage data; validated by ``Stage.validate``.
            path: path of the stage file; its relative form is passed to
                validation, and its absolute form and directory become
                the stage's ``path`` and ``cwd``.

        Returns:
            The populated ``Stage`` instance.
        """
        Stage.validate(d, fname=os.path.relpath(path))
        path = os.path.abspath(path)

        stage = Stage(
            project=project,
            path=path,
            cmd=d.get(Stage.PARAM_CMD),
            cwd=os.path.dirname(path),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage
Ejemplo n.º 11
0
Archivo: stage.py Proyecto: sekmet/dvc
    def load(repo, fname):
        """Build a ``Stage`` from the DVC-file ``fname`` inside ``repo``.

        ``fname`` is first split into a path and a tag by
        ``Stage._get_path_tag``. The file text is read through
        ``repo.tree``, parsed, validated, and kept on the stage as
        ``stage_text``.

        Returns:
            The populated ``Stage`` instance.
        """
        fname, tag = Stage._get_path_tag(fname)

        # The checks are ordered so that errors are raised by priority:
        # 1. the file doesn't exist
        # 2. the filename is not a DVC-file
        # 3. the path doesn't represent a regular file
        Stage._check_file_exists(repo, fname)
        Stage._check_dvc_filename(fname)
        Stage._check_isfile(repo, fname)

        with repo.tree.open(fname) as fd:
            stage_text = fd.read()
        d = parse_stage(stage_text, fname)

        Stage.validate(d, fname=relpath(fname))

        path = os.path.abspath(fname)
        # The working directory is resolved relative to the DVC-file.
        stage_dir = os.path.dirname(path)
        wdir = os.path.abspath(
            os.path.join(stage_dir, d.get(Stage.PARAM_WDIR, "."))
        )

        stage = Stage(
            repo=repo,
            path=path,
            wdir=wdir,
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
            tag=tag,
            always_changed=d.get(Stage.PARAM_ALWAYS_CHANGED, False),
            # The original text is stored so that updates can be applied
            # to the same structure.
            stage_text=stage_text,
        )

        raw_deps = d.get(Stage.PARAM_DEPS) or []
        stage.deps = dependency.loadd_from(stage, raw_deps)
        raw_outs = d.get(Stage.PARAM_OUTS) or []
        stage.outs = output.loadd_from(stage, raw_outs)

        return stage
Ejemplo n.º 12
0
    def load(repo, fname):
        """Build a ``Stage`` from the DVC-file ``fname`` inside ``repo``.

        ``fname`` is first split into a path and a tag by
        ``Stage._get_path_tag``. The file is read through ``repo.tree``,
        validated, and a deep copy of the parsed data is kept on the stage
        as ``state``.

        Args:
            repo: repository object; its ``tree`` is used to open the file.
            fname: path to the DVC-file, as accepted by
                ``Stage._get_path_tag``.

        Returns:
            The populated ``Stage`` instance.
        """
        fname, tag = Stage._get_path_tag(fname)

        # it raises the proper exceptions by priority:
        # 1. when the file doesn't exist
        # 2. filename is not a DVC-file
        # 3. path doesn't represent a regular file
        Stage._check_file_exists(repo, fname)
        Stage._check_dvc_filename(fname)
        Stage._check_isfile(repo, fname)

        with repo.tree.open(fname) as fd:
            d = load_stage_fd(fd, fname)
        # Making a deepcopy since the original structure
        # loses keys in deps and outs load
        state = copy.deepcopy(d)

        Stage.validate(d, fname=relpath(fname))
        path = os.path.abspath(fname)

        # The working directory is resolved relative to the DVC-file.
        wdir = os.path.abspath(
            os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))
        )

        stage = Stage(
            repo=repo,
            path=path,
            wdir=wdir,
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
            tag=tag,
            always_changed=d.get(Stage.PARAM_ALWAYS_CHANGED, False),
            state=state,
        )

        # ``d.get(key, [])`` only falls back when the key is absent; a key
        # that is present with a null value would hand ``None`` to the
        # loaders, so normalize any falsy value to an empty list.
        stage.deps = dependency.loadd_from(
            stage, d.get(Stage.PARAM_DEPS) or []
        )
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])

        return stage