Example #1
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Build a single-file stage that imports ``url`` and dump its DVC file.

    Args:
        self: repo object; ``root_dir`` and ``check_modified_graph`` are
            used here.
        url: source to import; becomes the stage's only dependency.
        out: requested output, combined with ``url`` by ``resolve_output``.
        fname: path of the DVC file to write; falls back to the path
            returned by ``resolve_paths``.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true the stage is not run; ``stage.ignore_outs()`` is
            called instead.
        desc: when truthy, stored as the description of the first output.
        jobs: forwarded to ``stage.run``.

    Returns:
        The created stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises:
        OutputDuplicationError: re-raised from ``check_modified_graph`` with
            the stage being created removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    # the dependency is recorded relative to the stage's working directory.
    if (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    ):
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    # Drop any existing DVC file at this path before checking the graph.
    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide with this one, and
        # chain explicitly so the original error stays attached as the cause.
        raise OutputDuplicationError(
            exc.output, set(exc.stages) - {stage}
        ) from exc

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen

    dvcfile.dump(stage)

    return stage
Example #2
0
File: stage.py Project: jhhuh/dvc
    def create(
        self,
        single_stage: bool = False,
        validate: bool = True,
        fname: str = None,
        force: bool = False,
        **stage_data,
    ) -> Union["Stage", "PipelineStage"]:
        """Build a new stage object, optionally validating it against the repo.

        Args:
            single_stage: when true a ``Stage`` (.dvc file based) is built;
                otherwise a ``PipelineStage`` stored in ``PIPELINE_FILE``.
            validate: when true the new stage is added to a copy of the
                repo index and the resulting graph is checked; unless
                ``force`` is set, ``check_stage_exists`` runs first.
            fname: DVC file path for a single stage; when omitted the path
                is derived with ``prepare_file_path``. Not used for
                pipeline stages.
            force: skip the ``check_stage_exists`` overwrite check.
            stage_data: stage fields, passed through ``validate_kwargs``
                and then on to ``create_stage``.

        Returns:
            The newly created stage.

        Raises:
            InvalidStageName: a pipeline stage name is empty or rejected
                by ``is_valid_name``.
        """
        from dvc.stage import PipelineStage, Stage, create_stage, restore_meta
        from dvc.stage.exceptions import InvalidStageName
        from dvc.stage.utils import (
            is_valid_name,
            prepare_file_path,
            validate_kwargs,
        )

        stage_data = validate_kwargs(
            single_stage=single_stage, fname=fname, **stage_data
        )

        if not single_stage:
            # Pipeline stages always live in the shared pipeline file and
            # must carry a valid name.
            path = PIPELINE_FILE
            stage_cls = PipelineStage
            name = stage_data["name"]
            if not name or not is_valid_name(name):
                raise InvalidStageName
        else:
            stage_cls = Stage
            path = fname or prepare_file_path(stage_data)

        new_stage = create_stage(
            stage_cls, repo=self.repo, path=path, **stage_data
        )

        if validate:
            if not force:
                from dvc.stage.utils import check_stage_exists

                check_stage_exists(self.repo, new_stage, new_stage.path)

            # Validate the graph on the index returned by add().
            candidate_index = self.repo.index.add(new_stage)
            candidate_index.check_graph()

        restore_meta(new_stage)
        return new_stage
Example #3
0
def _create_stages(
    repo,
    targets,
    fname,
    pbar=None,
    external=False,
    glob=False,
    desc=None,
    transfer=False,
    **kwargs,
):
    """Create one single-output stage per (expanded) target.

    Targets are expanded with ``glob_targets``. For every target a ``Stage``
    is created, its metadata restored, any existing DVC file at the stage
    path removed, and the repo reset.

    Args:
        repo: repo object the stages belong to.
        targets: paths to create stages for.
        fname: DVC file path to use instead of the resolved one.
        pbar: optional progress bar; its message is updated per stage.
        external: forwarded to ``create_stage``.
        glob: forwarded to ``glob_targets``.
        desc: when truthy, stored as the description of each first output.
        transfer: together with a missing ``out`` kwarg, requests
            ``always_local`` path resolution.
        **kwargs: only the ``"out"`` key is read here.

    Returns:
        List of the created stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    expanded_targets = glob_targets(targets, glob=glob)

    # Both values are loop-invariant: kwargs is never modified below.
    requested_out = kwargs.get("out")
    always_local = transfer and not requested_out

    progress = Tqdm(
        expanded_targets,
        desc="Creating DVC files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    stages = []
    for target in progress:
        out = resolve_output(target, requested_out) if requested_out else target
        path, wdir, out = resolve_paths(repo, out, always_local=always_local)

        stage = create_stage(
            Stage,
            repo,
            fname or path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        restore_meta(stage)
        Dvcfile(repo, stage.path).remove()
        if desc:
            stage.outs[0].desc = desc

        repo._reset()  # pylint: disable=protected-access

        # NOTE(review): `stage` has already been dereferenced above, so the
        # falsy branch looks unreachable unless Stage defines its own
        # truthiness -- confirm before removing.
        if stage:
            stages.append(stage)
            if pbar is not None:
                pbar.update_msg(out)
        elif pbar is not None:
            pbar.total -= 1

    return stages
Example #4
0
def _create_stages(
    repo,
    targets,
    fname,
    pbar=None,
    external=False,
    glob=False,
    desc=None,
):
    """Create one single-output stage per target, skipping unchanged ones.

    Args:
        repo: repo object the stages belong to.
        targets: paths (or glob patterns when ``glob`` is true).
        fname: DVC file path to use instead of the resolved one.
        pbar: optional progress bar; its message is updated for each kept
            stage and its total reduced for each skipped one.
        external: forwarded to ``create_stage``.
        glob: when true, every target is expanded with a recursive
            ``iglob``.
        desc: when truthy, stored as the description of each first output.

    Returns:
        List of created stages, excluding those whose ``can_be_skipped``
        is true.
    """
    from glob import iglob

    from dvc.stage import Stage, create_stage, restore_meta

    if glob:
        expanded_targets = []
        for pattern in targets:
            expanded_targets.extend(iglob(pattern, recursive=True))
    else:
        expanded_targets = targets

    progress = Tqdm(
        expanded_targets,
        desc="Creating DVC-files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    stages = []
    for target in progress:
        path, wdir, out = resolve_paths(repo, target)
        stage = create_stage(
            Stage,
            repo,
            fname or path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        restore_meta(stage)

        if not stage.can_be_skipped:
            # Replace any existing DVC file for this stage.
            Dvcfile(repo, stage.path).remove()
            if desc:
                stage.outs[0].desc = desc
        else:
            stage = None

        repo._reset()  # pylint: disable=protected-access

        if stage:
            stages.append(stage)
            if pbar is not None:
                pbar.update_msg(out)
        elif pbar is not None:
            # Skipped stages no longer count towards the progress total.
            pbar.total -= 1

    return stages
Example #5
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Build a single-file stage that imports ``url`` and dump its DVC file.

    Args:
        self: repo object; ``root_dir``, ``index`` and ``cloud`` are used
            here.
        url: source to import; becomes the stage's only dependency.
        out: requested output, combined with ``url`` by ``resolve_output``.
        fname: path of the DVC file to write; falls back to the path
            returned by ``resolve_paths``.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true the stage is not run; ``stage.ignore_outs()`` is
            called instead.
        remote: passed to ``self.cloud.get_remote_odb`` when ``to_remote``
            is set.
        to_remote: transfer the first output straight to the remote object
            database instead of running the stage.
        desc: when truthy, stored as the description of the first output.
        jobs: forwarded to ``stage.run`` or to the output transfer.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: ``to_remote`` combined with ``no_exec``, or
            ``remote`` given without ``to_remote``.
        OutputDuplicationError: re-raised from the graph check with the
            stage being created removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    # NOTE(review): `out` has already been passed through resolve_output at
    # this point, so `not out` tests the resolved value rather than the
    # caller's argument -- confirm this is intended.
    path, wdir, out = resolve_paths(
        self, out, always_local=to_remote and not out
    )

    if to_remote and no_exec:
        raise InvalidArgumentError(
            "--no-exec can't be combined with --to-remote")

    if not to_remote and remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote")

    # NOTE: when user is importing something from within their own repository
    # the dependency is recorded relative to the stage's working directory.
    if (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    ):
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    # Drop any existing DVC file at this path before checking the graph.
    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        new_index = self.index.add(stage)
        new_index.check_graph()
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide with this one, and
        # chain explicitly so the original error stays attached as the cause.
        raise OutputDuplicationError(
            exc.output, set(exc.stages) - {stage}
        ) from exc

    if no_exec:
        stage.ignore_outs()
    elif to_remote:
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        # The stage was not run, so dependency info and the stage checksum
        # are filled in explicitly.
        stage.save_deps()
        stage.md5 = stage.compute_md5()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen

    dvcfile.dump(stage)

    return stage
Example #6
0
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from a command, optionally execute it, and dump it.

    Args:
        self: repo object; ``stages`` and ``check_modified_graph`` are used
            here.
        fname: DVC file path; only allowed together with ``single_stage``.
        no_exec: when true the stage is not run; ``stage.ignore_outs()`` is
            called and the lock file is not updated on dump.
        single_stage: build a ``Stage`` in its own file instead of a named
            ``PipelineStage`` in ``PIPELINE_FILE``.
        **kwargs: stage fields forwarded to ``create_stage``. Keys read
            here: ``cmd``, ``name``, ``params``, ``run_cache``, ``force``
            and ``no_commit``.

    Returns:
        The created stage, or ``None`` when the run cache is enabled and
        ``stage.can_be_skipped`` is true.

    Raises:
        InvalidArgumentError: no command given, or an invalid combination
            of ``name``, ``fname`` and ``single_stage``.
        InvalidStageName: the pipeline stage name fails ``is_valid_name``.
        OutputDuplicationError: re-raised from ``check_modified_graph`` with
            the stage being created removed from the reported stages.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile
    from dvc.exceptions import InvalidArgumentError, OutputDuplicationError
    from dvc.stage import PipelineStage, Stage, create_stage, restore_meta
    from dvc.stage.exceptions import InvalidStageName

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_cls = PipelineStage
    path = PIPELINE_FILE
    stage_name = kwargs.get("name")

    if stage_name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`")

    if stage_name and fname:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`")

    if not stage_name and not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # Single stages are unnamed; drop the key so it is not forwarded.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    elif not is_valid_name(stage_name):
        raise InvalidStageName

    params = chunk_dict(parse_params_from_cli(kwargs.pop("params", [])))
    stage = create_stage(stage_cls,
                         repo=self,
                         path=path,
                         params=params,
                         **kwargs)
    restore_meta(stage)
    if kwargs.get("run_cache", True) and stage.can_be_skipped:
        return None

    dvcfile = Dvcfile(self, stage.path)
    try:
        # NOTE(review): `force` defaults to True when the key is absent from
        # kwargs -- confirm callers always pass it explicitly.
        if kwargs.get("force", True):
            with suppress(ValueError):
                self.stages.remove(stage)
        else:
            _check_stage_exists(dvcfile, stage)
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that collide with this one, and
        # chain explicitly so the original error stays attached as the cause.
        raise OutputDuplicationError(
            exc.output, set(exc.stages) - {stage}
        ) from exc

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    dvcfile.dump(stage, update_lock=not no_exec)
    return stage