Example #1
0
def parse_py(text, path):
    """Turn the source text of a .py file into a Python structure.

    ``text`` is parsed with ``ast.parse`` using ``path`` as the filename.
    Parsing runs under ``reraise(SyntaxError, PythonFileCorruptedError(path))``
    so a syntax error is reported as a corrupted-file error for ``path``.
    The parsed tree is handed to ``_ast_tree_to_dict`` and its result is
    returned unchanged.
    """
    corrupted = PythonFileCorruptedError(path)
    with reraise(SyntaxError, corrupted):
        module_node = ast.parse(text, filename=path)

    return _ast_tree_to_dict(module_node)
Example #2
0
    def fetch_external(self, paths: Iterable, **kwargs):
        """Fetch the given external repo paths into cache.

        Each entry of ``paths`` is joined onto ``self.root_dir``, its metadata
        is looked up in ``self.repo_tree`` and it is handed to
        ``self._fetch_to_cache`` together with any extra ``kwargs``.

        Returns a 3-tuple ``(downloaded, failed, cache_infos)``:
        ``downloaded`` is the sum of the values reported through the download
        callback, ``failed`` is always 0 in this implementation, and
        ``cache_infos`` holds one ``_fetch_to_cache`` result per path, which
        can be used as checkout targets for the fetched paths.
        """
        failed = 0
        reported = []
        cache_infos = []
        base = PathInfo(self.root_dir)

        def record(result):
            # callback given to _fetch_to_cache; collects every reported value
            reported.append(result)

        # all paths are joined up front, before the first fetch starts
        for target in [base / path for path in paths]:
            missing = PathMissingError(target, self.url)
            with reraise(FileNotFoundError, missing):
                metadata = self.repo_tree.metadata(target)

            self._check_repo(target, metadata.repo)
            repo = metadata.repo
            cache_infos.append(
                self._fetch_to_cache(target, repo, record, **kwargs)
            )

        return sum(reported), failed, cache_infos
Example #3
0
def parse_yaml(text, path, typ="safe"):
    """Load YAML ``text`` with a ruamel ``YAML(typ=typ)`` loader.

    Loading runs under ``reraise(_YAMLError, YAMLFileCorruptedError(path))``
    so a ruamel YAML error is reported as a corrupted-file error for
    ``path``.  A falsy load result is replaced by an empty dict.
    """
    # ruamel is imported locally, only when this function is called
    from ruamel.yaml import YAML, YAMLError as _YAMLError

    loader = YAML(typ=typ)
    corrupted = YAMLFileCorruptedError(path)
    with reraise(_YAMLError, corrupted):
        document = loader.load(text) or {}
    return document
Example #4
0
    def _each_iter(self, key):
        """Resolve the stage generated for ``key`` of this foreach group.

        The value for ``key`` is read from ``self.normalized_iterable``; a
        missing key is reported as ``EntryNotFound`` through ``reraise``.
        The value (and, when applicable, the key) is placed into the context
        temporarily while an ``EntryDefinition`` for the generated name is
        resolved.  A ``ContextError`` raised during resolution is passed to
        ``format_and_raise`` together with the stage name and ``self.relpath``.
        """
        not_found = EntryNotFound(
            f"Could not find '{key}' in foreach group '{self.name}'"
        )
        with reraise(KeyError, not_found):
            item_value = self.normalized_iterable[key]

        # NOTE: whether item/key are made available is decided from the
        # resolved iterable/foreach-data, not from the normalized one.
        exposed = self._inserted_keys(self.resolved_iterable)
        overrides = {self.pair.value: item_value}
        key_name = self.pair.key
        if key_name in exposed:
            overrides[key_name] = key

        with self.context.set_temporarily(overrides, reserve=True):
            # optimization: item and key are top-level values that are not
            # merged recursively, so they can simply be removed on __exit__().
            # That avoids cloning the context, which is slower (a larger
            # context may mean more generated items and therefore more
            # cloning, i.e. quadratic complexity).
            stage_name = self._generate_name(key)
            definition = EntryDefinition(
                self.resolver, self.context, stage_name, self.do_definition
            )
            try:
                # optimization: syntax errors are checked once when accessing
                # do_definition, so the per-stage checks are skipped here.
                return definition.resolve_stage(skip_checks=True)
            except ContextError as exc:
                format_and_raise(exc, f"stage '{stage_name}'", self.relpath)
Example #5
0
    def get_checksum(self, path):
        """Return the hash of ``path`` (taken relative to ``self.root_dir``).

        The metadata lookup runs under
        ``reraise(FileNotFoundError, PathMissingError(path, self.url))`` so a
        missing path is reported as a ``PathMissingError``.
        """
        target = PathInfo(self.root_dir) / path
        missing = PathMissingError(path, self.url)
        with reraise(FileNotFoundError, missing):
            metadata = self.repo_tree.metadata(target)

        # the hash comes from the tree of the repo recorded in the metadata
        # (original note: subrepos are skipped here)
        owning_tree = self._get_tree_for(metadata.repo)
        return owning_tree.get_hash(target)
Example #6
0
def parse_py_for_update(text, path):
    """Parse .py source text into a dict used for updating params.

    The result is the ``_ast_tree_to_dict`` conversion of the parsed tree,
    extended with two entries: ``_PARAMS_KEY`` maps to a second conversion
    made with ``lineno=True`` and ``_PARAMS_TEXT_KEY`` maps to the raw
    ``text``.  A ``SyntaxError`` from parsing is reported through ``reraise``
    as ``PythonFileCorruptedError(path)``.
    """
    corrupted = PythonFileCorruptedError(path)
    with reraise(SyntaxError, corrupted):
        module_node = ast.parse(text, filename=path)

    params = _ast_tree_to_dict(module_node)
    params.update(
        {
            _PARAMS_KEY: _ast_tree_to_dict(module_node, lineno=True),
            _PARAMS_TEXT_KEY: text,
        }
    )
    return params
Example #7
0
def parse_yaml(text, path, typ="safe"):
    """Load YAML ``text`` with a ``YAML(typ=typ)`` loader.

    Args:
        text: YAML document to load.
        path: path of the file the text came from; only used to build the
            error objects.
        typ: loader type passed to ``YAML``.

    Returns:
        The loaded document, or an empty dict when the load result is falsy.

    Raises:
        YAMLFileCorruptedError: via ``reraise`` when loading raises
            ``_YAMLError``.
        YAMLError: when loading raises ``DuplicateKeyError``; it carries
            ``exc.problem`` and is explicitly chained to the original error.
    """
    yaml = YAML(typ=typ)
    try:
        with reraise(_YAMLError, YAMLFileCorruptedError(path)):
            return yaml.load(text) or {}
    except DuplicateKeyError as exc:
        # NOTE: unfortunately this one doesn't inherit from YAMLError, so we
        # have to catch it by-hand. See
        # https://yaml.readthedocs.io/en/latest/api.html#duplicate-keys
        # Chain explicitly so the duplicate-key error is kept as the cause.
        raise YAMLError(path, exc.problem) from exc
Example #8
0
File: data.py Project: sanpreet/dvc
    def to_datapoints(self, **kwargs):
        """Read the raw data and run it through every processor in order.

        ``self.raw(**kwargs)`` is called under ``reraise`` so that a
        ``ParseError`` or ``csv.Error`` is reported as
        ``PlotParsingError(self.filename, self.revision)``.  Each callable
        from ``self._processors()`` then receives the current data plus
        ``filename``, ``revision`` and the original ``kwargs``; its return
        value becomes the input of the next one.
        """
        parsing_failed = PlotParsingError(self.filename, self.revision)
        with reraise([ParseError, csv.Error], parsing_failed):
            datapoints = self.raw(**kwargs)

        for process in self._processors():
            datapoints = process(
                datapoints,
                filename=self.filename,
                revision=self.revision,
                **kwargs,
            )
        return datapoints
Example #9
0
def parse_yaml(text, path, typ="safe"):
    """Load YAML ``text``, rendering it as a template when it has "vars".

    The text is first loaded with ``YAML(typ=typ)``; a ``YAMLError`` is
    reported through ``reraise`` as ``YAMLFileCorruptedError(path)`` and a
    falsy load result is replaced by an empty dict.  When the loaded result
    contains "vars", the raw text is passed to ``render_dvc_template`` and
    the first element of its result is returned instead; any exception
    raised there is re-raised as ``YAMLFileCorruptedError(path)``.
    """
    loader = YAML(typ=typ)
    with reraise(YAMLError, YAMLFileCorruptedError(path)):
        parsed = loader.load(text) or {}

    if "vars" not in parsed:
        return parsed

    try:
        rendered, _ = render_dvc_template(text)
    except Exception as exc:
        raise YAMLFileCorruptedError(path) from exc
    return rendered
Example #10
0
    def get_external(self, path, dest):
        """Convenience wrapper that fetches ``path`` and places it at ``dest``.

        ``path`` is taken relative to ``self.root_dir``.  The metadata lookup
        runs under ``reraise(FileNotFoundError, PathMissingError(...))``.
        When the metadata reports ``output_exists`` the path is fetched into
        cache and checked out to ``dest``; otherwise it is copied directly
        from the repo's tree.
        """
        target = PathInfo(self.root_dir) / path
        missing = PathMissingError(path, self.url)
        with reraise(FileNotFoundError, missing):
            metadata = self.repo_tree.metadata(target)

        self._check_repo(target, metadata.repo)

        if not metadata.output_exists:
            # git-only folder, just copy files directly to dest
            source_tree = self._get_tree_for(metadata.repo)  # ignore subrepos
            source_tree.copytree(target, dest)
            return

        repo = metadata.repo
        local_cache = repo.cache.local
        # fetch DVC and git files to tmpdir cache, then checkout
        fetched = self._fetch_to_cache(target, repo, None)
        local_cache.checkout(PathInfo(dest), fetched)
Example #11
0
File: image.py Project: pared/dvc
    def as_json(self, **kwargs):
        """Save the images under ``kwargs["path"]`` and describe them as JSON.

        A missing "path" keyword is reported through ``reraise`` as a
        ``DvcException``.  The return value is a JSON string holding one
        object per item yielded by ``self._save_images(path)``, with the
        type, a single-element revision list and the image path under "url".
        """
        missing_path = DvcException(
            f"{type(self).__name__} needs 'path' to store images."
        )
        with reraise(KeyError, missing_path):
            path = kwargs["path"]

        entries = [
            {
                self.TYPE_KEY: self.TYPE,
                self.REVISIONS_KEY: [revision],
                "url": img_path,
            }
            for revision, _, img_path in self._save_images(path)
        ]
        return json.dumps(entries)
Example #12
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the URL of the storage location of a data file or directory that
    is tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises OutputNotFoundError if the file is not a dvc-tracked file.

    NOTE: This function does not check for the actual existence of the file or
    directory in the remote storage.
    """
    with _make_repo(repo, rev=rev) as opened:
        target = PathInfo(opened.root_dir) / path
        missing = PathMissingError(path, repo)
        with reraise(FileNotFoundError, missing):
            metadata = opened.repo_tree.metadata(target)

        if not metadata.is_dvc:
            raise OutputNotFoundError(path, repo)

        storage = metadata.repo.cloud
        checksum = opened.repo_tree.get_hash(target).value
        return storage.get_url_for(remote, checksum=checksum)
Example #13
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the URL of the storage location of a data file or directory that
    is tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises OutputNotFoundError if the file is not tracked by DVC.

    NOTE: This function does not check for the actual existence of the file or
    directory in the remote storage.
    """
    with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as opened:
        fs_path = opened.fs.path.join(opened.root_dir, path)
        missing = PathMissingError(path, repo)
        with reraise(FileNotFoundError, missing):
            info = opened.repo_fs.info(fs_path)

        if not info["isdvc"]:
            raise OutputNotFoundError(path, repo)

        # the repo recorded in the info entry supplies both cloud and md5
        owner = info["repo"]
        storage = owner.cloud
        checksum = owner.dvcfs.info(fs_path)["md5"]
        return storage.get_url_for(remote, checksum=checksum)
Example #14
0
 def __getattr__(self, item: str) -> Column:
     """Return ``self.column(item)`` for attribute-style access.

     The lookup runs under ``reraise(KeyError, AttributeError)`` so that a
     ``KeyError`` from ``column`` surfaces as an ``AttributeError``.
     """
     with reraise(KeyError, AttributeError):
         column = self.column(item)
     return column
Example #15
0
def parse_toml(text, path, decoder=None):
    """Decode TOML ``text`` with ``toml.loads``, passing ``decoder`` along.

    Decoding runs under ``reraise`` so a ``toml.TomlDecodeError`` is reported
    as ``TOMLFileCorruptedError(path)``.
    """
    corrupted = TOMLFileCorruptedError(path)
    with reraise(toml.TomlDecodeError, corrupted):
        decoded = toml.loads(text, decoder=decoder)
    return decoded
Example #16
0
File: _toml.py Project: zivzone/dvc
def parse_toml(text, path, decoder=None):
    """Decode TOML ``text`` with ``toml.loads``, passing ``decoder`` along.

    ``toml`` is imported locally, when the function is called.  Decoding
    runs under ``reraise`` so a ``TomlDecodeError`` is reported as
    ``TOMLFileCorruptedError(path)``.
    """
    from toml import TomlDecodeError, loads

    corrupted = TOMLFileCorruptedError(path)
    with reraise(TomlDecodeError, corrupted):
        decoded = loads(text, decoder=decoder)
    return decoded
Example #17
0
File: _common.py Project: ush98/dvc
def _load_data(path: "AnyPath", parser: ParserFn, fs: "BaseFileSystem" = None):
    """Read ``path`` as UTF-8 text and return ``parser(text, path)``.

    The file is opened with ``fs.open`` when ``fs`` is truthy and with the
    builtin ``open`` otherwise.  Reading and parsing run under ``reraise`` so
    a ``UnicodeDecodeError`` is reported as ``EncodingError(path, encoding)``.
    """
    encoding = "utf-8"
    open_fn = fs.open if fs else open
    with open_fn(path, encoding=encoding) as fd, reraise(  # type: ignore
        UnicodeDecodeError, EncodingError(path, encoding)
    ):
        return parser(fd.read(), path)
Example #18
0
def parse_yaml(text, path, typ="safe"):
    """Load YAML ``text`` with a ``YAML(typ=typ)`` loader.

    Loading runs under ``reraise(YAMLError, YAMLFileCorruptedError(path))``
    so a YAML error is reported as a corrupted-file error for ``path``.
    A falsy load result is replaced by an empty dict.
    """
    loader = YAML(typ=typ)
    corrupted = YAMLFileCorruptedError(path)
    with reraise(YAMLError, corrupted):
        document = loader.load(text) or {}
    return document
Example #19
0
def parse_json(text, path, **kwargs):
    """Decode JSON ``text`` with ``json.loads``, forwarding ``kwargs``.

    Decoding runs under ``reraise`` so a ``json.JSONDecodeError`` is reported
    as ``JSONFileCorruptedError(path)``.  A falsy decode result is replaced
    by an empty dict.
    """
    corrupted = JSONFileCorruptedError(path)
    with reraise(json.JSONDecodeError, corrupted):
        decoded = json.loads(text, **kwargs) or {}
    return decoded