def parse_py(text, path):
    """Parses text from .py file into Python structure."""
    with reraise(SyntaxError, PythonFileCorruptedError(path)):
        tree = ast.parse(text, filename=path)

    result = _ast_tree_to_dict(tree)
    return result
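All of these helpers lean on funcy's reraise context manager, which catches one set of exceptions and raises a domain-specific one in its place, chaining the original via raise ... from. A minimal, self-contained sketch of the pattern; PyFileCorruptedError is a hypothetical stand-in for DVC's PythonFileCorruptedError:

import ast

from funcy import reraise


class PyFileCorruptedError(Exception):
    pass


def parse_or_raise(text, path):
    # Any SyntaxError raised inside the block is re-raised as the domain
    # error; the original exception stays available as __cause__.
    with reraise(SyntaxError, PyFileCorruptedError(f"unable to parse: {path}")):
        return ast.parse(text, filename=path)


try:
    parse_or_raise("def broken(:", "params.py")
except PyFileCorruptedError as exc:
    print(exc, "| cause:", type(exc.__cause__).__name__)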
def fetch_external(self, paths: Iterable, **kwargs):
    """Fetch specified external repo paths into cache.

    Returns 3-tuple in the form
        (downloaded, failed, list(cache_infos))
    where cache_infos can be used as checkout targets for the
    fetched paths.
    """
    download_results = []
    failed = 0

    root = PathInfo(self.root_dir)
    paths = [root / path for path in paths]

    def download_update(result):
        download_results.append(result)

    hash_infos = []
    for path in paths:
        with reraise(FileNotFoundError, PathMissingError(path, self.url)):
            metadata = self.repo_tree.metadata(path)

        self._check_repo(path, metadata.repo)
        repo = metadata.repo
        hash_info = self._fetch_to_cache(path, repo, download_update, **kwargs)
        hash_infos.append(hash_info)

    return sum(download_results), failed, hash_infos
def parse_yaml(text, path, typ="safe"):
    from ruamel.yaml import YAML
    from ruamel.yaml import YAMLError as _YAMLError

    yaml = YAML(typ=typ)
    with reraise(_YAMLError, YAMLFileCorruptedError(path)):
        return yaml.load(text) or {}
def _each_iter(self, key):
    err_message = f"Could not find '{key}' in foreach group '{self.name}'"
    with reraise(KeyError, EntryNotFound(err_message)):
        value = self.normalized_iterable[key]

    # NOTE: we need to use the resolved iterable/foreach-data,
    # not the normalized one, to figure out whether to make item/key
    # available
    inserted = self._inserted_keys(self.resolved_iterable)
    temp_dict = {self.pair.value: value}
    key_str = self.pair.key
    if key_str in inserted:
        temp_dict[key_str] = key

    with self.context.set_temporarily(temp_dict, reserve=True):
        # optimization: item and key can be removed on __exit__() as they
        # are top-level values, and are not merged recursively.
        # This helps us avoid cloning context, which is slower
        # (increasing the size of the context might increase
        # the no. of items to be generated, which means more cloning,
        # i.e. quadratic complexity).
        generated = self._generate_name(key)
        entry = EntryDefinition(
            self.resolver, self.context, generated, self.do_definition
        )
        try:
            # optimization: skip checking for syntax errors on each
            # foreach-generated stage. We do it once when accessing
            # do_definition.
            return entry.resolve_stage(skip_checks=True)
        except ContextError as exc:
            format_and_raise(exc, f"stage '{generated}'", self.relpath)
def get_checksum(self, path):
    path_info = PathInfo(self.root_dir) / path
    with reraise(FileNotFoundError, PathMissingError(path, self.url)):
        metadata = self.repo_tree.metadata(path_info)

    # ignore subrepos when picking the tree
    tree = self._get_tree_for(metadata.repo)
    return tree.get_hash(path_info)
def parse_py_for_update(text, path):
    """Parses text into dict for update params."""
    with reraise(SyntaxError, PythonFileCorruptedError(path)):
        tree = ast.parse(text, filename=path)

    result = _ast_tree_to_dict(tree)
    result.update({_PARAMS_KEY: _ast_tree_to_dict(tree, lineno=True)})
    result.update({_PARAMS_TEXT_KEY: text})
    return result
def parse_yaml(text, path, typ="safe"):
    yaml = YAML(typ=typ)
    try:
        with reraise(_YAMLError, YAMLFileCorruptedError(path)):
            return yaml.load(text) or {}
    except DuplicateKeyError as exc:
        # NOTE: unfortunately this one doesn't inherit from YAMLError, so we
        # have to catch it by hand. See
        # https://yaml.readthedocs.io/en/latest/api.html#duplicate-keys
        raise YAMLError(path, exc.problem)
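A short sketch of why the by-hand catch is needed, assuming a recent ruamel.yaml where duplicate keys raise DuplicateKeyError by default:

from ruamel.yaml import YAML
from ruamel.yaml.constructor import DuplicateKeyError

yaml = YAML(typ="safe")
try:
    yaml.load("key: 1\nkey: 2\n")
except DuplicateKeyError as exc:
    # Caught separately because it does not go through the reraise() above.
    print("duplicate key:", exc.problem)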
def to_datapoints(self, **kwargs):
    with reraise(
        [ParseError, csv.Error],
        PlotParsingError(self.filename, self.revision),
    ):
        data = self.raw(**kwargs)

    for data_proc in self._processors():
        data = data_proc(
            data, filename=self.filename, revision=self.revision, **kwargs
        )
    return data
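funcy's reraise also accepts multiple source exception types (an iterable is converted to a tuple internally), which is what the [ParseError, csv.Error] list above relies on. A minimal sketch with a hypothetical PlotError:

import csv

from funcy import reraise


class PlotError(Exception):
    pass


def detect_dialect(sample):
    # csv.Sniffer raises csv.Error when it cannot determine a dialect;
    # reraise() turns either listed error into the domain-level PlotError.
    with reraise([csv.Error, ValueError], PlotError("cannot parse plot data")):
        return csv.Sniffer().sniff(sample)


try:
    detect_dialect("")
except PlotError as exc:
    print(exc, "| cause:", type(exc.__cause__).__name__)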
def parse_yaml(text, path, typ="safe"):
    yaml = YAML(typ=typ)
    with reraise(YAMLError, YAMLFileCorruptedError(path)):
        result = yaml.load(text) or {}
        if "vars" in result:
            try:
                result, _ = render_dvc_template(text)
            except Exception as exc:
                raise YAMLFileCorruptedError(path) from exc
        return result
def get_external(self, path, dest):
    """Convenience wrapper for fetch_external and checkout."""
    path_info = PathInfo(self.root_dir) / path
    with reraise(FileNotFoundError, PathMissingError(path, self.url)):
        metadata = self.repo_tree.metadata(path_info)

    self._check_repo(path_info, metadata.repo)
    if metadata.output_exists:
        repo = metadata.repo
        cache = repo.cache.local
        # fetch DVC and git files to tmpdir cache, then checkout
        save_info = self._fetch_to_cache(path_info, repo, None)
        cache.checkout(PathInfo(dest), save_info)
    else:
        # git-only folder, just copy files directly to dest
        tree = self._get_tree_for(metadata.repo)  # ignore subrepos
        tree.copytree(path_info, dest)
def as_json(self, **kwargs):
    with reraise(
        KeyError,
        DvcException(f"{type(self).__name__} needs 'path' to store images."),
    ):
        path = kwargs["path"]

    results = []
    for revision, _, img_path in self._save_images(path):
        results.append(
            {
                self.TYPE_KEY: self.TYPE,
                self.REVISIONS_KEY: [revision],
                "url": img_path,
            }
        )
    return json.dumps(results)
def get_url(path, repo=None, rev=None, remote=None):
    """
    Returns the URL to the storage location of a data file or directory
    tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises OutputNotFoundError if the file is not a DVC-tracked file.

    NOTE: This function does not check for the actual existence of the
    file or directory in the remote storage.
    """
    with _make_repo(repo, rev=rev) as _repo:
        path_info = PathInfo(_repo.root_dir) / path
        with reraise(FileNotFoundError, PathMissingError(path, repo)):
            metadata = _repo.repo_tree.metadata(path_info)

        if not metadata.is_dvc:
            raise OutputNotFoundError(path, repo)

        cloud = metadata.repo.cloud
        hash_info = _repo.repo_tree.get_hash(path_info)
        return cloud.get_url_for(remote, checksum=hash_info.value)
def get_url(path, repo=None, rev=None, remote=None):
    """
    Returns the URL to the storage location of a data file or directory
    tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises OutputNotFoundError if the file is not tracked by DVC.

    NOTE: This function does not check for the actual existence of the
    file or directory in the remote storage.
    """
    with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as _repo:
        fs_path = _repo.fs.path.join(_repo.root_dir, path)
        with reraise(FileNotFoundError, PathMissingError(path, repo)):
            info = _repo.repo_fs.info(fs_path)

        if not info["isdvc"]:
            raise OutputNotFoundError(path, repo)

        cloud = info["repo"].cloud
        md5 = info["repo"].dvcfs.info(fs_path)["md5"]
        return cloud.get_url_for(remote, checksum=md5)
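For context, a hypothetical call to the public API this implements, assuming it is exposed as dvc.api.get_url and the target repo has a default remote configured:

import dvc.api

# Resolves to an address under the repo's default remote, e.g. an
# s3:// or https:// URL; no check is made that the object exists there.
url = dvc.api.get_url(
    "data/data.xml",
    repo="https://github.com/iterative/example-get-started",
)
print(url)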
def __getattr__(self, item: str) -> Column:
    with reraise(KeyError, AttributeError):
        return self.column(item)
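Translating KeyError into AttributeError here is what keeps getattr() and hasattr() semantics intact for column access. A self-contained sketch with a hypothetical Frame class standing in for the real owner of column():

from funcy import reraise


class Frame:
    def __init__(self, columns):
        self._columns = dict(columns)

    def column(self, name):
        return self._columns[name]  # raises KeyError if missing

    def __getattr__(self, item):
        # Without this translation, a missing column would leak KeyError,
        # and hasattr(frame, "nope") would crash instead of returning False.
        with reraise(KeyError, AttributeError):
            return self.column(item)


frame = Frame({"price": [1, 2, 3]})
print(frame.price)          # [1, 2, 3]
print(hasattr(frame, "x"))  # False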
def parse_toml(text, path, decoder=None):
    with reraise(toml.TomlDecodeError, TOMLFileCorruptedError(path)):
        return toml.loads(text, decoder=decoder)
def parse_toml(text, path, decoder=None):
    from toml import TomlDecodeError, loads

    with reraise(TomlDecodeError, TOMLFileCorruptedError(path)):
        return loads(text, decoder=decoder)
def _load_data(path: "AnyPath", parser: ParserFn, fs: "BaseFileSystem" = None):
    open_fn = fs.open if fs else open
    encoding = "utf-8"
    with open_fn(path, encoding=encoding) as fd:  # type: ignore
        with reraise(UnicodeDecodeError, EncodingError(path, encoding)):
            return parser(fd.read(), path)
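A runnable sketch of the same encoding guard, with EncodingError as a hypothetical stand-in and the optional filesystem argument dropped:

from funcy import reraise


class EncodingError(Exception):
    def __init__(self, path, encoding):
        super().__init__(f"{path} is not valid {encoding}")


def load_text(path, encoding="utf-8"):
    with open(path, encoding=encoding) as fd:
        # The file decodes on read(), so UnicodeDecodeError surfaces here,
        # inside the reraise block, not at open() time.
        with reraise(UnicodeDecodeError, EncodingError(path, encoding)):
            return fd.read()


with open("blob.bin", "wb") as fd:
    fd.write(b"\xff\xfe\x00 not utf-8")

try:
    load_text("blob.bin")
except EncodingError as exc:
    print(exc)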
def parse_yaml(text, path, typ="safe"):
    yaml = YAML(typ=typ)
    with reraise(YAMLError, YAMLFileCorruptedError(path)):
        return yaml.load(text) or {}
def parse_json(text, path, **kwargs):
    with reraise(json.JSONDecodeError, JSONFileCorruptedError(path)):
        return json.loads(text, **kwargs) or {}
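And a self-contained usage sketch for the JSON variant, with JSONFileCorruptedError as a hypothetical local class:

import json

from funcy import reraise


class JSONFileCorruptedError(Exception):
    pass


def parse_json(text, path, **kwargs):
    with reraise(json.JSONDecodeError, JSONFileCorruptedError(path)):
        return json.loads(text, **kwargs) or {}


print(parse_json('{"lr": 0.01}', "params.json"))  # {'lr': 0.01}

try:
    parse_json("{not json", "params.json")
except JSONFileCorruptedError as exc:
    print("corrupted:", exc)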