def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar.

    ``System.reflink`` is tried first. When it raises ``DvcException`` the
    content is copied in ``LOCAL_CHUNK_SIZE`` pieces while a ``Tqdm`` bar is
    advanced by the number of bytes written.

    Args:
        src: source path (normalised through ``fspath_py35``).
        dest: destination path; if it is an existing directory, the file is
            written inside it under the basename of ``src``.
        no_progress_bar: passed to ``Tqdm`` as ``disable``.
        name: bar description; falls back to the basename of ``dest`` as
            it was passed in (before any directory adjustment).
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        # Reflink is unavailable: fall back to a plain chunked copy.
        bar = Tqdm(desc=name, disable=no_progress_bar, total=total, bytes=True)
        with bar as pbar, open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
            # iter() with a b"" sentinel stops at the first empty read (EOF).
            for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                fdest.write(buf)
                pbar.update(len(buf))
def ignore(self, path):
    """Append the ignore entry for ``path`` to its gitignore file.

    The entry and the gitignore location come from ``self._get_gitignore``.
    Nothing is written when a line equal to the entry (ignoring surrounding
    whitespace) is already present.

    Args:
        path: path that should become ignored.
    """
    entry, gitignore = self._get_gitignore(path)

    ignore_list = []
    if os.path.exists(gitignore):
        # Use a context manager so the handle is closed deterministically.
        with open(gitignore, "r") as fobj:
            ignore_list = fobj.readlines()

        wanted = entry.strip()
        if any(line.strip() == wanted for line in ignore_list):
            return

    msg = "Adding '{}' to '{}'.".format(
        os.path.relpath(path), os.path.relpath(gitignore)
    )
    logger.info(msg)

    content = entry
    if ignore_list:
        # Existing content: start the new entry on its own line.
        content = "\n" + content

    with open(gitignore, "a") as fobj:
        fobj.write(content)

    if self.repo is not None:
        self.repo.files_to_git_add.append(os.path.relpath(gitignore))
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar.

    Tries ``System.reflink`` first; on ``DvcException`` the file is copied
    in ``LOCAL_CHUNK_SIZE`` pieces, reporting the running byte count through
    ``progress.update_target``. ``progress.finish_target`` is called at the
    end unless the progress bar is disabled.

    Args:
        src: source file path.
        dest: destination path; an existing directory receives the file
            under the basename of ``src``.
        no_progress_bar: when true, no progress calls are made.
        name: progress target name; defaults to the basename of ``dest``.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import progress
    from dvc.system import System

    show_progress = not no_progress_bar
    done = 0

    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
            # The b"" sentinel ends the loop at EOF.
            for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                fdest.write(buf)
                done += len(buf)
                if show_progress:
                    progress.update_target(name, done, total)

    if show_progress:
        progress.finish_target(name)
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar.

    The content of ``src`` is copied in ``LOCAL_CHUNK_SIZE`` pieces. Both
    files are managed by a ``with`` statement so they are closed even when
    reading, writing or a progress call raises.

    Args:
        src: source file path.
        dest: destination path; an existing directory receives the file
            under the basename of ``src``.
        no_progress_bar: when true, no progress calls are made.
        name: progress target name; defaults to the basename of ``dest``.
    """
    from dvc.progress import progress

    copied = 0
    name = name if name else os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        target = os.path.join(dest, os.path.basename(src))
    else:
        target = dest

    with open(src, "rb") as fsrc, open(target, "wb+") as fdest:
        while True:
            buf = fsrc.read(LOCAL_CHUNK_SIZE)
            if not buf:
                break
            fdest.write(buf)
            copied += len(buf)
            if not no_progress_bar:
                progress.update_target(name, copied, total)

    if not no_progress_bar:
        progress.finish_target(name)
def init(dvc_dir):
    """Initializes dvc config.

    Creates (or truncates) the file named ``Config.CONFIG`` inside
    ``dvc_dir`` and then builds a ``Config`` for that directory.

    Args:
        dvc_dir (str): path to .dvc directory.

    Returns:
        dvc.config.Config: config object.
    """
    config_path = os.path.join(dvc_dir, Config.CONFIG)
    # Opening with "w+" is enough to leave an empty file behind.
    with open(config_path, "w+"):
        pass
    return Config(dvc_dir)
def _reflink_linux(src, dst):
    """Issue the ``FICLONE`` ioctl on ``dst`` with ``src`` as its source.

    ``dst`` is removed again when the ioctl raises or returns non-zero.
    Clean-up only happens once ``dst`` has actually been opened by this
    call, so a failure to open either file propagates unchanged instead of
    being masked by (or causing) an unrelated ``os.unlink``.

    Args:
        src: path of the existing source file.
        dst: path of the file to create.

    Returns:
        The value returned by ``fcntl.ioctl``.

    Raises:
        OSError/IOError: from opening the files or from the ioctl.
    """
    import os
    import fcntl

    FICLONE = 0x40049409

    ret = 255
    dst_opened = False
    try:
        with open(src, "r") as s, open(dst, "w+") as d:
            dst_opened = True
            ret = fcntl.ioctl(d.fileno(), FICLONE, s.fileno())
    finally:
        # Only clean up a destination this call has created/truncated.
        if ret != 0 and dst_opened:
            os.unlink(dst)

    return ret
def _install_hook(self, name, cmd):
    """Install ``exec dvc <cmd>`` into the git hook called ``name``.

    The hook lives under ``<root_dir>/<GIT_DIR>/hooks``. An existing hook
    gets the line appended unless it already contains ``dvc <cmd>``;
    otherwise a new ``#!/bin/sh`` script is written. The hook is then
    chmod-ed to ``0o777``.

    Args:
        name: hook file name.
        cmd: dvc sub-command to run from the hook.
    """
    command = "dvc {}".format(cmd)

    hook = os.path.join(self.root_dir, self.GIT_DIR, "hooks", name)

    if os.path.isfile(hook):
        with open(hook, "r+") as fobj:
            existing = fobj.read()
            if command not in existing:
                # The read left the position at EOF, so this appends. Start
                # on a fresh line when the hook lacks a trailing newline,
                # otherwise the command is glued onto the last line.
                if existing and not existing.endswith("\n"):
                    fobj.write("\n")
                fobj.write("exec {command}\n".format(command=command))
    else:
        with open(hook, "w+") as fobj:
            fobj.write("#!/bin/sh\n" "exec {command}\n".format(command=command))

    os.chmod(hook, 0o777)
def ignore_remove(self, path):
    """Drop the ignore entry for ``path`` from its gitignore file.

    Every line equal to the entry (ignoring surrounding whitespace) is
    removed, the file is rewritten with the remaining lines and then handed
    to ``self.track_file``. Does nothing when the gitignore file is absent.

    Args:
        path: path whose ignore entry should be removed.
    """
    entry, gitignore = self._get_gitignore(path)

    if not os.path.exists(gitignore):
        return

    with open(gitignore, "r") as fobj:
        current = fobj.readlines()

    unwanted = entry.strip()
    kept = [line for line in current if line.strip() != unwanted]

    with open(gitignore, "w") as fobj:
        fobj.writelines(kept)

    self.track_file(relpath(gitignore))
def ignore(self, path, in_curr_dir=False):
    """Add the ignore entry for ``path`` to a gitignore file.

    The entry and gitignore location come from ``self._get_gitignore``,
    called with either the real path of the current directory or the
    directory of ``path`` as the base. When the entry is already listed
    nothing happens; otherwise it is added through
    ``self._add_entry_to_gitignore``, the gitignore is passed to
    ``self.track_file`` and ``path`` is recorded in ``self.ignored_paths``.

    Args:
        path: path that should become ignored.
        in_curr_dir: use the current directory as the base directory.
    """
    if in_curr_dir:
        base_dir = os.path.realpath(os.curdir)
    else:
        base_dir = os.path.dirname(path)
    entry, gitignore = self._get_gitignore(path, base_dir)

    ignore_list = []
    if os.path.exists(gitignore):
        with open(gitignore, "r") as f:
            ignore_list = f.readlines()
        wanted = entry.strip()
        for line in ignore_list:
            if line.strip() == wanted:
                # Already ignored.
                return

    logger.info(
        "Adding '{}' to '{}'.".format(
            os.path.relpath(path), os.path.relpath(gitignore)
        )
    )

    self._add_entry_to_gitignore(entry, gitignore, ignore_list)
    self.track_file(os.path.relpath(gitignore))
    self.ignored_paths.append(path)
def load(repo, fname):
    """Build a ``Stage`` from the stage file ``fname``.

    The file is checked with ``Stage._check_file_exists``,
    ``Stage._check_dvc_filename`` and ``Stage.is_stage_file``, parsed with
    ``yaml.safe_load`` (an empty document becomes ``{}``) and validated
    before the stage and its deps/outs are constructed.

    Args:
        repo: repo object handed to the ``Stage``.
        fname: path of the stage file.

    Returns:
        The loaded ``Stage``.

    Raises:
        StageFileIsNotDvcFileError: when ``Stage.is_stage_file`` rejects
            ``fname``.
    """
    Stage._check_file_exists(fname)
    Stage._check_dvc_filename(fname)

    if not Stage.is_stage_file(fname):
        raise StageFileIsNotDvcFileError(fname)

    with open(fname, "r") as fd:
        d = yaml.safe_load(fd) or {}

    Stage.validate(d, fname=os.path.relpath(fname))
    path = os.path.abspath(fname)

    # wdir is resolved against the directory containing the stage file.
    stage_dir = os.path.dirname(path)
    wdir = os.path.abspath(
        os.path.join(stage_dir, d.get(Stage.PARAM_WDIR, "."))
    )

    stage = Stage(
        repo=repo,
        path=path,
        wdir=wdir,
        cmd=d.get(Stage.PARAM_CMD),
        md5=d.get(Stage.PARAM_MD5),
        locked=d.get(Stage.PARAM_LOCKED, False),
    )

    stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
    stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

    return stage
def ignore_remove(self, path):
    """Drop the ignore entry for ``path`` from its gitignore file.

    Lines equal to the entry (ignoring surrounding whitespace) are removed
    and the file is rewritten. When ``self.repo`` is set, the relative
    gitignore path is appended to ``self.repo.files_to_git_add``. Does
    nothing when the gitignore file is absent.

    Args:
        path: path whose ignore entry should be removed.
    """
    entry, gitignore = self._get_gitignore(path)

    if not os.path.exists(gitignore):
        return

    with open(gitignore, "r") as fobj:
        current = fobj.readlines()

    unwanted = entry.strip()
    kept = [line for line in current if line.strip() != unwanted]

    with open(gitignore, "w") as fobj:
        fobj.writelines(kept)

    if self.repo is None:
        return
    self.repo.files_to_git_add.append(os.path.relpath(gitignore))
def _ignored(entry, gitignore_path):
    """Tell whether ``entry`` is already listed in ``gitignore_path``.

    Lines and entry are compared after stripping surrounding whitespace.

    Args:
        entry: ignore entry to look for.
        gitignore_path: path of the gitignore file.

    Returns:
        bool: True when a matching line exists, False when none does or
        when the file is absent.
    """
    if not os.path.exists(gitignore_path):
        return False

    with open(gitignore_path, "r") as fobj:
        ignore_list = fobj.readlines()

    wanted = entry.strip()
    return any(line.strip() == wanted for line in ignore_list)
def _install_hook(self, name, cmd):
    """Install a guarded ``exec dvc <cmd>`` line into the git hook ``name``.

    The shell line runs ``dvc <cmd>`` unless ``$3`` equals ``"0"`` or
    ``git ls-files .dvc`` prints nothing. The hook path comes from
    ``self._hook_path``. An existing hook gets the line appended unless it
    already contains it; otherwise a new ``#!/bin/sh`` script is written.
    The hook is then chmod-ed to ``0o777``.

    Args:
        name: hook file name.
        cmd: dvc sub-command to run from the hook.
    """
    command = (
        '[ "$3" = "0" ]'
        ' || [ -z "$(git ls-files .dvc)" ]'
        " || exec dvc {}".format(cmd)
    )

    hook = self._hook_path(name)

    if os.path.isfile(hook):
        with open(hook, "r+") as fobj:
            existing = fobj.read()
            if command not in existing:
                # The read left the position at EOF, so this appends. Start
                # on a fresh line when the hook lacks a trailing newline,
                # otherwise the command is glued onto the last line.
                if existing and not existing.endswith("\n"):
                    fobj.write("\n")
                fobj.write("{command}\n".format(command=command))
    else:
        with open(hook, "w+") as fobj:
            fobj.write("#!/bin/sh\n" "{command}\n".format(command=command))

    os.chmod(hook, 0o777)
def _install_hook(self, name, cmd):
    """Create the git hook ``name`` that runs ``exec dvc <cmd>``.

    The hook is written under ``<root_dir>/<GIT_DIR>/hooks`` and chmod-ed
    to ``0o777``.

    Args:
        name: hook file name.
        cmd: dvc sub-command to run from the hook.

    Raises:
        SCMError: when a file already exists at the hook path.
    """
    hooks_dir = os.path.join(self.root_dir, self.GIT_DIR, "hooks")
    hook = os.path.join(hooks_dir, name)

    if os.path.isfile(hook):
        raise SCMError(
            "git hook '{}' already exists.".format(os.path.relpath(hook))
        )

    script = "#!/bin/sh\nexec dvc {}\n".format(cmd)
    with open(hook, "w+") as fobj:
        fobj.write(script)

    os.chmod(hook, 0o777)
def load_stage_file(path):
    """Parse the YAML stage file at ``path``.

    Args:
        path: stage file path.

    Returns:
        The object produced by ``yaml.safe_load``, or ``{}`` when that
        result is falsy (e.g. an empty document).

    Raises:
        StageFileCorruptedError: when the YAML parser raises
            ``ScannerError``.
    """
    from dvc.exceptions import StageFileCorruptedError

    with open(path, "r") as fobj:
        try:
            parsed = yaml.safe_load(fobj)
        except ScannerError:
            raise StageFileCorruptedError(path)

        if parsed:
            return parsed
        return {}
def __init__(self, ignore_file_path):
    """Read ignore patterns from ``ignore_file_path``.

    Stores the file path, the normalised directory that contains it and a
    ``PathSpec`` built from the file's lines with ``GitWildMatchPattern``.

    Args:
        ignore_file_path: absolute path of the ignore file (asserted).
    """
    assert os.path.isabs(ignore_file_path)

    self.ignore_file_path = ignore_file_path

    containing_dir = os.path.dirname(ignore_file_path)
    self.dirname = os.path.normpath(containing_dir)

    with open(ignore_file_path, encoding="utf-8") as fobj:
        # The open file is consumed line by line as the pattern source.
        spec = PathSpec.from_lines(GitWildMatchPattern, fobj)
        self.ignore_spec = spec
def create(self, name, contents):
    """Append ``contents`` to the file ``name``, creating it if needed.

    Missing parent directories are created first. The file is opened in
    append mode with UTF-8 encoding.

    Args:
        name: path of the file to write.
        contents: ``str`` written as is; anything else is decoded as UTF-8
            via its ``decode`` method.
    """
    parent = os.path.dirname(name)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)

    with open(name, "a", encoding="utf-8") as f:
        if isinstance(contents, str):
            text = contents
        else:
            text = contents.decode("utf-8")
        f.write(text)
def _read_metrics_filesystem(path, typ, xpath, rel_path, branch):
    """Read a metric from the file at ``path``.

    Args:
        path: file to read.
        typ, xpath, rel_path, branch: forwarded unchanged to
            ``_read_metric`` as keyword arguments.

    Returns:
        Whatever ``_read_metric`` returns for the opened file, or ``None``
        when ``path`` does not exist.
    """
    if os.path.exists(path):
        with open(path, "r") as fd:
            return _read_metric(
                fd, typ=typ, xpath=xpath, rel_path=rel_path, branch=branch
            )
    return None
def _link(self, from_info, to_info, link_types):
    """Link ``from_info`` to ``to_info``, creating the parent directory.

    A zero-size source is not linked: an empty file is created at the
    destination instead. Any other source is handed to ``self._try_links``.

    Args:
        from_info: source; its ``fspath`` must be an existing file
            (asserted).
        to_info: destination; its ``fspath`` is the path to create.
        link_types: forwarded to ``self._try_links``.
    """
    from_path = from_info.fspath
    to_path = to_info.fspath

    assert os.path.isfile(from_path)

    parent = os.path.dirname(to_path)
    if not os.path.exists(parent):
        os.makedirs(parent)

    if os.path.getsize(from_path) != 0:
        self._try_links(from_info, to_info, link_types)
        return

    # NOTE: just create an empty file for an empty cache
    with open(to_path, "w+"):
        pass

    logger.debug("Created empty file: {} -> {}".format(from_path, to_path))
def _add_entry_to_gitignore(entry, gitignore):
    """Append ``entry`` as its own line to the ``gitignore`` file.

    A newline is inserted first when the existing content does not end
    with one. The file is created when missing.

    The last byte is inspected in binary mode: seeking a text-mode UTF-8
    file to ``tell() - 1`` can land inside a multi-byte character and make
    the following read fail with ``UnicodeDecodeError``.

    Args:
        entry: text of the ignore entry (without trailing newline).
        gitignore: path of the gitignore file.
    """
    prefix = ""
    if os.path.exists(gitignore):
        with open(gitignore, "rb") as raw:
            raw.seek(0, os.SEEK_END)
            if raw.tell() > 0:
                raw.seek(-1, os.SEEK_END)
                if raw.read(1) != b"\n":
                    prefix = "\n"

    with open(gitignore, "a", encoding="utf-8") as fobj:
        fobj.write("{}{}\n".format(prefix, entry))
def _get_latest_version(self):
    """Fetch version info from ``self.URL`` and store it as JSON.

    The decoded response is dumped to ``self.updater_file``. This is a
    best-effort check: request failures and bodies that cannot be decoded
    as JSON are logged at debug level and the method returns without
    writing anything.
    """
    import json

    try:
        r = requests.get(self.URL, timeout=self.TIMEOUT_GET)
        info = r.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers requests versions whose ``Response.json`` raises
        # a plain JSON decode error that is not a RequestException.
        msg = "Failed to retrieve latest version: {}"
        logger.debug(msg.format(exc))
        return

    with open(self.updater_file, "w+") as fobj:
        json.dump(info, fobj)
def dump(self):
    """Write this object's ``dumpd()`` data as YAML to ``self.path``.

    The file name is checked with ``self._check_dvc_filename`` first, and
    the written file is passed to ``self.repo.scm.track_file`` afterwards
    (as a relative path).
    """
    fname = self.path

    self._check_dvc_filename(fname)

    rel_fname = os.path.relpath(fname)
    logger.info("Saving information to '{file}'.".format(file=rel_fname))

    data = self.dumpd()
    with open(fname, "w") as fd:
        yaml.safe_dump(data, fd, default_flow_style=False)

    self.repo.scm.track_file(rel_fname)
def _download(self, from_info, to_file, name=None, no_progress_bar=False):
    """Stream ``from_info.url`` into ``to_file`` with a ``Tqdm`` bar.

    Args:
        from_info: source; its ``url`` attribute is requested with GET.
        to_file: local path, opened in binary write mode.
        name: bar label; ``from_info.url`` is used when ``None``.
        no_progress_bar: disables the bar and skips the
            ``self._content_length`` lookup.
    """
    request = self._request("GET", from_info.url, stream=True)

    if no_progress_bar:
        total = None
    else:
        total = self._content_length(from_info)
    label = from_info.url if name is None else name

    bar = Tqdm(
        total=total,
        leave=False,
        bytes=True,
        desc_truncate=label,
        disable=no_progress_bar,
    )
    with bar as pbar, open(to_file, "wb") as fd:
        for chunk in request.iter_content(chunk_size=self.CHUNK_SIZE):
            fd.write(chunk)
            fd.flush()
            pbar.update(len(chunk))
def dump(self, fname=None):
    """Write this object's ``dumpd()`` data as YAML.

    Args:
        fname: target file; ``self.path`` is used when it is falsy.

    The file name is checked with ``self._check_dvc_filename`` and, after
    writing, its relative path is appended to
    ``self.repo.files_to_git_add``.
    """
    if not fname:
        fname = self.path

    self._check_dvc_filename(fname)

    rel_fname = os.path.relpath(fname)
    logger.info("Saving information to '{file}'.".format(file=rel_fname))

    with open(fname, "w") as fd:
        # dumpd() is evaluated only after the file has been opened.
        yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    self.repo.files_to_git_add.append(rel_fname)
def _reflink_linux(src, dst):
    """Issue the ``FICLONE`` ioctl on ``dst`` with ``src`` as its source.

    ``dst`` is removed again when the ioctl raises ``IOError`` or returns
    non-zero. Both files are managed by a ``with`` statement, so the source
    handle is also closed when opening ``dst`` fails.

    Args:
        src: path of the existing source file.
        dst: path of the file to create.

    Returns:
        The value returned by ``fcntl.ioctl``.

    Raises:
        IOError: from opening the files or from the ioctl.
    """
    import os
    import fcntl

    FICLONE = 0x40049409

    dst_opened = False
    try:
        with open(src, "r") as s, open(dst, "w+") as d:
            dst_opened = True
            ret = fcntl.ioctl(d.fileno(), FICLONE, s.fileno())
    except IOError:
        # Failures to open either file propagate without touching dst; only
        # a failed ioctl leaves a destination that must be cleaned up.
        if dst_opened:
            os.unlink(dst)
        raise

    if ret != 0:
        os.unlink(dst)

    return ret
def _write_request_content(
    self, mode, partial_file, request, transferred_bytes, callback=None
):
    """Write the chunks of ``request`` to ``partial_file``.

    Chunks are numbered from 1. A chunk is written only when
    ``chunk_number * CHUNK_SIZE`` exceeds the number of bytes counted as
    transferred so far; each written chunk increases that count and is
    reported to ``callback``.

    Args:
        mode: mode string used to open ``partial_file``.
        partial_file: path of the file being written.
        request: object whose ``iter_content(chunk_size=...)`` yields the
            chunks.
        transferred_bytes: starting value of the transferred-bytes count.
        callback: optional callable receiving the updated count.
    """
    chunk_size = self.CHUNK_SIZE

    with open(partial_file, mode) as fd:
        chunks = request.iter_content(chunk_size=chunk_size)
        for chunk_number, chunk in enumerate(chunks, start=1):
            if chunk_number * chunk_size <= transferred_bytes:
                # Falls within the range already counted as transferred.
                continue

            fd.write(chunk)
            fd.flush()
            transferred_bytes += len(chunk)

            if callback:
                callback(transferred_bytes)
def _download(self, from_info, to_file, name=None, no_progress_bar=False):
    """Stream ``from_info.url`` into ``to_file`` with a ``Tqdm`` bar.

    Args:
        from_info: source; its ``url`` attribute is requested with GET.
        to_file: local path, opened in binary write mode.
        name: bar description; ``from_info.url`` is used when ``None``.
        no_progress_bar: disables the bar and skips the
            ``self._content_length`` lookup.

    Raises:
        HTTPError: when the response status code is not 200.
    """
    response = self._request("GET", from_info.url, stream=True)
    if response.status_code != 200:
        raise HTTPError(response.status_code, response.reason)

    if no_progress_bar:
        total = None
    else:
        total = self._content_length(response)
    label = from_info.url if name is None else name

    bar = Tqdm(
        total=total,
        leave=False,
        bytes=True,
        desc=label,
        disable=no_progress_bar,
    )
    with bar as pbar, open(to_file, "wb") as fd:
        for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
            fd.write(chunk)
            pbar.update(len(chunk))
def _get_cache_type(self, path_info):
    """Return ``self.cache_types[0]``, probing it once by linking a file.

    Until ``self.cache_type_confirmed`` is set, a test file under
    ``self.path_info`` is linked to a uniquely named sibling of
    ``path_info`` via ``self.link``. Both files are removed afterwards,
    whether or not linking succeeded, and the flag is set only when the
    link call did not raise.

    Args:
        path_info: path whose directory receives the temporary link.
    """
    if not self.cache_type_confirmed:
        workspace_file = path_info.with_name("." + uuid())
        test_cache_file = self.path_info / ".cache_type_test_file"

        if not self.exists(test_cache_file):
            with open(fspath_py35(test_cache_file), "wb") as fobj:
                fobj.write(bytes(1))

        try:
            self.link(test_cache_file, workspace_file)
        finally:
            self.remove(workspace_file)
            self.remove(test_cache_file)

        self.cache_type_confirmed = True

    return self.cache_types[0]
def _download(self, from_info, to_file, name=None, no_progress_bar=False):
    """Stream ``from_info.url`` into ``to_file``, optionally with progress.

    When the progress bar is enabled and ``self._content_length`` returns a
    truthy total, a ``ProgressBarCallback`` is called with the running byte
    count after every chunk.

    Args:
        from_info: source; its ``url`` attribute is requested with GET.
        to_file: local path, opened in binary write mode.
        name: name given to the ``ProgressBarCallback``.
        no_progress_bar: skip the content-length lookup and the callback.
    """
    callback = None
    if not no_progress_bar:
        total = self._content_length(from_info.url)
        if total:
            callback = ProgressBarCallback(name, total)

    request = self._request("GET", from_info.url, stream=True)

    received = 0
    with open(to_file, "wb") as fd:
        for chunk in request.iter_content(chunk_size=self.CHUNK_SIZE):
            fd.write(chunk)
            fd.flush()
            received += len(chunk)

            if callback:
                callback(received)
def file_md5(fname):
    """Get the (md5 hexdigest, md5 digest) of a file.

    The file is read in ``LOCAL_CHUNK_SIZE`` pieces. Content that
    ``istextfile`` reports as text is passed through ``dos2unix`` before
    hashing. A ``Tqdm`` bar is shown only for files of at least
    ``LARGE_FILE_SIZE`` bytes.

    Args:
        fname: path of the file (normalised through ``fspath_py35``).

    Returns:
        ``(hexdigest, digest)``, or ``(None, None)`` when the file does
        not exist.
    """
    from dvc.progress import Tqdm
    from dvc.istextfile import istextfile

    fname = fspath_py35(fname)

    if not os.path.exists(fname):
        return (None, None)

    hash_md5 = hashlib.md5()
    binary = not istextfile(fname)
    size = os.path.getsize(fname)

    is_large = size >= LARGE_FILE_SIZE
    if is_large:
        msg = "Computing md5 for a large file '{}'. This is only done once."
        logger.info(msg.format(relpath(fname)))
    name = relpath(fname)

    bar = Tqdm(
        desc=name,
        disable=not is_large,
        total=size,
        bytes=True,
        leave=False,
    )
    with bar as pbar, open(fname, "rb") as fobj:
        # The b"" sentinel ends the loop at EOF.
        for data in iter(lambda: fobj.read(LOCAL_CHUNK_SIZE), b""):
            hash_md5.update(data if binary else dos2unix(data))
            # Progress counts the raw bytes read, not the converted chunk.
            pbar.update(len(data))

    return (hash_md5.hexdigest(), hash_md5.digest())