def remove_unused_links(self, used):
    """Delete links created by dvc that are no longer in use.

    Every row of the link state table is examined. A link is removed from
    disk only when it is not listed in ``used``, still exists, and its
    current inode and mtime equal the recorded ones. The rows of removed
    links are then deleted from the table, one chunk (sized by
    ``SQLITE_MAX_VARIABLES_NUMBER``) per statement.

    Args:
        used (list): list of used links that should not be removed.
    """
    stale = []

    self._execute("SELECT * FROM {}".format(self.LINK_STATE_TABLE))
    for rel, saved_inode, saved_mtime in self.cursor:
        saved_inode = self._from_sqlite(saved_inode)
        full_path = os.path.join(self.root_dir, rel)

        if full_path in used or not os.path.exists(full_path):
            continue

        current_inode = get_inode(full_path)
        current_mtime, _ = get_mtime_and_size(
            full_path, self.repo.dvcignore
        )

        # A link that was touched since it was recorded is left alone.
        if not (saved_inode == current_inode and saved_mtime == current_mtime):
            continue

        logger.debug("Removing '{}' as unused link.".format(full_path))
        remove(full_path)
        stale.append(rel)

    for batch in to_chunks(stale, chunk_size=SQLITE_MAX_VARIABLES_NUMBER):
        placeholders = ",".join(["?"] * len(batch))
        cmd = "DELETE FROM {} WHERE path IN ({})".format(
            self.LINK_STATE_TABLE, placeholders
        )
        self._execute(cmd, tuple(batch))
def get(self, path_info):
    """Look up the stored hash for ``path_info`` in the state database.

    Args:
        path_info (dict): path info to get the hash for.

    Returns:
        HashInfo or None: the stored md5 hash, or None when the path does
            not exist, has no state record, or its mtime/size no longer
            match the record.
    """
    assert isinstance(path_info, str) or path_info.scheme == "local"
    fs_path = os.fspath(path_info)

    # NOTE: os.path.exists is used on purpose instead of
    # LocalFileSystem.exists: the latter relies on lexists() and reports
    # True for broken symlinks, which get_mtime_and_size cannot stat().
    if not os.path.exists(fs_path):
        return None

    current_mtime, current_size = get_mtime_and_size(fs_path, self.fs)
    current_inode = get_inode(fs_path)

    record = self.get_state_record_for_inode(current_inode)
    if not record:
        return None

    saved_mtime, saved_size, value, _ = record
    if self._file_metadata_changed(
        current_mtime, saved_mtime, current_size, saved_size
    ):
        return None

    # Refresh the record's timestamp now that it has been used.
    self._update_state_record_timestamp_for_inode(current_inode)
    return HashInfo("md5", value, size=int(current_size))
def _do_update(self, path, known_checksum=None):
    """Bring the stored state record for ``path`` up to date.

    Args:
        path (str): path whose record should be refreshed.
        known_checksum: passed through unchanged to the record helpers.

    Returns:
        tuple: ``(None, None)`` when ``path`` does not exist; otherwise
            the ``(md5, info)`` pair produced by updating the existing
            record or inserting a new one.
    """
    if not os.path.exists(path):
        return None, None

    current_mtime, current_size = get_mtime_and_size(path)
    current_inode = get_inode(path)

    record = self.get_state_record_for_inode(current_inode)
    if not record:
        # Nothing stored for this inode yet.
        md5, info = self._insert_new_state_record(
            path, current_inode, current_mtime, current_size, known_checksum
        )
        return md5, info

    md5, info = self._update_existing_state_record(
        path,
        current_inode,
        current_mtime,
        current_size,
        record,
        known_checksum,
    )
    return md5, info
def save(self, path_info, fs, hash_info):
    """Store ``hash_info`` for ``path_info`` in the state database.

    Does nothing when ``fs`` is not a ``LocalFileSystem``. Otherwise the
    record keyed by the path's inode is inserted if missing, or updated
    with the current mtime, size and hash value.

    Args:
        path_info (dict): path_info to save hash for.
        fs: filesystem the path belongs to.
        hash_info (HashInfo): hash to save.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    assert isinstance(path_info, str) or path_info.scheme == "local"
    assert hash_info
    assert isinstance(hash_info, HashInfo)
    assert os.path.exists(path_info)

    mtime, size = get_mtime_and_size(path_info, self.fs)
    inode = get_inode(path_info)

    if self.get_state_record_for_inode(inode):
        self._update_state_for_path_changed(
            inode, mtime, size, hash_info.value
        )
    else:
        self._insert_new_state_record(inode, mtime, size, hash_info.value)
def get_unused_links(self, used, fs):
    """Collect saved links that are unused and unmodified.

    Nothing is deleted here; the relative paths of the candidates are
    only gathered and returned.

    Args:
        used (list): list of used links that should not be reported.
        fs: filesystem the links belong to.

    Returns:
        list or None: relative paths of saved links that are not in
            ``used``, still exist, and whose (inode, mtime) equals the
            saved value. None when ``fs`` is not a ``LocalFileSystem``.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    stale = []
    with self.links as ref:
        for rel in ref:
            full_path = os.path.join(self.root_dir, rel)
            if full_path not in used and self.fs.exists(full_path):
                inode = get_inode(full_path)
                mtime, _ = get_mtime_and_size(full_path, self.fs)

                if ref[rel] == (inode, mtime):
                    logger.debug("Removing '%s' as unused link.", full_path)
                    stale.append(rel)

    return stale
def save_link(self, path_info):
    """Adds the specified path to the list of links created by dvc.

    This list is later used on `dvc checkout` to cleanup old links.
    Nothing is recorded when the path does not exist.

    Args:
        path_info (dict): path info to add to the list of links.
    """
    assert path_info.scheme == "local"
    path = fspath_py35(path_info)

    if not os.path.exists(path):
        return

    mtime, _ = get_mtime_and_size(path)
    inode = get_inode(path)
    relative_path = relpath(path, self.root_dir)

    # Only the table name is formatted into the statement. The values are
    # bound as parameters so that a path containing quote characters can
    # neither break nor alter the SQL.
    cmd = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)".format(
        self.LINK_STATE_TABLE
    )
    # mtime used to be embedded as a quoted literal; str() keeps it stored
    # as text exactly as before.
    self._execute(cmd, (relative_path, self._to_sqlite(inode), str(mtime)))
def get(self, path_info, fs):
    """Look up the cached hash for ``path_info``.

    Args:
        path_info (dict): path info to get the hash for.
        fs: filesystem the path belongs to.

    Returns:
        HashInfo or None: the cached md5 hash, or None when ``fs`` is not
            a ``LocalFileSystem``, the path does not exist, or there is no
            cache entry whose mtime and size match the file.
    """
    if not isinstance(fs, LocalFileSystem):
        return None

    assert isinstance(path_info, str) or path_info.scheme == "local"
    fs_path = os.fspath(path_info)

    # NOTE: os.path.exists is used on purpose instead of
    # LocalFileSystem.exists: the latter relies on lexists() and reports
    # True for broken symlinks, which get_mtime_and_size cannot stat().
    if not os.path.exists(fs_path):
        return None

    mtime, size = get_mtime_and_size(fs_path, self.fs)
    entry = self.md5s.get(get_inode(fs_path))

    # Entries are indexed as (mtime, size, hash value).
    if not entry or entry[0] != mtime or entry[1] != size:
        return None

    return HashInfo("md5", entry[2], size=int(size))
def get(self, path_info):
    """Look up the stored checksum for ``path_info``.

    Args:
        path_info (dict): path info to get the checksum for.

    Returns:
        str or None: the stored checksum, or None when the path does not
            exist, has no state record, or its mtime/size no longer match
            the record.
    """
    assert path_info.scheme == "local"
    fs_path = fspath_py35(path_info)

    if not os.path.exists(fs_path):
        return None

    current_mtime, current_size = get_mtime_and_size(
        fs_path, self.repo.dvcignore
    )
    current_inode = get_inode(fs_path)

    record = self.get_state_record_for_inode(current_inode)
    if not record:
        return None

    saved_mtime, saved_size, checksum, _ = record
    if self._file_metadata_changed(
        current_mtime, saved_mtime, current_size, saved_size
    ):
        return None

    # Refresh the record's timestamp now that it has been used.
    self._update_state_record_timestamp_for_inode(current_inode)
    return checksum
def get_unused_links(self, used):
    """Collect saved links that are unused and unmodified.

    Nothing is deleted here; the relative paths of the candidates are
    only gathered and returned.

    Args:
        used (list): list of used links that should not be reported.

    Returns:
        list: relative paths from the link state table that are not in
            ``used``, still exist, and whose inode and mtime equal the
            recorded ones.
    """
    stale = []

    self._execute(f"SELECT * FROM {self.LINK_STATE_TABLE}")
    for rel, saved_inode, saved_mtime in self.cursor:
        saved_inode = self._from_sqlite(saved_inode)
        full_path = os.path.join(self.root_dir, rel)

        if full_path not in used and self.tree.exists(full_path):
            current_inode = get_inode(full_path)
            current_mtime, _ = get_mtime_and_size(full_path, self.tree)

            if (saved_inode, saved_mtime) == (current_inode, current_mtime):
                logger.debug("Removing '%s' as unused link.", full_path)
                stale.append(rel)

    return stale
def get(self, path_info, fs):
    """Look up the cached metadata and hash for ``path_info``.

    Args:
        path_info (dict): path info to get the hash for.
        fs: filesystem the path belongs to.

    Returns:
        tuple: ``(Meta, HashInfo)`` when a cache entry matches the file's
            current mtime and size; ``(None, None)`` when ``fs`` is not a
            ``LocalFileSystem``, the file is missing, or no entry matches.
    """
    from .objects.meta import Meta

    miss = (None, None)

    if not isinstance(fs, LocalFileSystem):
        return miss

    try:
        mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
    except FileNotFoundError:
        return miss

    entry = self.md5s.get(get_inode(path_info))

    # Entries are indexed as (mtime, size as str, hash value).
    if not entry or entry[0] != mtime or entry[1] != str(size):
        return miss

    return Meta(size=size), HashInfo("md5", entry[2])
def save(self, path_info, checksum):
    """Store ``checksum`` for ``path_info`` in the state database.

    The record keyed by the path's inode is inserted if missing, or
    updated with the current mtime, size and checksum.

    Args:
        path_info (dict): path_info to save checksum for.
        checksum (str): checksum to save.
    """
    assert path_info.scheme == "local"
    assert checksum is not None

    fs_path = fspath_py35(path_info)
    assert os.path.exists(fs_path)

    mtime, size = get_mtime_and_size(fs_path, self.repo.dvcignore)
    inode = get_inode(fs_path)

    if self.get_state_record_for_inode(inode):
        self._update_state_for_path_changed(inode, mtime, size, checksum)
    else:
        self._insert_new_state_record(inode, mtime, size, checksum)
def get(self, path_info):
    """Look up the stored checksum for ``path_info``.

    Args:
        path_info (dict): path info to get the checksum for.

    Returns:
        str or None: the stored checksum, or None when the path does not
            exist, has no state record, or its mtime/size no longer match
            the record.
    """
    assert isinstance(path_info, str) or path_info.scheme == "local"
    fs_path = os.fspath(path_info)

    # NOTE: os.path.exists is used on purpose instead of
    # WorkingTree.exists: the latter uses lexists() and reports True for
    # broken symlinks, which get_mtime_and_size cannot stat().
    if not os.path.exists(fs_path):
        return None

    current_mtime, current_size = get_mtime_and_size(fs_path, self.tree)
    current_inode = get_inode(fs_path)

    record = self.get_state_record_for_inode(current_inode)
    if not record:
        return None

    saved_mtime, saved_size, checksum, _ = record
    if self._file_metadata_changed(
        current_mtime, saved_mtime, current_size, saved_size
    ):
        return None

    # Refresh the record's timestamp now that it has been used.
    self._update_state_record_timestamp_for_inode(current_inode)
    return checksum
def remove_unused_links(self, used):
    """Removes all saved links except the ones that are used.

    A link is removed from disk only when it is not in ``used``, still
    exists, and its current inode and mtime equal the recorded ones. The
    rows of removed links are then deleted from the link state table.

    Args:
        used (list): list of used links that should not be removed.
    """
    unused = []

    self._execute("SELECT * FROM {}".format(self.LINK_STATE_TABLE))
    for row in self.cursor:
        relpath, inode, mtime = row
        inode = self._from_sqlite(inode)
        path = os.path.join(self.root_dir, relpath)

        if path in used or not os.path.exists(path):
            continue

        actual_inode = get_inode(path)
        actual_mtime, _ = get_mtime_and_size(path)

        # A link that was touched since it was recorded is left alone.
        if inode == actual_inode and mtime == actual_mtime:
            logger.debug("Removing '%s' as unused link.", path)
            remove(path)
            unused.append(relpath)

    # Only the table name is formatted into the statement. The path is
    # bound as a parameter so that quote characters in a file name can
    # neither break nor alter the SQL.
    cmd = "DELETE FROM {} WHERE path = ?".format(self.LINK_STATE_TABLE)
    for relpath in unused:
        self._execute(cmd, (relpath,))
def save(self, path_info, fs, hash_info):
    """Cache ``hash_info`` for ``path_info`` keyed by the file's inode.

    Does nothing when ``fs`` is not a ``LocalFileSystem``. The cached
    entry is ``(mtime, str(size), hash value)``.

    Args:
        path_info (dict): path_info to save hash for.
        fs: filesystem the path belongs to.
        hash_info (HashInfo): hash to save.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    assert isinstance(path_info, str) or path_info.scheme == "local"
    assert hash_info
    assert isinstance(hash_info, HashInfo)
    assert os.path.exists(path_info)

    mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
    inode = get_inode(path_info)
    size_text = str(size)

    logger.debug(
        "state save (%s, %s, %s) %s", inode, mtime, size_text, hash_info.value
    )

    self.md5s[inode] = (mtime, size_text, hash_info.value)
def _cache_metadata_changed(self):
    """Tell whether the cache directory differs from its state record.

    Returns:
        bool: True when there is no state record for the cache
            directory's inode, or its mtime or size differ from the
            recorded ones; False otherwise.
    """
    mtime, size = get_mtime_and_size(self.cache_dir)
    inode = get_inode(self.cache_dir)

    record = self.state.get_state_record_for_inode(inode)
    if not record:
        return True

    cached_mtime, cached_size, _, _ = record
    unchanged = mtime == cached_mtime and size == cached_size
    return not unchanged
def _changed_cache_dir(self):
    """Tell whether the cache directory differs from its state record.

    Returns:
        bool: True when there is no state record for the cache
            directory's inode, or its mtime or size differ from the
            recorded ones; False otherwise.
    """
    mtime, size = get_mtime_and_size(self.cache_dir)
    inode = get_inode(self.cache_dir)

    record = self.state.get_state_record_for_inode(inode)
    if not record:
        return True

    cached_mtime, cached_size, _, _ = record
    if mtime == cached_mtime and size == cached_size:
        return False
    return True
def _update_cache_directory_state(self):
    """Record the local cache directory's current metadata.

    Inserts or replaces the state table row keyed by the cache
    directory's inode with its size, mtime and the current timestamp;
    the md5 column is written as an empty string.
    """
    cache_dir = self.repo.cache.local.cache_dir
    mtime, size = get_mtime_and_size(cache_dir)
    inode = get_inode(cache_dir)

    template = (
        "INSERT OR REPLACE INTO {}(inode, size, mtime, timestamp, md5) "
        'VALUES ({}, "{}", "{}", "{}", "")'
    )
    statement = template.format(
        self.STATE_TABLE,
        self._to_sqlite(inode),
        size,
        mtime,
        current_timestamp(),
    )
    self._execute(statement)
def _changed_dir_cache(self, checksum):
    """Check a directory cache entry, skipping work when nothing moved.

    When the cache directory's mtime and size equal its state record,
    False is returned right away. Otherwise (no record, or a mismatch)
    the decision is delegated to the parent implementation.

    Args:
        checksum: passed through unchanged to the parent implementation.

    Returns:
        False when the cache directory is unchanged, else whatever the
        parent ``_changed_dir_cache`` returns.
    """
    mtime, size = get_mtime_and_size(self.cache_dir)
    inode = get_inode(self.cache_dir)

    record = self.state.get_state_record_for_inode(inode)
    if record:
        cached_mtime, cached_size, _, _ = record
        if mtime == cached_mtime and size == cached_size:
            return False

    return super(RemoteLOCAL, self)._changed_dir_cache(checksum)
def update_link(self, path):
    """Adds the specified path to the list of links created by dvc.

    This list is later used on `dvc checkout` to cleanup old links.
    Nothing is recorded when the path does not exist.

    Args:
        path (str): path to add to the list of links.
    """
    if not os.path.exists(path):
        return

    mtime, _ = get_mtime_and_size(path)
    inode = get_inode(path)
    relpath = os.path.relpath(path, self.root_dir)

    # Only the table name is formatted into the statement. The values are
    # bound as parameters so that a path containing quote characters can
    # neither break nor alter the SQL.
    cmd = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)".format(
        self.LINK_STATE_TABLE
    )
    # mtime used to be embedded as a quoted literal; str() keeps it stored
    # as text exactly as before.
    self._execute(cmd, (relpath, self._to_sqlite(inode), str(mtime)))
def save_link(self, path_info):
    """Record ``path_info`` in the table of links created by dvc.

    The table is consulted later on `dvc checkout` to clean up old links.
    Nothing is recorded when the path does not exist in ``self.tree``.

    Args:
        path_info (dict): path info to add to the list of links.
    """
    assert isinstance(path_info, str) or path_info.scheme == "local"

    if not self.tree.exists(path_info):
        return

    mtime, _ = get_mtime_and_size(path_info, self.tree)
    inode = get_inode(path_info)
    link_key = relpath(path_info, self.root_dir)

    template = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)"
    statement = template.format(self.LINK_STATE_TABLE)
    self._execute(statement, (link_key, self._to_sqlite(inode), mtime))
def save_link(self, path_info, fs):
    """Record ``path_info`` in the list of links created by dvc.

    The list is consulted later on `dvc checkout` to clean up old links.
    Nothing is recorded when ``fs`` is not a ``LocalFileSystem`` or when
    the path cannot be found.

    Args:
        path_info (dict): path info to add to the list of links.
        fs: filesystem the path belongs to.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    try:
        mtime, _ = get_mtime_and_size(path_info, fs, self.dvcignore)
    except FileNotFoundError:
        return
    else:
        inode = get_inode(path_info)
        link_key = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[link_key] = (inode, mtime)
def save_link(self, path_info, fs):
    """Record ``path_info`` in the list of links created by dvc.

    The list is consulted later on `dvc checkout` to clean up old links.
    Nothing is recorded when ``fs`` is not a ``LocalFileSystem`` or when
    the path does not exist in ``self.fs``.

    Args:
        path_info (dict): path info to add to the list of links.
        fs: filesystem the path belongs to.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    assert isinstance(path_info, str) or path_info.scheme == "local"

    if self.fs.exists(path_info):
        mtime, _ = get_mtime_and_size(path_info, self.fs)
        inode = get_inode(path_info)
        link_key = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[link_key] = (inode, mtime)
def save(self, path, fs, hash_info):
    """Cache ``hash_info`` for ``path`` keyed by the file's inode.

    Does nothing when ``fs`` is not a ``LocalFileSystem``. The cached
    entry is ``(mtime, str(size), hash value)``.

    Args:
        path (str): path to save hash for.
        fs: filesystem the path belongs to.
        hash_info (HashInfo): hash to save.
    """
    if not isinstance(fs, LocalFileSystem):
        return

    mtime, size = get_mtime_and_size(path, fs, self.dvcignore)
    inode = get_inode(path)
    size_text = str(size)

    logger.debug(
        "state save (%s, %s, %s) %s", inode, mtime, size_text, hash_info.value
    )

    self.md5s[inode] = (mtime, size_text, hash_info.value)
def test_get_inode(tmp_dir):
    """get_inode gives the same result for a str path and a PathInfo."""
    name = "foo"
    tmp_dir.gen(name, "foo content")

    inode_from_str = get_inode(name)
    inode_from_path_info = get_inode(PathInfo(name))

    assert inode_from_str == inode_from_path_info
def test_get_inode(repo_dir):
    """get_inode gives the same result for a str path and a PathInfo."""
    str_path = repo_dir.FOO
    wrapped_path = PathInfo(str_path)

    inode_from_str = get_inode(str_path)
    inode_from_path_info = get_inode(wrapped_path)

    assert inode_from_str == inode_from_path_info
def get_inode_mocked(path):
    """Return ``special_value`` for ``special_path``, else the real inode."""
    return special_value if path == special_path else get_inode(path)