Exemple #1
0
    def remove_unused_links(self, used):
        """Delete stale links from disk and drop their rows from the link table.

        Every row of the link state table is inspected. A link is deleted
        when its path is not listed in ``used``, the path still exists, and
        its current inode and mtime equal the recorded ones. The rows of all
        deleted links are then removed from the table in chunks.

        Args:
            used (list): list of used links that should not be removed.
        """
        select_cmd = "SELECT * FROM {}".format(self.LINK_STATE_TABLE)
        self._execute(select_cmd)

        stale = []
        for link_relpath, saved_inode, saved_mtime in self.cursor:
            saved_inode = self._from_sqlite(saved_inode)
            path = os.path.join(self.root_dir, link_relpath)

            # Links still in use, or already gone from disk, are left alone.
            if path in used or not os.path.exists(path):
                continue

            current_inode = get_inode(path)
            current_mtime, _ = get_mtime_and_size(path, self.repo.dvcignore)

            # Only delete the file if it is still the one that was recorded.
            if saved_inode != current_inode or saved_mtime != current_mtime:
                continue

            logger.debug("Removing '{}' as unused link.".format(path))
            remove(path)
            stale.append(link_relpath)

        # One placeholder per path; chunk size is SQLITE_MAX_VARIABLES_NUMBER.
        for batch in to_chunks(stale, chunk_size=SQLITE_MAX_VARIABLES_NUMBER):
            placeholders = ",".join(["?"] * len(batch))
            delete_cmd = "DELETE FROM {} WHERE path IN ({})".format(
                self.LINK_STATE_TABLE, placeholders)
            self._execute(delete_cmd, tuple(batch))
Exemple #2
0
    def get(self, path_info):
        """Look up the stored hash for a local path.

        The hash is only returned when the state database has a record for
        the path's inode and the recorded mtime/size are still current
        according to ``_file_metadata_changed``. On a hit, the record's
        timestamp is refreshed.

        Args:
            path_info (str or path info object): local path to get the hash
                for; non-string values must have ``scheme == "local"``.

        Returns:
            HashInfo or None: hash for the specified path info or None if
            the path does not exist, has no record, or its metadata changed.
        """
        assert isinstance(path_info, str) or path_info.scheme == "local"
        path = os.fspath(path_info)

        # NOTE: os.path.exists is used on purpose instead of
        # LocalFileSystem.exists: the latter relies on lexists() and reports
        # True for broken symlinks, which get_mtime_and_size cannot stat().
        if not os.path.exists(path):
            return None

        actual_mtime, actual_size = get_mtime_and_size(path, self.fs)
        actual_inode = get_inode(path)

        record = self.get_state_record_for_inode(actual_inode)
        if record:
            saved_mtime, saved_size, value, _ = record
            stale = self._file_metadata_changed(
                actual_mtime, saved_mtime, actual_size, saved_size
            )
            if not stale:
                self._update_state_record_timestamp_for_inode(actual_inode)
                return HashInfo("md5", value, size=int(actual_size))

        return None
Exemple #3
0
    def _do_update(self, path, known_checksum=None):
        """
        Make sure the stored info for the given path is up to date.
        """
        if not os.path.exists(path):
            return None, None

        actual_mtime, actual_size = get_mtime_and_size(path)
        actual_inode = get_inode(path)

        existing_record = self.get_state_record_for_inode(actual_inode)

        if existing_record:
            md5, info = self._update_existing_state_record(
                path,
                actual_inode,
                actual_mtime,
                actual_size,
                existing_record,
                known_checksum,
            )
        else:
            md5, info = self._insert_new_state_record(path, actual_inode,
                                                      actual_mtime,
                                                      actual_size,
                                                      known_checksum)

        return md5, info
Exemple #4
0
    def save(self, path_info, fs, hash_info):
        """Store the hash of a local path in the state database.

        Nothing is done unless ``fs`` is a ``LocalFileSystem``. A new record
        is inserted when none exists for the path's inode; otherwise the
        existing one is updated.

        Args:
            path_info (str or path info object): path to save the hash for;
                it must exist on disk.
            fs: filesystem the path belongs to.
            hash_info (HashInfo): hash to save.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"
        assert hash_info
        assert isinstance(hash_info, HashInfo)
        assert os.path.exists(path_info)

        actual_mtime, actual_size = get_mtime_and_size(path_info, self.fs)
        actual_inode = get_inode(path_info)

        # Pick the writer based on whether the inode is already known.
        if self.get_state_record_for_inode(actual_inode):
            store = self._update_state_for_path_changed
        else:
            store = self._insert_new_state_record

        store(actual_inode, actual_mtime, actual_size, hash_info.value)
Exemple #5
0
    def get_unused_links(self, used, fs):
        """Collect the saved links that are no longer used.

        A saved link is reported when its path is not in ``used``, the path
        exists, and its current ``(inode, mtime)`` pair equals the saved one.
        Nothing is deleted here; the relative paths are only returned.

        Args:
            used (list): list of used links that should not be removed.
            fs: filesystem being processed; only ``LocalFileSystem`` is
                handled.

        Returns:
            list or None: relative paths of unused links, or None when
            ``fs`` is not a ``LocalFileSystem``.
        """
        if not isinstance(fs, LocalFileSystem):
            return None

        candidates = []

        with self.links as ref:
            for relative_path in ref:
                path = os.path.join(self.root_dir, relative_path)

                if path not in used and self.fs.exists(path):
                    inode = get_inode(path)
                    mtime, _ = get_mtime_and_size(path, self.fs)

                    # Only report the link if the file is unchanged since
                    # it was saved.
                    if ref[relative_path] == (inode, mtime):
                        logger.debug("Removing '%s' as unused link.", path)
                        candidates.append(relative_path)

        return candidates
Exemple #6
0
    def save_link(self, path_info):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Nothing is recorded when the path does not exist. Otherwise the
        path (relative to ``self.root_dir``), its inode and its mtime are
        written to the link state table, replacing any previous row.

        Args:
            path_info: path info to add to the list of links; its ``scheme``
                must be ``"local"``.
        """
        assert path_info.scheme == "local"
        path = fspath_py35(path_info)

        if not os.path.exists(path):
            return

        mtime, _ = get_mtime_and_size(path)
        inode = get_inode(path)
        relative_path = relpath(path, self.root_dir)

        # Bind the values instead of formatting them into the SQL text: a
        # file name containing a double quote would otherwise break (or
        # inject into) the statement. Only the table name, which cannot be
        # a bound parameter, is still formatted in.
        # NOTE(review): relies on self._execute accepting a parameters tuple
        # and forwarding it to the cursor -- confirm for this revision.
        cmd = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)".format(
            self.LINK_STATE_TABLE
        )
        # The previous statement stored mtime as a quoted text literal, so
        # it is kept textual here.
        self._execute(
            cmd, (relative_path, self._to_sqlite(inode), str(mtime))
        )
Exemple #7
0
    def get(self, path_info, fs):
        """Look up the stored hash for a local path.

        The entry saved under the path's inode in ``self.md5s`` is only
        trusted when its mtime and size equal the current ones.

        Args:
            path_info (str or path info object): local path to get the hash
                for.
            fs: filesystem the path belongs to; only ``LocalFileSystem`` is
                handled.

        Returns:
            HashInfo or None: hash for the specified path info or None if
            there is no up-to-date entry for it.
        """
        if not isinstance(fs, LocalFileSystem):
            return None

        assert isinstance(path_info, str) or path_info.scheme == "local"
        path = os.fspath(path_info)

        # NOTE: os.path.exists is used on purpose instead of
        # LocalFileSystem.exists: the latter relies on lexists() and reports
        # True for broken symlinks, which get_mtime_and_size cannot stat().
        if not os.path.exists(path):
            return None

        mtime, size = get_mtime_and_size(path, self.fs)
        inode = get_inode(path)

        entry = self.md5s.get(inode)
        if not entry:
            return None

        # entry is indexed as (mtime, size, hash value).
        if entry[0] != mtime or entry[1] != size:
            return None

        return HashInfo("md5", entry[2], size=int(size))
Exemple #8
0
    def get(self, path_info):
        """Look up the stored checksum for a local path.

        The checksum is only returned when the state database has a record
        for the path's inode and ``_file_metadata_changed`` reports that the
        recorded mtime/size are still current. On a hit, the record's
        timestamp is refreshed.

        Args:
            path_info: path info to get the checksum for; its ``scheme``
                must be ``"local"``.

        Returns:
            str or None: checksum for the specified path info or None if it
            doesn't exist in the state database.
        """
        assert path_info.scheme == "local"
        path = fspath_py35(path_info)

        if not os.path.exists(path):
            return None

        actual_mtime, actual_size = get_mtime_and_size(
            path, self.repo.dvcignore
        )
        actual_inode = get_inode(path)

        record = self.get_state_record_for_inode(actual_inode)
        if record:
            saved_mtime, saved_size, checksum, _ = record
            stale = self._file_metadata_changed(
                actual_mtime, saved_mtime, actual_size, saved_size
            )
            if not stale:
                self._update_state_record_timestamp_for_inode(actual_inode)
                return checksum

        return None
Exemple #9
0
    def get_unused_links(self, used):
        """Collect the saved links that are no longer used.

        Every row of the link state table is inspected. A link is reported
        when its path is not in ``used``, the path exists, and its current
        inode and mtime equal the recorded ones. Nothing is deleted here.

        Args:
            used (list): list of used links that should not be removed.

        Returns:
            list: relative paths of the unused links.
        """
        candidates = []

        self._execute(f"SELECT * FROM {self.LINK_STATE_TABLE}")
        for relative_path, saved_inode, saved_mtime in self.cursor:
            saved_inode = self._from_sqlite(saved_inode)
            path = os.path.join(self.root_dir, relative_path)

            if path not in used and self.tree.exists(path):
                current_inode = get_inode(path)
                current_mtime, _ = get_mtime_and_size(path, self.tree)

                recorded = (saved_inode, saved_mtime)
                current = (current_inode, current_mtime)
                # Only report the link if the file is unchanged since it
                # was saved.
                if recorded == current:
                    logger.debug("Removing '%s' as unused link.", path)
                    candidates.append(relative_path)

        return candidates
Exemple #10
0
    def get(self, path_info, fs):
        """Look up the stored metadata and hash for a local path.

        The entry saved under the path's inode in ``self.md5s`` is only
        trusted when its mtime equals the current mtime and its size equals
        the current size rendered as a string.

        Args:
            path_info: path to get the hash for.
            fs: filesystem the path belongs to; only ``LocalFileSystem`` is
                handled.

        Returns:
            tuple: ``(Meta, HashInfo)`` for an up-to-date entry, otherwise
            ``(None, None)`` (non-local fs, missing file, or no matching
            entry).
        """
        from .objects.meta import Meta

        if not isinstance(fs, LocalFileSystem):
            return None, None

        try:
            mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return None, None

        inode = get_inode(path_info)
        entry = self.md5s.get(inode)

        if not entry:
            return None, None

        # entry is indexed as (mtime, size-as-string, hash value).
        if entry[0] != mtime or entry[1] != str(size):
            return None, None

        return Meta(size=size), HashInfo("md5", entry[2])
Exemple #11
0
    def save(self, path_info, checksum):
        """Store the checksum of a local path in the state database.

        A new record is inserted when none exists for the path's inode;
        otherwise the existing one is updated.

        Args:
            path_info: path_info to save checksum for; its ``scheme`` must
                be ``"local"`` and the path must exist.
            checksum (str): checksum to save; must not be None.
        """
        assert path_info.scheme == "local"
        assert checksum is not None

        path = fspath_py35(path_info)
        assert os.path.exists(path)

        actual_mtime, actual_size = get_mtime_and_size(
            path, self.repo.dvcignore
        )
        actual_inode = get_inode(path)

        # Pick the writer based on whether the inode is already known.
        if self.get_state_record_for_inode(actual_inode):
            store = self._update_state_for_path_changed
        else:
            store = self._insert_new_state_record

        store(actual_inode, actual_mtime, actual_size, checksum)
Exemple #12
0
    def get(self, path_info):
        """Look up the stored checksum for a local path.

        The checksum is only returned when the state database has a record
        for the path's inode and ``_file_metadata_changed`` reports that the
        recorded mtime/size are still current. On a hit, the record's
        timestamp is refreshed.

        Args:
            path_info (str or path info object): local path to get the
                checksum for; non-string values must have
                ``scheme == "local"``.

        Returns:
            str or None: checksum for the specified path info or None if it
            doesn't exist in the state database.
        """
        assert isinstance(path_info, str) or path_info.scheme == "local"
        path = os.fspath(path_info)

        # NOTE: os.path.exists is used on purpose instead of
        # WorkingTree.exists: the latter relies on lexists() and reports
        # True for broken symlinks, which get_mtime_and_size cannot stat().
        if not os.path.exists(path):
            return None

        actual_mtime, actual_size = get_mtime_and_size(path, self.tree)
        actual_inode = get_inode(path)

        record = self.get_state_record_for_inode(actual_inode)
        if record:
            saved_mtime, saved_size, checksum, _ = record
            stale = self._file_metadata_changed(
                actual_mtime, saved_mtime, actual_size, saved_size
            )
            if not stale:
                self._update_state_record_timestamp_for_inode(actual_inode)
                return checksum

        return None
Exemple #13
0
    def remove_unused_links(self, used):
        """Delete unused links from disk and forget them in the link table.

        Every row of the link state table is inspected. A link is removed
        when its path is not in ``used``, the path still exists, and its
        current inode and mtime equal the recorded ones. The rows of all
        removed links are then deleted from the table.

        Args:
            used (list): list of used links that should not be removed.
        """
        unused = []

        self._execute("SELECT * FROM {}".format(self.LINK_STATE_TABLE))
        for row in self.cursor:
            relpath, inode, mtime = row
            inode = self._from_sqlite(inode)
            path = os.path.join(self.root_dir, relpath)

            # Links still in use, or already gone from disk, are left alone.
            if path in used or not os.path.exists(path):
                continue

            actual_inode = get_inode(path)
            actual_mtime, _ = get_mtime_and_size(path)

            # Only delete the file if it is still the one that was recorded.
            if inode == actual_inode and mtime == actual_mtime:
                logger.debug("Removing '%s' as unused link.", path)
                remove(path)
                unused.append(relpath)

        # Bind the path instead of formatting it into the SQL text: a file
        # name containing a double quote would otherwise break (or inject
        # into) the statement. Only the table name, which cannot be a bound
        # parameter, is still formatted in.
        # NOTE(review): relies on self._execute accepting a parameters tuple
        # and forwarding it to the cursor -- confirm for this revision.
        cmd = "DELETE FROM {} WHERE path = ?".format(self.LINK_STATE_TABLE)
        for relpath in unused:
            self._execute(cmd, (relpath,))
Exemple #14
0
    def save(self, path_info, fs, hash_info):
        """Store the hash of a local path in ``self.md5s``.

        Nothing is done unless ``fs`` is a ``LocalFileSystem``. The entry is
        keyed by the path's inode and holds the mtime, the size rendered as
        a string, and the hash value.

        Args:
            path_info (str or path info object): path to save the hash for;
                it must exist on disk.
            fs: filesystem the path belongs to.
            hash_info (HashInfo): hash to save.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"
        assert hash_info
        assert isinstance(hash_info, HashInfo)
        assert os.path.exists(path_info)

        mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        inode = get_inode(path_info)

        entry = (mtime, str(size), hash_info.value)
        logger.debug("state save (%s, %s, %s) %s", inode, *entry)

        self.md5s[inode] = entry
Exemple #15
0
    def _cache_metadata_changed(self):
        """Tell whether the cache directory differs from its state record.

        Returns:
            bool: True when there is no state record for the cache
            directory's inode, or when the recorded mtime or size differs
            from the current one; False otherwise.
        """
        mtime, size = get_mtime_and_size(self.cache_dir)
        inode = get_inode(self.cache_dir)

        record = self.state.get_state_record_for_inode(inode)
        if not record:
            # Nothing recorded yet, so treat the directory as changed.
            return True

        cached_mtime, cached_size, _, _ = record
        return mtime != cached_mtime or size != cached_size
Exemple #16
0
    def _changed_cache_dir(self):
        """Tell whether the cache directory differs from its state record.

        Returns:
            bool: True when there is no state record for the cache
            directory's inode, or when the recorded mtime or size differs
            from the current one; False otherwise.
        """
        mtime, size = get_mtime_and_size(self.cache_dir)
        inode = get_inode(self.cache_dir)

        record = self.state.get_state_record_for_inode(inode)
        if not record:
            # Nothing recorded yet, so treat the directory as changed.
            return True

        cached_mtime, cached_size, _, _ = record
        unchanged = mtime == cached_mtime and size == cached_size
        return not unchanged
Exemple #17
0
    def _update_cache_directory_state(self):
        """Record the cache directory's current metadata in the state table.

        Writes (or replaces) a row holding the directory's inode, size,
        mtime and the current timestamp, with an empty md5.
        """
        cache_path = self.repo.cache.local.cache_dir
        mtime, size = get_mtime_and_size(cache_path)
        inode = get_inode(cache_path)

        template = (
            "INSERT OR REPLACE INTO {}(inode, size, mtime, timestamp, md5) "
            'VALUES ({}, "{}", "{}", "{}", "")'
        )
        values = (
            self.STATE_TABLE,
            self._to_sqlite(inode),
            size,
            mtime,
            current_timestamp(),
        )
        self._execute(template.format(*values))
Exemple #18
0
    def _changed_dir_cache(self, checksum):
        """Check whether the directory cache for ``checksum`` has changed.

        When the state record for the cache directory's inode still matches
        the directory's current mtime and size, the cache is reported as
        unchanged without further checks. Otherwise the decision is
        delegated to the parent class implementation.

        Args:
            checksum: checksum passed through to the parent implementation.

        Returns:
            False on the shortcut path, otherwise whatever the parent
            implementation returns.
        """
        mtime, size = get_mtime_and_size(self.cache_dir)
        inode = get_inode(self.cache_dir)

        record = self.state.get_state_record_for_inode(inode)
        if record:
            cached_mtime, cached_size, _, _ = record
            if mtime == cached_mtime and size == cached_size:
                return False

        return super(RemoteLOCAL, self)._changed_dir_cache(checksum)
Exemple #19
0
    def update_link(self, path):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Nothing is recorded when the path does not exist. Otherwise the
        path (relative to ``self.root_dir``), its inode and its mtime are
        written to the link state table, replacing any previous row.

        Args:
            path (str): path to add to the list of links.
        """
        if not os.path.exists(path):
            return

        mtime, _ = get_mtime_and_size(path)
        inode = get_inode(path)
        relpath = os.path.relpath(path, self.root_dir)

        # Bind the values instead of formatting them into the SQL text: a
        # file name containing a double quote would otherwise break (or
        # inject into) the statement. Only the table name, which cannot be
        # a bound parameter, is still formatted in.
        # NOTE(review): relies on self._execute accepting a parameters tuple
        # and forwarding it to the cursor -- confirm for this revision.
        cmd = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)".format(
            self.LINK_STATE_TABLE
        )
        # The previous statement stored mtime as a quoted text literal, so
        # it is kept textual here.
        self._execute(cmd, (relpath, self._to_sqlite(inode), str(mtime)))
Exemple #20
0
    def save_link(self, path_info):
        """Record a path in the list of links created by dvc.

        The list is later used on `dvc checkout` to cleanup old links.
        Nothing is recorded when ``self.tree`` reports that the path does
        not exist.

        Args:
            path_info (str or path info object): path to add to the list of
                links; non-string values must have ``scheme == "local"``.
        """
        assert isinstance(path_info, str) or path_info.scheme == "local"

        if self.tree.exists(path_info):
            mtime, _ = get_mtime_and_size(path_info, self.tree)
            inode = get_inode(path_info)
            relative_path = relpath(path_info, self.root_dir)

            template = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)"
            cmd = template.format(self.LINK_STATE_TABLE)
            row = (relative_path, self._to_sqlite(inode), mtime)
            self._execute(cmd, row)
Exemple #21
0
    def save_link(self, path_info, fs):
        """Record a path in the list of links created by dvc.

        The list is later used on `dvc checkout` to cleanup old links.
        Nothing is recorded for a non-local filesystem or when reading the
        path's mtime raises ``FileNotFoundError``.

        Args:
            path_info: path to add to the list of links.
            fs: filesystem the path belongs to; only ``LocalFileSystem`` is
                handled.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        try:
            mtime, _ = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return
        else:
            inode = get_inode(path_info)
            link_key = relpath(path_info, self.root_dir)

            # Saved as (inode, mtime), keyed by the path relative to root_dir.
            with self.links as ref:
                ref[link_key] = (inode, mtime)
Exemple #22
0
    def save_link(self, path_info, fs):
        """Record a path in the list of links created by dvc.

        The list is later used on `dvc checkout` to cleanup old links.
        Nothing is recorded for a non-local filesystem or when ``self.fs``
        reports that the path does not exist.

        Args:
            path_info (str or path info object): path to add to the list of
                links; non-string values must have ``scheme == "local"``.
            fs: filesystem the path belongs to; only ``LocalFileSystem`` is
                handled.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"

        if self.fs.exists(path_info):
            mtime, _ = get_mtime_and_size(path_info, self.fs)
            inode = get_inode(path_info)
            link_key = relpath(path_info, self.root_dir)

            # Saved as (inode, mtime), keyed by the path relative to root_dir.
            with self.links as ref:
                ref[link_key] = (inode, mtime)
Exemple #23
0
    def save(self, path, fs, hash_info):
        """Store the hash of a local path in ``self.md5s``.

        Nothing is done unless ``fs`` is a ``LocalFileSystem``. The entry is
        keyed by the path's inode and holds the mtime, the size rendered as
        a string, and the hash value.

        Args:
            path (str): path to save hash for.
            fs: filesystem the path belongs to.
            hash_info (HashInfo): hash to save.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        mtime, size = get_mtime_and_size(path, fs, self.dvcignore)
        inode = get_inode(path)

        entry = (mtime, str(size), hash_info.value)
        logger.debug("state save (%s, %s, %s) %s", inode, *entry)

        self.md5s[inode] = entry
Exemple #24
0
def test_get_inode(tmp_dir):
    """get_inode gives the same result for a str path and a PathInfo."""
    tmp_dir.gen("foo", "foo content")

    inode_from_str = get_inode("foo")
    inode_from_path_info = get_inode(PathInfo("foo"))

    assert inode_from_str == inode_from_path_info
Exemple #25
0
def test_get_inode(repo_dir):
    """get_inode gives the same result for a str path and a PathInfo."""
    foo_path = repo_dir.FOO
    foo_info = PathInfo(foo_path)

    inode_from_str = get_inode(foo_path)
    inode_from_path_info = get_inode(foo_info)

    assert inode_from_str == inode_from_path_info
Exemple #26
0
 def get_inode_mocked(path):
     """Return ``special_value`` for ``special_path``; else defer to get_inode."""
     return special_value if path == special_path else get_inode(path)