Example #1
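A round-trip test: snapshot a set of generated files, upload the resulting hashes to storage, then download the backup into a fresh directory with a second Snapshotter and verify that re-snapshotting the downloaded tree reproduces the original snapshot state (mtime aside).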
def test_download(snapshotter, uploader, storage, tmpdir):
    with snapshotter.lock:
        snapshotter.create_4foobar()
        ss1 = snapshotter.get_snapshot_state()
        hashes = snapshotter.get_snapshot_hashes()

    uploader.write_hashes_to_storage(snapshotter=snapshotter,
                                     hashes=hashes,
                                     progress=Progress(),
                                     parallel=1)

    # Download the old backup from storage
    dst2 = Path(tmpdir / "dst2")
    dst2.mkdir()

    dst3 = Path(tmpdir / "dst3")
    dst3.mkdir()
    snapshotter = Snapshotter(src=dst2, dst=dst3, globs=["*"], parallel=1)
    downloader = Downloader(storage=storage,
                            snapshotter=snapshotter,
                            dst=dst2,
                            parallel=1)
    with snapshotter.lock:
        downloader.download_from_storage(progress=Progress(),
                                         snapshotstate=ss1)

        # And ensure we get the same snapshot state by snapshotting it
        assert snapshotter.snapshot(progress=Progress()) > 0
        ss2 = snapshotter.get_snapshot_state()

    # Ensure the files are the same (modulo mtime_ns, which is not
    # guaranteed to match exactly)
    for ssfile1, ssfile2 in zip(ss1.files, ss2.files):
        assert ssfile1.equals_excluding_mtime(ssfile2)
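
These tests rely on pytest fixtures (snapshotter, uploader, storage) whose definitions are not shown in these examples. As a hedged illustration only, a minimal snapshotter fixture might look like the sketch below, inferred from the direct Snapshotter(...) construction in the test body above; the fixture name, layout and import path are assumptions, not the project's actual conftest.py.

import pytest
from pathlib import Path

from astacus.node.snapshotter import Snapshotter  # assumed import path


@pytest.fixture(name="snapshotter")
def fixture_snapshotter(tmpdir):
    # Hypothetical fixture: mirrors the Snapshotter(...) call made in
    # test_download above; the real fixture may differ.
    src = Path(tmpdir / "src")
    src.mkdir()
    dst = Path(tmpdir / "dst")
    dst.mkdir()
    return Snapshotter(src=src, dst=dst, globs=["*"], parallel=1)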
Example #2
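A test helper that seeds the source directory with two small files (small enough to be embedded in the snapshot, per Example #6) and two large identical files (hashed instead), then checks that a first snapshot reports changes, an immediate second snapshot reports none, and that both yield the same snapshot state.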
    def create_4foobar(self):
        (self.src / "foo").write_text("foobar")
        (self.src / "foo2").write_text("foobar")
        (self.src / "foobig").write_text("foobar" * magic.EMBEDDED_FILE_SIZE)
        (self.src / "foobig2").write_text("foobar" * magic.EMBEDDED_FILE_SIZE)
        progress = Progress()
        assert self.snapshot(progress=progress) > 0
        ss1 = self.get_snapshot_state()
        assert self.snapshot(progress=Progress()) == 0
        ss2 = self.get_snapshot_state()
        assert ss1 == ss2
Example #3
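An error-handling test, parametrized via the test tuple: mocker replaces one method on the snapshotter (or on another object supplied by the parametrization) with a stub that raises FileNotFoundError, then checks the change counts reported by two consecutive snapshot() calls under that failure.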
def test_snapshot_error_filenotfound(snapshotter, mocker, test):
    (obj, fun, exp_progress_1, exp_progress_2) = test

    def _not_really_found(*a, **kw):
        raise FileNotFoundError

    obj = obj or snapshotter
    mocker.patch.object(obj, fun, new=_not_really_found)
    (snapshotter.src / "foo").write_text("foobar")
    (snapshotter.src / "bar").write_text("foobar")
    with snapshotter.lock:
        progress = Progress()
        assert snapshotter.snapshot(progress=progress) == exp_progress_1
        progress = Progress()
        assert snapshotter.snapshot(progress=progress) == exp_progress_2
Example #4
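Coordinator-side polling: each node's start result carries a status URL, which is polled with exponential backoff until every node reports a final result, with per-node failure counting and merged step progress along the way. Note the else clause on the async for loop: it runs only when the backoff iterator is exhausted without a break, i.e. when polling has timed out.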
    async def wait_successful_results(self,
                                      start_results,
                                      *,
                                      result_class,
                                      all_nodes=True):
        urls = []

        for i, result in enumerate(start_results, 1):
            if not result or isinstance(result, Exception):
                logger.info(
                    "wait_successful_results: Incorrect start result for #%d/%d: %r",
                    i, len(start_results), result)
                return []
            parsed_result = op.Op.StartResult.parse_obj(result)
            urls.append(parsed_result.status_url)
        if all_nodes and len(urls) != len(self.nodes):
            return []
        delay = self.config.poll.delay_start
        results = [None] * len(urls)
        # Note that we don't have a timeout mechanism here as such;
        # however, if re-locking times out, we will bail out. TBD
        # whether we need a separate timeout mechanism here anyway.
        failures = {}

        def _event_awaitable_factory():
            return self.subresult_received_event.wait()

        async for _ in utils.exponential_backoff(
                initial=delay,
                multiplier=self.config.poll.delay_multiplier,
                maximum=self.config.poll.delay_max,
                duration=self.config.poll.duration,
                event_awaitable_factory=_event_awaitable_factory,
        ):
            self.subresult_received_event.clear()
            for i, (url, result) in enumerate(zip(urls, results)):
                # TBD: This could be done in parallel too
                if result is not None and result.progress.final:
                    continue
                r = await utils.httpx_request(
                    url,
                    caller="CoordinatorOp.wait_successful_results",
                    timeout=self.config.poll.result_timeout)
                if r is None:
                    failures[i] = failures.get(i, 0) + 1
                    if failures[i] >= self.config.poll.maximum_failures:
                        return []
                    continue
                # We got something -> decode the result
                result = result_class.parse_obj(r)
                results[i] = result
                failures[i] = 0
                assert self.current_step
                self.step_progress[self.current_step] = Progress.merge(
                    r.progress for r in results if r is not None)
                if result.progress.finished_failed:
                    return []
            if all(result is not None and result.progress.final
                   for result in results):
                break
        else:
            logger.debug("wait_successful_results timed out")
            return []
        return results
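
The async for/else above is easy to misread, so here is a self-contained toy illustration; the backoff async generator below is a stub standing in for utils.exponential_backoff, whose real implementation is not shown in these examples.

import asyncio


async def backoff(attempts):
    # Stub: yields once per poll attempt, like a backoff iterator would.
    for _ in range(attempts):
        yield
        await asyncio.sleep(0)  # stand-in for the real delay logic


async def poll(done_after):
    polls = 0
    async for _ in backoff(attempts=3):
        polls += 1
        if polls >= done_after:
            break  # success: the else branch is skipped
    else:
        return None  # iterator exhausted without break -> timed out
    return "done"


assert asyncio.run(poll(done_after=2)) == "done"
assert asyncio.run(poll(done_after=5)) is None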
Example #5
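The upload step: the set of hexdigests from a snapshot is pushed to storage in parallel worker threads, largest files first. Each file's content hash is re-verified both before and after the upload, so data that changed on disk mid-flight is skipped, or deleted from storage again, rather than left stored under a stale hexdigest.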
    def write_hashes_to_storage(self,
                                *,
                                snapshotter: Snapshotter,
                                hashes,
                                parallel: int,
                                progress: Progress,
                                still_running_callback=lambda: True):
        todo = {snapshot_hash.hexdigest for snapshot_hash in hashes}
        progress.start(len(todo))
        sizes = {"total": 0, "stored": 0}

        def _upload_hexdigest_in_thread(hexdigest):
            storage = self.local_storage

            assert hexdigest
            files = snapshotter.hexdigest_to_snapshotfiles.get(hexdigest, [])
            for snapshotfile in files:
                path = snapshotter.dst / snapshotfile.relative_path
                if not path.is_file():
                    logger.warning("%s disappeared post-snapshot", path)
                    continue
                with snapshotfile.open_for_reading(snapshotter.dst) as f:
                    current_hexdigest = hash_hexdigest_readable(f)
                if current_hexdigest != snapshotfile.hexdigest:
                    logger.info("Hash of %s changed before upload",
                                snapshotfile.relative_path)
                    continue
                try:
                    with snapshotfile.open_for_reading(snapshotter.dst) as f:
                        upload_result = storage.upload_hexdigest_from_file(
                            hexdigest, f)
                except exceptions.TransientException as ex:
                    # Do not pollute logs with transient exceptions
                    logger.debug("Transient exception uploading %r: %r", path,
                                 ex)
                    return progress.upload_failure, 0, 0
                except exceptions.AstacusException:
                    # Report failure - whole step will be retried later
                    logger.exception("Exception uploading %r", path)
                    return progress.upload_failure, 0, 0
                with snapshotfile.open_for_reading(snapshotter.dst) as f:
                    current_hexdigest = hash_hexdigest_readable(f)
                if current_hexdigest != snapshotfile.hexdigest:
                    logger.info("Hash of %s changed after upload",
                                snapshotfile.relative_path)
                    storage.delete_hexdigest(hexdigest)
                    continue
                return progress.upload_success, upload_result.size, upload_result.stored_size

            # We didn't find any file with the matching hexdigest.
            # Report it as missing but keep uploading other files.
            return progress.upload_missing, 0, 0

        def _result_cb(*, map_in, map_out):
            # progress callback in 'main' thread
            progress_callback, total, stored = map_out
            sizes["total"] += total
            sizes["stored"] += stored
            progress_callback(map_in)  # hexdigest
            return still_running_callback()

        sorted_todo = sorted(
            todo,
            key=lambda hexdigest: -snapshotter.hexdigest_to_snapshotfiles[
                hexdigest][0].file_size)
        if not utils.parallel_map_to(fun=_upload_hexdigest_in_thread,
                                     iterable=sorted_todo,
                                     result_callback=_result_cb,
                                     n=parallel):
            progress.add_fail()
        return sizes["total"], sizes["stored"]
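
Both this uploader and the snapshotter in Example #6 fan their work out through utils.parallel_map_to, whose implementation is not shown in these examples. The sketch below is a minimal thread-pool version inferred purely from the call sites: keyword arguments fun, iterable, result_callback and n; the callback runs in the calling thread with map_in/map_out pairs, and returning a falsy value from it aborts the run.

from concurrent.futures import ThreadPoolExecutor


def parallel_map_to(*, fun, iterable, result_callback, n):
    # Hedged sketch, not the real astacus utils implementation.
    items = list(iterable)
    with ThreadPoolExecutor(max_workers=n) as executor:
        # executor.map yields results in input order, so the callback
        # sees (input, output) pairs in order, in this thread.
        for map_in, map_out in zip(items, executor.map(fun, items)):
            if not result_callback(map_in=map_in, map_out=map_out):
                return False  # the callback asked us to stop early
    return True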
Example #6
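The core snapshot operation. Unless src and dst are the same directory, dst is first synchronized with src (missing directories created, extra files removed, missing files added); each resulting file is then processed in parallel, with small files embedded into their snapshotfile objects as base64 and larger ones hashed.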
    def snapshot(self, *, progress: Optional[Progress] = None):
        assert self.lock.locked()

        if progress is None:
            progress = Progress()

        src_dirs, src_files = self._list_dirs_and_files(self.src)
        progress.start(1)
        if self.src == self.dst:
            # The src=dst mode should be used if and only if it is
            # known that files will not disappear between snapshot and
            # upload steps (e.g. Astacus controls the lifecycle of the
            # files within). In that case, there is little point in
            # making extra symlinks and we can just use the src
            # directory contents as-is.
            dst_dirs, dst_files = src_dirs, src_files
            changes = 0  # src==dst mode does no synchronization
        else:
            progress.add_total(3)
            dst_dirs, dst_files = self._list_dirs_and_files(self.dst)

            # Create missing directories
            changes = self._snapshot_create_missing_directories(src_dirs=src_dirs, dst_dirs=dst_dirs)
            progress.add_success()

            # Remove extra files
            changes += self._snapshot_remove_extra_files(src_files=src_files, dst_files=dst_files)
            progress.add_success()

            # Add missing files
            changes += self._snapshot_add_missing_files(src_files=src_files, dst_files=dst_files)
            progress.add_success()

            # We COULD also remove extra directories, but it is
            # probably not really worth it, and due to ignored files it
            # might not even work anyway.

            # Then, create/update corresponding snapshotfile objects (old
            # ones were already removed)
            dst_dirs, dst_files = self._list_dirs_and_files(self.dst)

        snapshotfiles = list(self._get_snapshot_hash_list(dst_files))
        progress.add_total(len(snapshotfiles))

        def _cb(snapshotfile):
            # src may or may not be present; dst is present as it is in snapshot
            with snapshotfile.open_for_reading(self.dst) as f:
                if snapshotfile.file_size <= magic.EMBEDDED_FILE_SIZE:
                    snapshotfile.content_b64 = base64.b64encode(f.read()).decode()
                else:
                    snapshotfile.hexdigest = hash_hexdigest_readable(f)
            return snapshotfile

        def _result_cb(*, map_in, map_out):
            self._add_snapshotfile(map_out)
            progress.add_success()
            return True

        changes += len(snapshotfiles)
        utils.parallel_map_to(iterable=snapshotfiles, fun=_cb, result_callback=_result_cb, n=self.parallel)

        # We initially started with 1 extra
        progress.add_success()

        return changes
Example #7
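An aggregate progress accessor: it merges the per-step Progress objects stored in step_progress (which Example #4 updates while polling) into a single Progress.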
    def progress(self):
        return Progress.merge(self.step_progress.values())
Example #8
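An end-to-end snapshotter test: an empty snapshot, file creation via create_4foobar (Example #2), an in-place rewrite (retried in a loop because an unchanged mtime can mask a same-length write), an upload, per-file deletion, and finally a check that the snapshotter's methods assert when called without holding its lock.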
def test_snapshot(snapshotter, uploader):
    with snapshotter.lock:
        # Start with empty
        assert snapshotter.snapshot(progress=Progress()) == 0
        src = snapshotter.src
        dst = snapshotter.dst
        assert not (dst / "foo").is_file()

        # Create files in src, run snapshot
        snapshotter.create_4foobar()
        ss2 = snapshotter.get_snapshot_state()

        assert (dst / "foo").is_file()
        assert (dst / "foo").read_text() == "foobar"
        assert (dst / "foo2").read_text() == "foobar"

        hashes = snapshotter.get_snapshot_hashes()
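        # foobig and foobig2 have identical content, so they share one
        # hexdigest; the small files are embedded rather than hashed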
        assert len(hashes) == 1
        assert hashes == [
            ipc.SnapshotHash(
                hexdigest='326827fe6fd23503bf16eed91861766df522748794814a1bf46d479d9feae1a0',
                size=600)
        ]

        while True:
            (src / "foo").write_text("barfoo")  # same length
            if snapshotter.snapshot(progress=Progress()) > 0:
                # Sometimes fails on first iteration(s) due to same mtime
                # (inaccurate timestamps)
                break
        ss3 = snapshotter.get_snapshot_state()
        assert ss2 != ss3
        assert snapshotter.snapshot(progress=Progress()) == 0
        assert (dst / "foo").is_file()
        assert (dst / "foo").read_text() == "barfoo"

        uploader.write_hashes_to_storage(snapshotter=snapshotter,
                                         hashes=hashes,
                                         parallel=1,
                                         progress=Progress())

        # Remove file from src, run snapshot
        for filename in ["foo", "foo2", "foobig", "foobig2"]:
            (src / filename).unlink()
            assert snapshotter.snapshot(progress=Progress()) > 0
            assert snapshotter.snapshot(progress=Progress()) == 0
            assert not (dst / filename).is_file()

        # Now shouldn't have any data hashes
        hashes_empty = snapshotter.get_snapshot_hashes()
        assert not hashes_empty

    with pytest.raises(AssertionError):
        snapshotter.snapshot(progress=Progress())

    with pytest.raises(AssertionError):
        snapshotter.get_snapshot_state()

    with pytest.raises(AssertionError):
        snapshotter.get_snapshot_hashes()