Example #1
    def _get_chunks(self, download, remote, status_info, status, jobs):
        title = "Analysing status."

        progress.set_n_total(1)
        total = len(status_info)
        current = 0

        cache = []
        path_infos = []
        names = []
        for md5, info in status_info.items():
            if info["status"] == status:
                cache.append(self.checksum_to_path_info(md5))
                path_infos.append(remote.checksum_to_path_info(md5))
                names.append(info["name"])
            current += 1
            progress.update_target(title, current, total)

        progress.finish_target(title)

        progress.set_n_total(len(names))

        if download:
            to_infos = cache
            from_infos = path_infos
        else:
            to_infos = path_infos
            from_infos = cache

        return list(
            zip(
                to_chunks(from_infos, jobs),
                to_chunks(to_infos, jobs),
                to_chunks(names, jobs),
            ))
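
Note: several of these examples rely on a `to_chunks` helper that is not shown in any snippet. Judging by the call sites, it splits a list into roughly `jobs` slices so that each worker thread receives one slice; a minimal sketch under that assumption:

import math


def to_chunks(lst, jobs):
    # Split lst into at most `jobs` roughly equal slices (an assumed
    # re-implementation; the real helper is not shown in these examples).
    if not lst:
        return []
    n = int(math.ceil(len(lst) / float(jobs)))
    return [lst[i:i + n] for i in range(0, len(lst), n)]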
Example #2
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        progress.update_target(title, 10, 100)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())

        progress.update_target(title, 30, 100)

        remote_exists = list(remote.cache_exists(md5s))

        progress.update_target(title, 90, 100)

        local_exists = self.cache_exists(md5s)

        progress.finish_target(title)

        for md5, info in ret.items():
            info["status"] = STATUS_MAP[
                (md5 in local_exists, md5 in remote_exists)
            ]

        return ret
Example #3
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        Logger.info("Preparing to pull data from {}".format(remote.url))
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        checksum_infos, missing = self._collect(checksum_infos)
        checksum_infos += missing

        progress.update_target(title, 10, 100)

        md5s, names = self._group(checksum_infos,
                                  show_checksums=show_checksums)

        progress.update_target(title, 20, 100)

        path_infos = remote.md5s_to_path_infos(md5s)

        progress.update_target(title, 30, 100)

        remote_exists = remote.exists(path_infos)

        progress.update_target(title, 90, 100)

        local_exists = [not self.changed_cache_file(md5) for md5 in md5s]

        progress.finish_target(title)

        return [(name, STATUS_MAP[l, r])
                for name, l, r in zip(names, local_exists, remote_exists)]
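
`STATUS_MAP` itself never appears in these snippets. The lookups show it is keyed by the `(local_exists, remote_exists)` pair; a plausible reconstruction, with status names that are assumptions rather than quotes from the source:

STATUS_MAP = {
    # (local_exists, remote_exists)
    (True, True): "ok",         # present on both sides
    (False, False): "missing",  # present on neither side
    (True, False): "new",       # local only
    (False, True): "deleted",   # remote only
}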
Example #4
    def test_progress_awareness(self, mocker, capsys, caplog):
        from dvc.progress import progress

        # make stdout look like a terminal so the progress bar is drawn
        with mocker.patch("sys.stdout.isatty", return_value=True):
            progress.set_n_total(100)
            progress.update_target("progress", 1, 10)

            # logging an invisible message should not break
            # the progress bar output
            with caplog.at_level(logging.INFO, logger="dvc"):
                debug_record = logging.LogRecord(
                    name="dvc",
                    level=logging.DEBUG,
                    pathname=__name__,
                    lineno=1,
                    msg="debug",
                    args=(),
                    exc_info=None,
                )

                # `formatter` comes from the logging module under test
                formatter.format(debug_record)
                captured = capsys.readouterr()
                assert "\n" not in captured.out

            # a newline should appear only when the message is visible
            with caplog.at_level(logging.INFO, logger="dvc"):
                logger.info("some info")
                captured = capsys.readouterr()
                assert "\n" in captured.out
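
The test above pins down the interaction between logging and the in-place progress bar: a record that the level filter drops must not emit a newline, or it would strand a half-drawn progress line. A generic way to get that behaviour, shown here as a sketch rather than dvc's actual handler, is a stream handler that erases the current line before printing a visible record; filtered records never reach emit(), so they print nothing:

import logging


class ProgressAwareHandler(logging.StreamHandler):
    def emit(self, record):
        # erase the in-place progress line before printing the record
        if self.stream.isatty():
            self.stream.write("\r\x1b[K")
        super().emit(record)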
Example #5
    def _do_pull(self, checksum_infos, remote, jobs=1, no_progress_bar=False):
        md5s = [info[self.PARAM_MD5] for info in checksum_infos]

        # NOTE: keep only checksums whose local cache entry is missing or
        # corrupted, i.e. the files that actually need to be downloaded
        md5s = list(filter(lambda md5: self.changed(md5), md5s))

        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]
        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s)

        chunks = list(
            zip(to_chunks(path_infos, jobs), to_chunks(cache, jobs),
                to_chunks(md5s, jobs)))

        progress.set_n_total(len(md5s))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for from_infos, to_infos, md5s in chunks:
                res = executor.submit(remote.download,
                                      from_infos,
                                      to_infos,
                                      names=md5s,
                                      no_progress_bar=no_progress_bar)
                futures.append(res)

        for f in futures:
            f.result()
Example #6
    def push(self, checksum_infos, remote, jobs=1):
        md5s = [
            info[self.PARAM_MD5] for info in self._collect(checksum_infos)[0]
        ]

        # NOTE: verifying that our cache is not corrupted
        md5s = list(filter(lambda md5: not self.changed(md5), md5s))

        # NOTE: filter out files that already exist on the remote
        path_infos = remote.md5s_to_path_infos(md5s)
        md5s_exist = filter(lambda x: not x[1],
                            list(zip(md5s, remote.exists(path_infos))))
        md5s = [md5 for md5, exists in md5s_exist]

        cache = [self.get(md5) for md5 in md5s]
        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s)

        chunks = list(
            zip(to_chunks(path_infos, jobs), to_chunks(cache, jobs),
                to_chunks(md5s, jobs)))

        progress.set_n_total(len(md5s))

        if len(chunks) == 0:
            return

        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for path_infos, paths, md5s in chunks:
                executor.submit(remote.upload, paths, path_infos, names=md5s)
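
Unlike the later variants, this push discards the futures returned by submit(); the with-block still waits for every upload to finish, but an exception raised inside remote.upload is silently lost. The futures pattern the other examples use, as a self-contained sketch where `upload` stands in for `remote.upload`:

from concurrent.futures import ThreadPoolExecutor


def run_chunks(upload, chunks):
    if not chunks:
        return

    # submit one upload per chunk and keep the futures
    with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
        futures = [
            executor.submit(upload, paths, path_infos, names=names)
            for path_infos, paths, names in chunks
        ]

    # result() re-raises any exception that happened inside a worker
    for f in futures:
        f.result()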
Example #7
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        progress.update_target(title, 10, 100)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())

        progress.update_target(title, 30, 100)

        remote_exists = list(remote.cache_exists(md5s))

        progress.update_target(title, 90, 100)

        local_exists = self.cache_exists(md5s)

        progress.finish_target(title)

        self._fill_statuses(ret, local_exists, remote_exists)

        self._log_missing_caches(ret)

        return ret
Example #8
    def _do_pull(self, checksum_infos, remote, jobs=1, show_checksums=False):
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        grouped = zip(
            *self._group(checksum_infos, show_checksums=show_checksums))

        progress.update_target(title, 10, 100)

        md5s = []
        names = []
        # NOTE: keep only files whose local cache entry is missing or corrupted
        for md5, name in grouped:
            if self.changed_cache(md5):
                md5s.append(md5)
                names.append(name)

        progress.update_target(title, 30, 100)

        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]

        progress.update_target(title, 50, 100)

        path_infos = remote.md5s_to_path_infos(md5s)

        progress.update_target(title, 60, 100)

        # NOTE: dummy call to try to establish a connection
        # to see if we need to ask user for a password.
        remote.exists(remote.md5s_to_path_infos(['000']))

        progress.update_target(title, 70, 100)

        assert len(path_infos) == len(cache) == len(md5s) == len(names)

        chunks = list(
            zip(to_chunks(path_infos, jobs), to_chunks(cache, jobs),
                to_chunks(names, jobs)))

        progress.finish_target(title)

        progress.set_n_total(len(names))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for from_infos, to_infos, names in chunks:
                res = executor.submit(remote.download,
                                      from_infos,
                                      to_infos,
                                      names=names)
                futures.append(res)

        for f in futures:
            f.result()
Example #9
def map_progress(func, targets, n_threads):
    """
    Process targets in multi-threaded mode with progress bar
    """
    progress.set_n_total(len(targets))
    p = ThreadPool(processes=n_threads)

    try:
        p.map(func, targets)
    except Exception as exc:
        Logger.error(
            'Unexpected exception while processing targets: {}'.format(exc))
    finally:
        progress.finish()
Example #10
def map_progress(func, targets, n_threads):
    """
    Process targets in multi-threaded mode with progress bar
    """
    progress.set_n_total(len(targets))
    pool = ThreadPool(processes=n_threads)
    # `wrap` is a module-level helper (not shown in this snippet) that
    # applies `func` to a single target.
    ret = pool.map(lambda t: wrap(func, t), targets)

    return list(zip(targets, ret))
Example #11
def map_progress(func, targets, n_threads):
    """
    Process targets in multi-threaded mode with progress bar
    """
    progress.set_n_total(len(targets))
    pool = ThreadPool(processes=n_threads)

    try:
        ret = pool.map(func, targets)
    finally:
        progress.finish()

    return list(zip(targets, ret))
Example #12
    def status(
        self,
        checksum_infos,
        remote,
        jobs=None,
        show_checksums=False,
        download=False,
    ):
        logger.info(
            "Preparing to collect status from {}".format(remote.path_info)
        )
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        progress.update_target(title, 10, 100)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret)

        progress.update_target(title, 30, 100)

        local_exists = self.cache_exists(md5s)

        progress.update_target(title, 40, 100)

        # This is a performance optimization. We can safely assume that,
        # if the resources we want to fetch are already cached locally,
        # there is no need to check the remote storage for the existence
        # of those files.
        if download and sorted(local_exists) == sorted(md5s):
            remote_exists = local_exists
        else:
            remote_exists = list(remote.cache_exists(md5s))

        progress.update_target(title, 90, 100)

        progress.finish_target(title)

        self._fill_statuses(ret, local_exists, remote_exists)

        self._log_missing_caches(ret)

        return ret
Example #13
    def push(self, checksum_infos, remote, jobs=1, show_checksums=False):
        checksum_infos = self._collect(checksum_infos)[0]

        # NOTE: verifying that our cache is not corrupted
        def func(info):
            return not self.changed(info[self.PARAM_MD5])
        checksum_infos = list(filter(func, checksum_infos))

        # NOTE: filter out files that already exist on the remote
        md5s = [i[self.PARAM_MD5] for i in checksum_infos]
        exists = remote.exists(remote.md5s_to_path_infos(md5s))

        def func(entry):
            # entry is a (exists_on_remote, checksum_info) pair
            return not entry[0]

        assert len(exists) == len(checksum_infos)
        infos_exist = list(filter(func, zip(exists, checksum_infos)))
        checksum_infos = [i for e, i in infos_exist]

        md5s, names = self._group(checksum_infos,
                                  show_checksums=show_checksums)
        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]
        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s) == len(names)

        chunks = list(zip(to_chunks(path_infos, jobs),
                          to_chunks(cache, jobs),
                          to_chunks(names, jobs)))

        progress.set_n_total(len(names))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for to_infos, from_infos, names in chunks:
                res = executor.submit(remote.upload,
                                      from_infos,
                                      to_infos,
                                      names=names)
                futures.append(res)

        for f in futures:
            f.result()
Example #14
def map_progress(func, targets, n_threads):
    """
    Process targets in multi-threaded mode with progress bar
    """
    progress.set_n_total(len(targets))
    pool = ThreadPool(processes=n_threads)
    ret = []

    try:
        ret = pool.map(func, targets)
    except Exception as exc:
        Logger.error(
            'Unexpected exception while processing targets: {}'.format(exc),
            exc_info=True)
    finally:
        progress.finish()

    return list(zip(targets, ret))
Example #15
    def _do_pull(self,
                 checksum_infos,
                 remote,
                 jobs=1,
                 no_progress_bar=False,
                 show_checksums=False):
        md5s = []
        names = []
        # NOTE: keep only files whose local cache entry is missing or corrupted
        for md5, name in zip(*self._group(checksum_infos,
                                          show_checksums=show_checksums)):
            if self.changed(md5):
                md5s.append(md5)
                names.append(name)

        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]
        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s) == len(names)

        chunks = list(zip(to_chunks(path_infos, jobs),
                          to_chunks(cache, jobs),
                          to_chunks(names, jobs)))

        progress.set_n_total(len(names))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for from_infos, to_infos, names in chunks:
                res = executor.submit(remote.download,
                                      from_infos,
                                      to_infos,
                                      names=names,
                                      no_progress_bar=no_progress_bar)
                futures.append(res)

        for f in futures:
            f.result()
Example #16
    def push(self, checksum_infos, remote, jobs=1):
        checksum_infos = self._collect(checksum_infos)[0]
        md5s = [info[self.PARAM_MD5] for info in checksum_infos]

        # NOTE: verifying that our cache is not corrupted
        md5s = list(filter(lambda md5: not self.changed(md5), md5s))

        # NOTE: filter out files that already exist on the remote
        path_infos = remote.md5s_to_path_infos(md5s)
        lexists = remote.exists(path_infos)
        md5s_exist = filter(lambda x: not x[1], list(zip(md5s, lexists)))
        md5s = [md5 for md5, exists in md5s_exist]

        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]
        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s)

        chunks = list(
            zip(to_chunks(path_infos, jobs), to_chunks(cache, jobs),
                to_chunks(md5s, jobs)))

        progress.set_n_total(len(md5s))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for to_infos, from_infos, md5s in chunks:
                res = executor.submit(remote.upload,
                                      from_infos,
                                      to_infos,
                                      names=md5s)
                futures.append(res)

        for f in futures:
            f.result()
Example #17
    def push(self, checksum_infos, remote, jobs=None, show_checksums=False):
        Logger.info("Preparing to push data to {}".format(remote.url))
        title = "Collecting information"

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        checksum_infos = self._collect(checksum_infos)[0]

        progress.update_target(title, 10, 100)

        # NOTE: verifying that our cache is not corrupted
        def func(info):
            return not self.changed_cache_file(info[self.PARAM_MD5])

        checksum_infos = list(filter(func, checksum_infos))

        progress.update_target(title, 20, 100)

        # NOTE: filter out files that already exist on the remote
        md5s = [i[self.PARAM_MD5] for i in checksum_infos]
        exists = remote.exists(remote.md5s_to_path_infos(md5s))

        progress.update_target(title, 30, 100)

        def func(entry):
            return not entry[0]

        assert len(exists) == len(checksum_infos)
        infos_exist = list(filter(func, zip(exists, checksum_infos)))
        checksum_infos = [i for e, i in infos_exist]

        progress.update_target(title, 70, 100)

        md5s, names = self._group(checksum_infos,
                                  show_checksums=show_checksums)
        cache = [{'scheme': 'local', 'path': self.get(md5)} for md5 in md5s]

        progress.update_target(title, 80, 100)

        path_infos = remote.md5s_to_path_infos(md5s)

        assert len(path_infos) == len(cache) == len(md5s) == len(names)

        progress.update_target(title, 90, 100)

        if jobs is None:
            jobs = remote.JOBS

        chunks = list(
            zip(to_chunks(path_infos, jobs), to_chunks(cache, jobs),
                to_chunks(names, jobs)))

        progress.finish_target(title)

        progress.set_n_total(len(names))

        if len(chunks) == 0:
            return

        futures = []
        with ThreadPoolExecutor(max_workers=len(chunks)) as executor:
            for to_infos, from_infos, names in chunks:
                res = executor.submit(remote.upload,
                                      from_infos,
                                      to_infos,
                                      names=names)
                futures.append(res)

        for f in futures:
            f.result()
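
Taken together, the examples use the progress API in the same two-phase pattern: first a single pseudo-target named "Collecting information" is driven from 0 to 100 while metadata is gathered, then set_n_total is reset to the number of files so each transfer reports its own progress. Condensed into one sketch (`transfer_one` is a hypothetical per-file worker):

from dvc.progress import progress


def transfer_with_progress(names):
    title = "Collecting information"

    # phase 1: one pseudo-target covering the preparation work
    progress.set_n_total(1)
    progress.update_target(title, 0, 100)
    # ... collect checksums, query the remote, build chunks ...
    progress.update_target(title, 90, 100)
    progress.finish_target(title)

    # phase 2: one target per file being transferred
    progress.set_n_total(len(names))
    for name in names:
        transfer_one(name)  # hypothetical per-file worker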