Esempio n. 1
0
    def testRelative(self):
        """Verify that ``relative_to`` recovers the subpath between URIs.

        Three scenarios are exercised in turn: absolute URIs rooted in the
        temporary directory, a pair of scheme-less relative URIs, and a
        relative child measured against an absolute ``file:`` parent.
        """
        # --- Absolute URIs built from the temporary directory ---
        root = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        self.assertTrue(root.isdir())

        nested_path = os.path.join(self.tmpdir, "dir1", "file.txt")
        nested = ButlerURI(nested_path, forceAbsolute=True)
        self.assertEqual(nested.relative_to(root), "dir1/file.txt")

        # A URI that is expected not to be a child of the root yields a
        # falsy result, and is not itself treated as a directory.
        outsider = ButlerURI("/a/b/dir1/file.txt")
        self.assertFalse(outsider.relative_to(root))
        self.assertFalse(outsider.isdir())

        # Asking for a path relative to a file (not a directory) is
        # expected to yield a falsy result as well.
        sibling_path = os.path.join(self.tmpdir, "dir1", "file2.txt")
        sibling_file = ButlerURI(sibling_path)
        self.assertFalse(nested.relative_to(sibling_file))

        # --- Relative (scheme-less) URIs ---
        rel_root = ButlerURI("a/b/", forceAbsolute=False)
        rel_leaf = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(rel_leaf.scheme)
        self.assertEqual(rel_leaf.relative_to(rel_root), "c/d.txt")

        # --- Absolute file URI parent with a scheme-less child ---
        file_root = ButlerURI("file:/a/b/c/")

        # A relative child is assumed to live under the absolute parent
        # unless it uses ".." to climb out of it.
        inside = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(inside.relative_to(file_root), "e/f/g.txt")

        # ".." steps out of the parent directory: not a child.
        escaping = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertFalse(escaping.relative_to(file_root))

        # ".." followed by the parent's own final component ("c") lands
        # back inside the parent.
        reentering = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(reentering.relative_to(file_root), "e/f/g.txt")
Esempio n. 2
0
    def retrieveArtifacts(self,
                          refs: Iterable[DatasetRef],
                          destination: ButlerURI,
                          transfer: str = "auto",
                          preserve_path: bool = True,
                          overwrite: bool = False) -> List[ButlerURI]:
        """Retrieve the file artifacts associated with the supplied refs.

        Each ref is assigned to the first non-ephemeral datastore (in
        ``self.datastores`` order) that reports it exists, and the actual
        retrieval is then delegated to that datastore.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets for which file artifacts are to be retrieved.
            A single ref can result in multiple files. The refs must
            be resolved.
        destination : `ButlerURI`
            Location to write the file artifacts.
        transfer : `str`, optional
            Method to use to transfer the artifacts. Must be one of the options
            supported by `ButlerURI.transfer_from()`. "move" is not allowed.
        preserve_path : `bool`, optional
            If `True` the full path of the file artifact within the datastore
            is preserved. If `False` the final file component of the path
            is used.
        overwrite : `bool`, optional
            If `True` allow transfers to overwrite existing files at the
            destination.

        Returns
        -------
        targets : `list` of `ButlerURI`
            URIs of file artifacts in destination location. Order is not
            preserved.

        Raises
        ------
        ValueError
            Raised if ``destination`` does not refer to a directory.
        RuntimeError
            Raised if one or more refs could not be found in any
            non-ephemeral datastore. Raised before any transfer starts.
        """
        if not destination.isdir():
            raise ValueError(
                f"Destination location must refer to a directory. Given {destination}"
            )

        # getURIs is deliberately not used: it would make it hard to work
        # out the path within the datastore afterwards. The approach here
        # mirrors how getURIs itself is implemented.

        # Refs that have not yet been matched to a datastore.
        unlocated = set(refs)

        # Map of datastore position in self.datastores -> refs it holds.
        # Everything is located before anything is transferred so that
        # missing datasets are reported up front rather than part way
        # through the transfer.
        located: Dict[int, Set[DatasetRef]] = {}

        for index, store in enumerate(self.datastores):
            # Ephemeral datastores are skipped for now. In the future
            # an on-disk local cache should be distinguished from a purely
            # in-memory datastore, since using such a cache is exactly
            # what we should be doing.
            if store.isEphemeral:
                continue

            held_here = set()
            for ref in unlocated:
                if store.exists(ref):
                    held_here.add(ref)

            if not held_here:
                continue

            located[index] = held_here

            # No need to look for these refs in later datastores.
            unlocated = unlocated.difference(held_here)

        if unlocated:
            raise RuntimeError(
                f"Some datasets were not found in any datastores: {unlocated}")

        # Everything was found; hand each group to its owning datastore.
        targets: List[ButlerURI] = []
        for index, held_here in located.items():
            store = self.datastores[index]
            retrieved = store.retrieveArtifacts(
                held_here,
                destination,
                transfer=transfer,
                preserve_path=preserve_path,
                overwrite=overwrite,
            )
            targets.extend(retrieved)

        return targets