Esempio n. 1
0
    def testQuoting(self):
        """Check that a percent-quoted path is handled by the accessors."""
        root_dir = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        raw_path = "rootdir/dir1+/file?.txt"
        quoted_path = urllib.parse.quote(raw_path)
        leaf = ButlerURI(self.makeS3Uri(quoted_path))

        # Name-like accessors are expected to give back the unquoted text.
        self.assertEqual(leaf.relative_to(root_dir), "dir1+/file?.txt")
        self.assertEqual(leaf.basename(), "file?.txt")
        self.assertEqual(leaf.relativeToPathRoot, raw_path)
        # The stored path itself is expected to keep its percent escapes.
        self.assertIn("%", leaf.path)
        self.assertEqual(leaf.unquoted_path, f"/{raw_path}")
Esempio n. 2
0
    def testButlerUriSerialization(self):
        """Test that a URI survives a pickle round trip.

        Only pickling is exercised here, once for a file-like URI and once
        for a URI forced to be a directory.
        """
        fileUri = ButlerURI("a/b/c/d")
        pickled = pickle.dumps(fileUri)
        restored = pickle.loads(pickled)
        self.assertEqual(fileUri, restored)
        self.assertFalse(restored.dirLike)

        dirUri = ButlerURI("a/b/c/d", forceDirectory=True)
        pickled = pickle.dumps(dirUri)
        restored = pickle.loads(pickled)
        self.assertEqual(dirUri, restored)
        self.assertTrue(restored.dirLike)
Esempio n. 3
0
    def _extractIngestInfo(self,
                           path: str,
                           ref: DatasetRef,
                           *,
                           formatter: Union[Formatter, Type[Formatter]],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        #
        # With ``transfer=None`` the file is used where it is and its
        # location is worked out relative to the datastore root.  With
        # "move" or "copy" the file is uploaded (local source) or copied
        # (S3 source) to the name calculated for it inside the datastore;
        # "move" additionally removes the source afterwards.
        srcUri = ButlerURI(path)
        if transfer is None:
            rootUri = ButlerURI(self.root)
            p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
            pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
            tgtLocation = self.locationFactory.fromPath(pathInStore)
        else:
            assert transfer == "move" or transfer == "copy", "Should be guaranteed by _standardizeIngestPath"

            # Work out the name we want this ingested file to have
            # inside the datastore
            tgtLocation = self._calculate_ingested_datastore_name(
                srcUri, ref, formatter)

            if srcUri.scheme == "file":
                # source is on local disk.
                with open(srcUri.ospath, 'rb') as f:
                    self.client.put_object(Bucket=tgtLocation.netloc,
                                           Key=tgtLocation.relativeToPathRoot,
                                           Body=f)
                if transfer == "move":
                    os.remove(srcUri.ospath)
            elif srcUri.scheme == "s3":
                # source is another S3 Bucket
                relpath = srcUri.relativeToPathRoot
                copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
                self.client.copy(copySrc, self.locationFactory.netloc,
                                 tgtLocation.relativeToPathRoot)
                if transfer == "move":
                    # https://github.com/boto/boto3/issues/507 - there is no
                    # way of knowing if the file was actually deleted except
                    # for checking all the keys again; the response is
                    # HTTP 204 every time.
                    # The S3 client operation is ``delete_object``; the
                    # client has no ``delete`` method.
                    self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)

        # the file should exist on the bucket by now
        _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                    bucket=tgtLocation.netloc,
                                    client=self.client)

        return StoredFileInfo(formatter=formatter,
                              path=tgtLocation.pathInStore,
                              storageClass=ref.datasetType.storageClass,
                              component=ref.datasetType.component(),
                              file_size=size,
                              checksum=None)
Esempio n. 4
0
    def testEnvVar(self):
        """Test that environment variables are expanded."""
        patched = {"MY_TEST_DIR": "/a/b/c"}

        with unittest.mock.patch.dict(os.environ, patched):
            expanded = ButlerURI("${MY_TEST_DIR}/d.txt")
        self.assertEqual(expanded.path, "/a/b/c/d.txt")
        self.assertEqual(expanded.scheme, "file")

        # Built outside the patched environment and without forcing an
        # absolute path: the text is expected to come back unexpanded and
        # without a scheme.
        literal = ButlerURI("${MY_TEST_DIR}/d.txt", forceAbsolute=False)
        self.assertEqual(literal.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(literal.scheme)
Esempio n. 5
0
    def testRelative(self):
        """Check relative paths between a directory URI and other URIs."""
        rootDir = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
        inside = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))
        self.assertEqual(inside.relative_to(rootDir), "dir1/file.txt")

        # A URI that is not below the directory gives a false result.
        outside = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
        self.assertFalse(outside.relative_to(rootDir))

        # Likewise when the other URI is a local path rather than S3.
        localFile = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
        self.assertFalse(inside.relative_to(localFile))
Esempio n. 6
0
    def testFileExists(self):
        """Check `s3CheckFileExists` with each supported form of input.

        The function is called with an explicit bucket and key, with a
        `ButlerURI`, with a `Location` and with a plain string.  Its result
        is a tuple whose first element is the existence flag, so every
        assertion indexes element 0.
        """
        self.assertTrue(
            s3CheckFileExists(client=self.client,
                              bucket=self.bucketName,
                              path=self.fileName)[0])
        self.assertFalse(
            s3CheckFileExists(client=self.client,
                              bucket=self.bucketName,
                              path=self.fileName + "_NO_EXIST")[0])

        datastoreRootUri = f"s3://{self.bucketName}/"
        uri = f"s3://{self.bucketName}/{self.fileName}"

        buri = ButlerURI(uri)
        location = Location(datastoreRootUri, self.fileName)

        self.assertTrue(s3CheckFileExists(client=self.client, path=buri)[0])
        # just to make sure the overloaded keyword works correctly
        self.assertTrue(s3CheckFileExists(buri, client=self.client)[0])
        self.assertTrue(
            s3CheckFileExists(client=self.client, path=location)[0])

        # make sure supplying strings resolves correctly too.  The flag
        # must be indexed explicitly: asserting on the returned tuple
        # itself would always pass because a non-empty tuple is truthy.
        self.assertTrue(s3CheckFileExists(uri, client=self.client)[0])
        self.assertTrue(s3CheckFileExists(uri)[0])
Esempio n. 7
0
    def saveUri(self, uri):
        """Save `QuantumGraph` to the specified URI.

        Parameters
        ----------
        uri : `ButlerURI` or `str`
            URI to where the graph should be saved.

        Raises
        ------
        TypeError
            Raised if the extension of ``uri`` is not ``.qgraph``.
        """
        buffer = self._buildSaveObject()
        butlerUri = ButlerURI(uri)
        # Compare for equality.  ``x not in (".qgraph")`` is a substring
        # test against a plain string (the parentheses do not make a
        # tuple), which would accept "" or ".q" as valid extensions.
        if butlerUri.getExtension() != ".qgraph":
            raise TypeError(
                f"Can currently only save a graph in qgraph format not {uri}")
        # Ignore because bytearray is safe to use in place of bytes
        butlerUri.write(buffer)  # type: ignore
Esempio n. 8
0
    def testResource(self):
        """Test reading and joining of ``resource://`` URIs."""
        u = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
        self.assertTrue(u.exists(), f"Check {u} exists")

        content = u.read().decode()
        self.assertTrue(content.startswith("datastore:"))

        truncated = u.read(size=9).decode()
        self.assertEqual(truncated, "datastore")

        d = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
        # Check the directory itself; previously this re-tested the file
        # URI ``u`` even though the message refers to ``d``.
        self.assertTrue(d.exists(), f"Check directory {d} exists")

        j = d.join("datastore.yaml")
        self.assertEqual(u, j)
        self.assertFalse(j.dirLike)
        self.assertFalse(d.join("not-there.yaml").exists())
Esempio n. 9
0
    def testUriExtensions(self):
        """Test extension extraction."""
        # Pairs of (file name, extension expected from getExtension()).
        cases = (
            ("file.fits.gz", ".fits.gz"),
            ("file.fits", ".fits"),
            ("file.fits.xz", ".fits.xz"),
            ("file.fits.tar", ".tar"),
            ("file", ""),
            ("flat_i_sim_1.4_blah.fits.gz", ".fits.gz"),
            ("flat_i_sim_1.4_blah.txt", ".txt"),
            ("flat_i_sim_1.4_blah.fits.fz", ".fits.fz"),
            ("flat_i_sim_1.4_blah.fits.txt", ".txt"),
        )

        for filename, extension in cases:
            candidate = ButlerURI(f"a/b/{filename}")
            self.assertEqual(candidate.getExtension(), extension)
Esempio n. 10
0
 def testParents(self):
     """Test splitting a URI and walking up through its parents."""
     top = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
     leaf = top.join("subdir/file.txt")
     self.assertFalse(leaf.isdir())

     # split() hands back the containing directory and the file name.
     leafDir, leafName = leaf.split()
     self.assertEqual(leafName, "file.txt")
     self.assertTrue(leafDir.isdir())
     self.assertEqual(leaf.dirname(), leafDir)
     self.assertEqual(leaf.basename(), leafName)
     self.assertEqual(leaf.parent(), leafDir)

     # Going up once more should arrive back at the starting directory.
     grandparent = leafDir.parent()
     self.assertEqual(grandparent, top)
     self.assertTrue(grandparent.isdir())
     self.assertEqual(leaf.parent().parent(), top)
Esempio n. 11
0
    def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
        """Fill in `runName` (and `chainName` if unset) with guessed names.

        Nothing is changed when `runName` is already set.  Otherwise, when
        `chainName` is `None` it is guessed from ``self.path``, and in all
        cases `runName` is then set to ``"{chainName}/direct"``.

        Parameters
        ----------
        instrument : `Instrument`
            Instrument object for the repository being converted.
        root : `str`
            Path to the root repository.  When ``self.path`` is absolute the
            guess starts from ``self.path`` taken relative to this root.

        Raises
        ------
        ValueError
            Raised if ``self.path`` is absolute but has no path relative to
            ``root``.
        """
        if self.runName is not None:
            return

        if self.chainName is None:
            if not os.path.isabs(self.path):
                guess = self.path
            else:
                guess = ButlerURI(self.path).relative_to(ButlerURI(root))
                if guess is None:
                    raise ValueError(
                        f"Cannot guess run name collection for rerun at '{self.path}': "
                        f"no clear relationship to root '{root}'."
                    )

            # Strip the leading directory prefixes in turn; a "private/"
            # prefix selects the "u/" form of the name.
            guess, _ = _dropPrefix(guess, "rerun/")
            guess, personal = _dropPrefix(guess, "private/")
            if not personal:
                guess, _ = _dropPrefix(guess, "shared/")
                guess = instrument.makeCollectionName("runs", guess)
            else:
                guess = f"u/{guess}"
            self.chainName = guess

        self.runName = f"{self.chainName}/direct"
Esempio n. 12
0
    def _extractIngestInfo(self, path: str, ref: DatasetRef, *, formatter: Type[Formatter],
                           transfer: Optional[str] = None) -> StoredFileInfo:
        # Docstring inherited from FileLikeDatastore._extractIngestInfo.
        #
        # With ``transfer=None`` the file is used where it is.  A local
        # source is uploaded under the templated name for ``ref``; an S3
        # source is copied into the datastore bucket under the same key and
        # located relative to the datastore root.  "move" removes the source
        # after the transfer.
        srcUri = ButlerURI(path)
        # Needed by both the in-place branch and the S3 -> S3 branch, so it
        # is defined up front rather than in only one of them.
        rootUri = ButlerURI(self.root)
        if transfer is None:
            p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
            pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
            tgtLocation = self.locationFactory.fromPath(pathInStore)
        else:
            assert transfer == "move" or transfer == "copy", "Should be guaranteed by _standardizeIngestPath"
            if srcUri.scheme == "file":
                # source is on local disk.
                template = self.templates.getTemplate(ref)
                location = self.locationFactory.fromPath(template.format(ref))
                tgtPathInStore = formatter.predictPathFromLocation(location)
                tgtLocation = self.locationFactory.fromPath(tgtPathInStore)
                self.client.upload_file(Bucket=tgtLocation.netloc, Key=tgtLocation.relativeToPathRoot,
                                        Filename=srcUri.ospath)
                if transfer == "move":
                    os.remove(srcUri.ospath)
            elif srcUri.scheme == "s3":
                # source is another S3 Bucket
                relpath = srcUri.relativeToPathRoot
                # Work out the target before touching any bucket so that a
                # source outside the root fails before anything is copied
                # or deleted.
                p = pathlib.PurePosixPath(relpath)
                relativeToDatastoreRoot = str(p.relative_to(rootUri.relativeToPathRoot))
                tgtLocation = self.locationFactory.fromPath(relativeToDatastoreRoot)

                copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
                self.client.copy(copySrc, self.locationFactory.netloc, relpath)
                if transfer == "move":
                    # https://github.com/boto/boto3/issues/507 - there is no
                    # way of knowing if the file was actually deleted except
                    # for checking all the keys again; the response is
                    # HTTP 204 every time.
                    # The S3 client operation is ``delete_object``; the
                    # client has no ``delete`` method.
                    self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)
            else:
                # Without this the code below would fail on an unbound
                # ``tgtLocation``; report the real problem instead.
                raise NotImplementedError(f"Scheme type {srcUri.scheme} not supported.")

        # the file should exist on the bucket by now
        _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                    bucket=tgtLocation.netloc,
                                    client=self.client)

        return StoredFileInfo(formatter=formatter, path=tgtLocation.pathInStore,
                              storageClass=ref.datasetType.storageClass,
                              file_size=size, checksum=None)
Esempio n. 13
0
    def testUriRoot(self):
        """Check ``relativeToPathRoot`` and ``is_root`` for root and non-root URIs."""
        fsRoot = pathlib.Path(__file__).absolute().root

        # Every one of these names a root; forced to be a directory, its
        # path relative to the root is expected to be "./".
        for text in (fsRoot, "s3://bucket", "file://localhost/", "https://a.b.com"):
            rootUri = ButlerURI(text, forceDirectory=True)
            self.assertEqual(rootUri.relativeToPathRoot, "./", f"Testing uri: {rootUri}")
            self.assertTrue(rootUri.is_root, f"Testing URI {rootUri} is a root URI")

        # URI text mapped to the path expected relative to its root.
        expectations = {
            "file://localhost/file.ext": "file.ext",
            os.path.join(fsRoot, "a", "b", "c"): os.path.join("a", "b", "c"),
            "s3://bucket/path/file.ext": "path/file.ext",
            "https://host.com/a/b/c.d": "a/b/c.d",
        }
        for text, relative in expectations.items():
            self.assertEqual(ButlerURI(text).relativeToPathRoot, relative)
Esempio n. 14
0
    def testDirect(self):
        """Ingest with ``transfer="direct"`` and compare the stored URI with the source."""
        self._ingestRaws(transfer="direct")

        # The URI reported for the first "raw" dataset in the output run
        # should be the source file itself, outside of the datastore.
        expected = ButlerURI(self.file)
        butler = Butler(self.root, run=self.outputRun)
        rawRefs = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
        reported = butler.getURI(rawRefs[0])
        self.assertEqual(reported, expected)
Esempio n. 15
0
    def testFile(self):
        """Write a local file through a URI, read it back and check its size."""
        localPath = os.path.join(self.tmpdir, "test.txt")
        target = ButlerURI(localPath)
        self.assertFalse(target.exists(), f"{target} should not exist")
        self.assertEqual(target.ospath, localPath)

        text = "abcdefghijklmnopqrstuv\n"
        target.write(text.encode())
        self.assertTrue(os.path.exists(localPath), "File should exist locally")
        self.assertTrue(target.exists(), f"{target} should now exist")
        self.assertEqual(target.read().decode(), text)
        self.assertEqual(target.size(), len(text.encode()))

        # Asking for the size of a missing file must raise.
        with self.assertRaises(FileNotFoundError):
            ButlerURI("file/not/there.txt").size()

        # Constructing from an existing URI should hand back that very object.
        again = ButlerURI(target)
        self.assertEqual(target, again)
        self.assertEqual(id(target), id(again))
Esempio n. 16
0
    def testGetFileURL(self):
        """Check that `_getFileURL` gives the same URL for a str, a URI and a Location."""
        serverUrl = f"https://{self.serverRoot}/"
        relative = f"{self.existingfolderName}/{self.existingfileName}"
        expected = serverUrl + relative

        asUri = ButlerURI(expected)
        asLocation = Location(serverUrl, relative)

        for candidate in (expected, asUri, asLocation):
            self.assertEqual(_getFileURL(candidate), expected)
Esempio n. 17
0
    def testFile(self):
        """Write a local file through a URI and read it back."""
        localPath = os.path.join(self.tmpdir, "test.txt")
        target = ButlerURI(localPath)
        self.assertFalse(target.exists(), f"{target} should not exist")
        self.assertEqual(target.ospath, localPath)

        text = "abcdefghijklmnopqrstuv\n"
        target.write(text.encode())
        self.assertTrue(os.path.exists(localPath), "File should exist locally")
        self.assertTrue(target.exists(), f"{target} should now exist")
        roundTripped = target.read().decode()
        self.assertEqual(roundTripped, text)
Esempio n. 18
0
    def __init__(self, config, registry, butlerRoot=None):
        super().__init__(config, registry, butlerRoot)

        # The root must be schemeless or use the "file" scheme.
        rootUri = ButlerURI(self.root)
        scheme = rootUri.scheme
        if scheme and scheme != "file":
            raise ValueError(
                f"Root location must only be a file URI not {self.root}")

        # From here on the root is kept as a plain path.
        self.root = rootUri.path
        if not os.path.isdir(self.root):
            # A missing root is only created when the configuration asks
            # for it with a true "create" entry.
            createRequested = "create" in self.config and self.config["create"]
            if not createRequested:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)
Esempio n. 19
0
    def testUriExtensions(self):
        """Test extension extraction."""
        # Pairs of (file name or full URI, extension expected from
        # getExtension()).
        cases = (
            ("file.fits.gz", ".fits.gz"),
            ("file.fits", ".fits"),
            ("file.fits.xz", ".fits.xz"),
            ("file.fits.tar", ".tar"),
            ("file", ""),
            ("flat_i_sim_1.4_blah.fits.gz", ".fits.gz"),
            ("flat_i_sim_1.4_blah.txt", ".txt"),
            ("flat_i_sim_1.4_blah.fits.fz", ".fits.fz"),
            ("flat_i_sim_1.4_blah.fits.txt", ".txt"),
            ("s3://bucket/c/a.b/", ""),
            ("s3://bucket/c/a.b", ".b"),
            ("file://localhost/c/a.b.gz", ".b.gz"),
        )

        for text, extension in cases:
            # Entries without a ":" are bare file names; put them in a
            # relative directory.  Entries with one are used as given.
            target = text if ":" in text else f"a/b/{text}"
            self.assertEqual(ButlerURI(target).getExtension(), extension)
Esempio n. 20
0
    def _standardizeIngestPath(self,
                               path: str,
                               *,
                               transfer: Optional[str] = None) -> str:
        # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
        if transfer not in (None, "move", "copy"):
            raise NotImplementedError(
                f"Transfer mode {transfer} not supported.")

        # Ingest can go file -> s3 or s3 -> s3: the source may be local or
        # in S3, the target is always S3.  The source has to exist.
        # Schemeless URIs are checked with os.path, like local files.
        srcUri = ButlerURI(path)
        scheme = srcUri.scheme
        if not scheme or scheme == 'file':
            found = os.path.exists(srcUri.ospath)
        elif scheme == 's3':
            found = s3CheckFileExists(srcUri, client=self.client)[0]
        else:
            raise NotImplementedError(
                f"Scheme type {srcUri.scheme} not supported.")
        if not found:
            raise FileNotFoundError(f"File at '{srcUri}' does not exist.")

        if transfer is None:
            # Without a transfer the file must already be in the datastore.
            rootUri = ButlerURI(self.root)
            if scheme == "file":
                raise RuntimeError(
                    f"'{srcUri}' is not inside repository root '{rootUri}'. "
                    "Ingesting local data to S3Datastore without upload "
                    "to S3 is not allowed.")
            # NOTE(review): only the path prefix is compared here, not the
            # bucket (netloc) - confirm whether that is intended.
            if scheme == "s3" and not srcUri.path.startswith(rootUri.path):
                raise RuntimeError(
                    f"'{srcUri}' is not inside repository root '{rootUri}'."
                )
        return path
Esempio n. 21
0
    def testExtension(self):
        """Check replacing the extension of a URI."""
        txt = ButlerURI(os.path.join(self.tmpdir, "test.txt"))

        # No new extension, or the same extension, leaves the URI as it
        # was; for the same extension the very same object comes back.
        self.assertEqual(txt.updatedExtension(None), txt)
        self.assertEqual(txt.updatedExtension(".txt"), txt)
        self.assertEqual(id(txt.updatedExtension(".txt")), id(txt))

        # A multi-part extension can be put on and then replaced whole.
        gzipped = txt.updatedExtension(".fits.gz")
        self.assertEqual(gzipped.basename(), "test.fits.gz")
        jpeg = gzipped.updatedExtension(".jpeg")
        self.assertEqual(jpeg.basename(), "test.jpeg")
Esempio n. 22
0
    def getURIs(
        self,
        ref: DatasetRef,
        predict: bool = False
    ) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
        """Return URIs associated with dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required dataset.
        predict : `bool`, optional
            If the datastore does not know about the dataset, should it
            return a predicted URI or not?

        Returns
        -------
        primary : `ButlerURI`
            The URI to the primary artifact associated with this dataset.
            If the dataset was disassembled within the datastore this
            may be `None`.
        components : `dict`
            URIs to any components associated with the dataset artifact.
            Can be empty if there are no components.

        Raises
        ------
        FileNotFoundError
            Raised if the dataset is not in this datastore and ``predict``
            is `False`.

        Notes
        -----
        The URIs returned for in-memory datastores are not usable but
        provide an indication of the associated dataset.
        """

        # Include the dataID as a URI query
        query = urlencode(ref.dataId)

        # if this has never been written then we have to guess
        if not self.exists(ref):
            if not predict:
                raise FileNotFoundError(f"Dataset {ref} not in this datastore")
            name = f"{ref.datasetType.name}"
            fragment = "#predicted"
        else:
            realID, _ = self._get_dataset_info(ref)
            # Only the object id goes into the name; the query is added
            # once, below.  Including it here as well produced a URI with
            # the query string repeated ("...?q?q").
            name = f"{id(self.datasets[realID])}"
            fragment = ""

        return ButlerURI(f"mem://{name}?{query}{fragment}"), {}
Esempio n. 23
0
    def testTransfer(self):
        """Copy content local -> S3 -> S3 -> local and check reads and overwrite handling."""
        text = "Content is some content\nwith something to say\n\n"
        localSrc = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
        localSrc.write(text.encode())

        # Local file into the bucket.
        remote = ButlerURI(self.makeS3Uri("test.txt"))
        self.assertFalse(remote.exists())
        remote.transfer_from(localSrc, transfer="copy")
        self.assertTrue(remote.exists())

        # Bucket to bucket.
        remoteCopy = ButlerURI(self.makeS3Uri("copied.txt"))
        remoteCopy.transfer_from(remote, transfer="copy")
        self.assertTrue(remoteCopy.exists())

        # And back out to a local file, whose content must be unchanged.
        localCopy = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
        localCopy.transfer_from(remoteCopy, transfer="copy")
        with open(localCopy.ospath, "r") as fd:
            roundTripped = fd.read()
        self.assertEqual(roundTripped, text)

        # A symlink transfer into the bucket is rejected.
        with self.assertRaises(ValueError):
            remoteCopy.transfer_from(localCopy, transfer="symlink")

        wholeFile = remote.read()
        self.assertEqual(wholeFile.decode(), roundTripped)

        # A sized read returns exactly the requested leading bytes.
        nbytes = 10
        head = remote.read(size=nbytes)
        self.assertEqual(len(head), nbytes)
        self.assertEqual(head.decode(), text[:nbytes])

        # An existing target is only replaced when overwrite is requested.
        with self.assertRaises(FileExistsError):
            remote.transfer_from(localSrc, transfer="copy")

        remote.transfer_from(localSrc, transfer="copy", overwrite=True)
Esempio n. 24
0
    def testEscapes(self):
        """Test handling of special characters in file paths.

        Covers schemeless and file-scheme URIs whose names contain "?"
        and "#", checking which accessors show the literal characters.
        """
        # A schemeless URI below the temporary directory whose directory
        # name is made of question marks.
        src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
        self.assertFalse(src.scheme)
        src.write(b"Some content")
        self.assertTrue(src.exists())

        # Use the internal API to force to a file
        file = src._force_to_file()
        self.assertTrue(file.exists())
        # The literal "???" is expected in the OS path but not in the URI
        # path of the file-scheme form.
        self.assertIn("???", file.ospath)
        self.assertNotIn("???", file.path)

        # Same expectation after the file name is replaced by one that
        # contains question marks.
        file.updateFile("tests??.txt")
        self.assertNotIn("??.txt", file.path)
        file.write(b"Other content")
        self.assertEqual(file.read(), b"Other content")

        # The schemeless URI keeps the literal characters in its path and
        # is expected to read back what was written through ``file``.
        src.updateFile("tests??.txt")
        self.assertIn("??.txt", src.path)
        self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

        # File URI and schemeless URI
        parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
        child = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f/g.txt")

        child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        # A quoted file-scheme child should give the same unquoted result.
        child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
        self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

        self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

        # Schemeless so should not quote
        dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
        self.assertIn("???", dir.ospath)
        self.assertIn("???", dir.path)
        self.assertFalse(dir.scheme)

        # dir.join() morphs into a file scheme
        new = dir.join("test_j.txt")
        self.assertIn("???", new.ospath, f"Checking {new}")
        new.write(b"Content")

        # Join a name that itself contains "#" and "?" characters.
        new2name = "###/test??.txt"
        new2 = dir.join(new2name)
        self.assertIn("???", new2.ospath)
        new2.write(b"Content")
        self.assertTrue(new2.ospath.endswith(new2name))
        self.assertEqual(new.read(), new2.read())

        # Repeat the joins starting from the file-scheme form of the
        # directory.
        fdir = dir._force_to_file()
        self.assertNotIn("???", fdir.path)
        self.assertIn("???", fdir.ospath)
        self.assertEqual(fdir.scheme, "file")
        # NOTE(review): this joins onto the schemeless ``dir`` rather than
        # ``fdir`` - confirm whether ``fdir`` was intended.
        fnew = dir.join("test_jf.txt")
        fnew.write(b"Content")

        fnew2 = fdir.join(new2name)
        fnew2.write(b"Content")
        self.assertTrue(fnew2.ospath.endswith(new2name))
        self.assertNotIn("###", fnew2.path)

        self.assertEqual(fnew.read(), fnew2.read())

        # Test that children relative to schemeless and file schemes
        # still return the same unquoted name
        self.assertEqual(fnew2.relative_to(fdir), new2name)
        self.assertEqual(fnew2.relative_to(dir), new2name)
        self.assertEqual(new2.relative_to(fdir), new2name, f"{new2} vs {fdir}")
        self.assertEqual(new2.relative_to(dir), new2name)

        # Check for double quoting: building a schemeless URI from text
        # that is already quoted is expected to log a warning and still
        # give back the original path.
        plus_path = "/a/b/c+d/"
        with self.assertLogs(level="WARNING"):
            uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
        self.assertEqual(uri.ospath, plus_path)

        # Check that # is not escaped for schemeless URIs: the text after
        # the last "#" is expected as the fragment and everything before
        # it as the OS path.
        hash_path = "/a/b#/c&d#xyz"
        hpos = hash_path.rfind("#")
        uri = ButlerURI(hash_path)
        self.assertEqual(uri.ospath, hash_path[:hpos])
        self.assertEqual(uri.fragment, hash_path[hpos + 1:])
Esempio n. 25
0
class WebdavURITestCase(unittest.TestCase):
    """Exercise the HTTP/WebDAV operations of `ButlerURI` against endpoints
    mocked with the ``responses`` package.
    """

    def setUp(self):
        """Create the URIs under test and register the canned replies."""
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        presentName = "existingFile"
        absentName = "notExistingFile"
        root = f"https://{host}"

        self.baseURL = ButlerURI(root, forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"{root}/{folder}/{presentName}")
        self.notExistingFileButlerURI = ButlerURI(f"{root}/{folder}/{absentName}")
        self.existingFolderButlerURI = ButlerURI(f"{root}/{folder}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"{root}/{absentName}", forceDirectory=True)

        baseUrl = self.baseURL.geturl()
        fileUrl = self.existingFileButlerURI.geturl()
        missingFileUrl = self.notExistingFileButlerURI.geturl()
        folderUrl = self.existingFolderButlerURI.geturl()
        missingFolderUrl = self.notExistingFolderButlerURI.geturl()

        register = responses.add

        def registerVerb(verb, url, **reply):
            # Verbs given as plain strings are registered through an
            # explicit Response object.
            responses.add(responses.Response(url=url, method=verb, **reply))

        # The registration order below is kept deliberately: several replies
        # share the same URL and verb.

        # Need to declare the options
        register(responses.OPTIONS, baseUrl, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        register(responses.HEAD, fileUrl, status=200, headers={'Content-Length': '1024'})
        register(responses.HEAD, missingFileUrl, status=404)

        # Used by ButlerHttpURI.read()
        register(responses.GET, fileUrl, status=200, body="It works!".encode())
        register(responses.GET, missingFileUrl, status=404)

        # Used by ButlerHttpURI.write()
        register(responses.PUT, fileUrl, status=201)

        # Used by ButlerHttpURI.transfer_from()
        registerVerb("COPY", fileUrl, headers={"Destination": fileUrl}, status=201)
        registerVerb("COPY", fileUrl, headers={"Destination": missingFileUrl}, status=201)
        registerVerb("MOVE", fileUrl, headers={"Destination": missingFileUrl}, status=201)

        # Used by ButlerHttpURI.remove()
        register(responses.DELETE, fileUrl, status=200)
        register(responses.DELETE, missingFileUrl, status=404)

        # Used by ButlerHttpURI.mkdir()
        register(responses.HEAD, folderUrl, status=200, headers={'Content-Length': '1024'})
        register(responses.HEAD, baseUrl, status=200, headers={'Content-Length': '1024'})
        register(responses.HEAD, missingFolderUrl, status=404)
        registerVerb("MKCOL", missingFolderUrl, status=201)
        registerVerb("MKCOL", folderUrl, status=403)

    @responses.activate
    def testExists(self):
        """The mocked existing file is reported present, the other absent."""
        present = self.existingFileButlerURI
        absent = self.notExistingFileButlerURI

        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

    @responses.activate
    def testRemove(self):
        """Removing the existing file returns `None`; a missing one raises."""
        outcome = self.existingFileButlerURI.remove()
        self.assertIsNone(outcome)

        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        """Check mkdir on a new folder, an existing folder and a file URI."""
        newFolder = self.notExistingFolderButlerURI
        knownFolder = self.existingFolderButlerURI
        plainFile = self.notExistingFileButlerURI

        # The mock means that we can't check this now exists
        newFolder.mkdir()

        # This should do nothing
        knownFolder.mkdir()

        # A URI that was not created as a directory is rejected.
        with self.assertRaises(ValueError):
            plainFile.mkdir()

    @responses.activate
    def testRead(self):
        """Reading returns the mocked body; a missing file raises."""
        source = self.existingFileButlerURI

        firstRead = source.read().decode()
        self.assertEqual(firstRead, "It works!")

        secondRead = source.read().decode()
        self.assertNotEqual(secondRead, "Nope.")

        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        """Writing returns `None`; ``overwrite=False`` on an existing file
        raises.
        """
        target = self.existingFileButlerURI

        written = target.write(data="Some content.".encode())
        self.assertIsNone(written)

        with self.assertRaises(FileExistsError):
            target.write(data="Some content.".encode(), overwrite=False)

    @responses.activate
    def testTransfer(self):
        """Check the default and "move" transfers plus the two error paths."""
        source = self.existingFileButlerURI
        destination = self.notExistingFileButlerURI

        # Default transfer mode.
        self.assertIsNone(destination.transfer_from(src=source))

        # Explicit move.
        self.assertIsNone(destination.transfer_from(src=source, transfer="move"))

        # Transferring onto a file that already exists is refused.
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)

        # Unknown transfer modes are refused.
        with self.assertRaises(ValueError):
            destination.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        """Compare parent() results with the expected folder URLs."""
        folderUrl = self.existingFolderButlerURI.geturl()
        parentOfMissing = self.notExistingFileButlerURI.parent().geturl()
        self.assertEqual(folderUrl, parentOfMissing)

        # The parent of the base URL is the base URL itself.
        rootUrl = self.baseURL.geturl()
        parentOfRoot = self.baseURL.parent().geturl()
        self.assertEqual(rootUrl, parentOfRoot)

        # parent() and dirname() agree for a file URI.
        viaParent = self.existingFileButlerURI.parent().geturl()
        viaDirname = self.existingFileButlerURI.dirname().geturl()
        self.assertEqual(viaParent, viaDirname)
Esempio n. 26
0
    def loadUri(cls,
                uri: Union[ButlerURI, str],
                universe: DimensionUniverse,
                nodes: Optional[Iterable[int]] = None,
                graphID: Optional[BuildId] = None) -> QuantumGraph:
        """Read `QuantumGraph` from a URI.

        The loader is selected from the file extension of ``uri``:
        ``.pickle`` / ``.pkl`` files are unpickled (deprecated) and
        ``.qgraph`` files are read through `LoadHelper`.

        Parameters
        ----------
        uri : `ButlerURI` or `str`
            URI from where to load the graph.
        universe : `~lsst.daf.butler.DimensionUniverse`
            DimensionUniverse instance, not used by the method itself but
            needed to ensure that registry data structures are initialized.
        nodes : iterable of `int` or `None`
            Numbers that correspond to nodes in the graph. If specified, only
            these nodes will be loaded. Defaults to None, in which case all
            nodes will be loaded. Only used for ``.qgraph`` files.
        graphID : `str` or `None`
            If specified this ID is verified against the loaded graph prior to
            loading any Nodes. This defaults to None in which case no
            validation is done. Only used for ``.qgraph`` files.

        Returns
        -------
        graph : `QuantumGraph`
            Resulting QuantumGraph instance.

        Raises
        ------
        TypeError
            Raised if pickle contains instance of a type other than
            QuantumGraph.
        ValueError
            Raised if one or more of the nodes requested is not in the
            `QuantumGraph` or if graphID parameter does not match the graph
            being loaded or if the supplied uri does not point at a valid
            `QuantumGraph` save file (including any extension other than
            ``.pickle``, ``.pkl`` or ``.qgraph``).

        Notes
        -----
        Reading Quanta from pickle requires existence of singleton
        DimensionUniverse which is usually instantiated during Registry
        initialization. To make sure that DimensionUniverse exists this method
        accepts dummy DimensionUniverse argument.
        """
        uri = ButlerURI(uri)
        extension = uri.getExtension()

        # With ButlerURI we have the choice of always using a local file
        # or reading in the bytes directly. Reading in bytes can be more
        # efficient for reasonably-sized pickle files when the resource
        # is remote. For now use the local file variant. For a local file
        # as_local() does nothing.
        if extension in (".pickle", ".pkl"):
            with uri.as_local() as local, open(local.ospath, "rb") as fd:
                warnings.warn(
                    "Pickle graphs are deprecated, please re-save your graph with the save method"
                )
                # NOTE(review): unpickling executes arbitrary code from the
                # file; only load pickle graphs from trusted locations.
                qgraph = pickle.load(fd)
        elif extension == ".qgraph":
            # Exact comparison on purpose: ``in ('.qgraph')`` is a substring
            # test against a plain string, which also accepts "" and ".q".
            with LoadHelper(uri) as loader:
                qgraph = loader.load(nodes, graphID)
        else:
            raise ValueError(
                "Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`"
            )
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(
                f"QuantumGraph save file contains unexpected object type: {type(qgraph)}"
            )
        return qgraph
Esempio n. 27
0
    def setUp(self):
        """Create the URIs under test and register the mocked HTTP replies."""
        site = "https://" + "www.not-exists.orgx"
        folderName = "existingFolder"
        fileName = "existingFile"
        missingName = "notExistingFile"

        self.baseURL = ButlerURI(site, forceDirectory=True)
        self.existingFileButlerURI = ButlerURI("/".join((site, folderName, fileName)))
        self.notExistingFileButlerURI = ButlerURI("/".join((site, folderName, missingName)))
        self.existingFolderButlerURI = ButlerURI("/".join((site, folderName)), forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI("/".join((site, missingName)), forceDirectory=True)

        # Resolve each URL once; the registrations below reuse them.
        urlBase = self.baseURL.geturl()
        urlFile = self.existingFileButlerURI.geturl()
        urlNoFile = self.notExistingFileButlerURI.geturl()
        urlFolder = self.existingFolderButlerURI.geturl()
        urlNoFolder = self.notExistingFolderButlerURI.geturl()

        # The order of the registrations is preserved on purpose: several
        # replies share the same URL and verb.

        # Need to declare the options
        responses.add(responses.OPTIONS, urlBase, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, urlFile, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, urlNoFile, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, urlFile, status=200, body="It works!".encode())
        responses.add(responses.GET, urlNoFile, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, urlFile, status=201)

        # Used by ButlerHttpURI.transfer_from()
        copyOntoSelf = responses.Response(url=urlFile, method="COPY",
                                          headers={"Destination": urlFile}, status=201)
        responses.add(copyOntoSelf)
        copyToMissing = responses.Response(url=urlFile, method="COPY",
                                           headers={"Destination": urlNoFile}, status=201)
        responses.add(copyToMissing)
        moveToMissing = responses.Response(url=urlFile, method="MOVE",
                                           headers={"Destination": urlNoFile}, status=201)
        responses.add(moveToMissing)

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, urlFile, status=200)
        responses.add(responses.DELETE, urlNoFile, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, urlFolder, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, urlBase, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, urlNoFolder, status=404)
        createFolder = responses.Response(url=urlNoFolder, method="MKCOL", status=201)
        responses.add(createFolder)
        refuseFolder = responses.Response(url=urlFolder, method="MKCOL", status=403)
        responses.add(refuseFolder)
Esempio n. 28
0
    def testRelative(self):
        """Check that we can get subpaths back from two URIs"""
        # Absolute URIs rooted in the temporary directory.
        tmpRoot = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
        nestedPath = os.path.join(self.tmpdir, "dir1", "file.txt")
        nested = ButlerURI(nestedPath, forceAbsolute=True)
        self.assertEqual(nested.relative_to(tmpRoot), "dir1/file.txt")

        # A path that does not live under the parent gives a falsy result.
        outsider = ButlerURI("/a/b/dir1/file.txt")
        outsiderRelative = outsider.relative_to(tmpRoot)
        self.assertFalse(outsiderRelative)

        # So does asking for a path relative to a file rather than a
        # directory.
        siblingPath = os.path.join(self.tmpdir, "dir1", "file2.txt")
        siblingFile = ButlerURI(siblingPath)
        self.assertFalse(nested.relative_to(siblingFile))

        # Relative URIs
        relParent = ButlerURI("a/b/", forceAbsolute=False)
        relChild = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
        self.assertFalse(relChild.scheme)
        self.assertEqual(relChild.relative_to(relParent), "c/d.txt")

        # File URI and schemeless URI
        fileParent = ButlerURI("file:/a/b/c/")

        # If the child is relative and the parent is absolute we assume
        # that the child is a child of the parent unless it uses ".."
        plainChild = ButlerURI("e/f/g.txt", forceAbsolute=False)
        self.assertEqual(plainChild.relative_to(fileParent), "e/f/g.txt")

        # ".." that leaves the parent directory is not a child.
        escapingChild = ButlerURI("../e/f/g.txt", forceAbsolute=False)
        self.assertFalse(escapingChild.relative_to(fileParent))

        # ".." that comes back into the parent directory still resolves.
        returningChild = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
        self.assertEqual(returningChild.relative_to(fileParent), "e/f/g.txt")
Esempio n. 29
0
    def ingest(self, path, ref, formatter=None, transfer=None):
        """Add an on-disk file with the given `DatasetRef` to the store,
        possibly transferring it.

        The caller is responsible for ensuring that the given (or predicted)
        Formatter is consistent with how the file was written; `ingest` will
        in general silently ignore incorrect formatters (as it cannot
        efficiently verify their correctness), deferring errors until ``get``
        is first called on the ingested dataset.

        Parameters
        ----------
        path : `str`
            File path.  Treated as relative to the repository root if not
            absolute.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        formatter : `Formatter` (optional)
            Formatter that should be used to retrieve the Dataset.  If not
            provided, the formatter will be constructed according to
            Datastore configuration.
        transfer : str (optional)
            If not None, must be one of 'move' or 'copy' indicating how to
            transfer the file.  The new filename and location will be
            determined via template substitution, as with ``put``.  If the file
            is outside the datastore root, it must be transferred somehow.

        Raises
        ------
        DatasetTypeNotSupportedError
            Raised if ``ref`` is rejected by the datastore constraints.
        RuntimeError
            Raised if ``transfer is None`` and path is outside the repository
            root.
        FileNotFoundError
            Raised if the file at ``path`` does not exist.
        FileExistsError
            Raised if ``transfer is not None`` but a file already exists at the
            location computed from the template.
        NotImplementedError
            Raised if the URI scheme of ``path`` or the ``transfer`` mode is
            not supported.
        PermissionError
            Raised when check if file exists at target location in S3 can not
            be made because IAM user used lacks s3:GetObject or s3:ListBucket
            permissions.
        """
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        if formatter is None:
            formatter = self.formatterFactory.getFormatterClass(ref)

        # ingest can occur from file->s3 and s3->s3 (source can be file or s3,
        # target will always be s3). File has to exist at target location.
        # Schemeless URIs are assumed to obey os.path rules. Equivalent to
        # os.path.exists(fullPath) check in PosixDatastore.
        srcUri = ButlerURI(path)
        srcIsLocal = srcUri.scheme == "file" or not srcUri.scheme
        if srcIsLocal:
            if not os.path.exists(srcUri.ospath):
                raise FileNotFoundError(
                    f"File at '{srcUri}' does not exist; note that paths to ingest are "
                    "assumed to be relative to self.root unless they are absolute."
                )
        elif srcUri.scheme == "s3":
            if not s3CheckFileExists(srcUri, client=self.client)[0]:
                raise FileNotFoundError(
                    f"File at '{srcUri}' does not exist; note that paths to ingest are "
                    "assumed to be relative to self.root unless they are absolute."
                )
        else:
            raise NotImplementedError(
                f"Scheme type {srcUri.scheme} not supported.")

        # The datastore root is needed both when no transfer is requested and
        # for S3-to-S3 transfers, so resolve it before branching.
        rootUri = ButlerURI(self.root)

        # Transfer is generally None when put calls ingest. In that case file
        # is uploaded in put, or already in proper location, so source
        # location must be inside repository. In other cases, created target
        # location must be inside root and source file must be deleted when
        # 'move'd.
        if transfer is None:
            if srcUri.scheme == "file":
                raise RuntimeError(
                    f"'{srcUri}' is not inside repository root '{rootUri}'. "
                    "Ingesting local data to S3Datastore without upload "
                    "to S3 is not allowed.")
            elif srcUri.scheme == "s3" and not srcUri.path.startswith(rootUri.path):
                raise RuntimeError(
                    f"'{srcUri}' is not inside repository root '{rootUri}'."
                )
            p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
            pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
            tgtLocation = self.locationFactory.fromPath(pathInStore)
        elif transfer == "move" or transfer == "copy":
            if srcIsLocal:
                # Source is on local disk. Schemeless sources are handled
                # here as well, matching the existence check above.
                template = self.templates.getTemplate(ref)
                location = self.locationFactory.fromPath(template.format(ref))
                tgtPathInStore = formatter.predictPathFromLocation(location)
                tgtLocation = self.locationFactory.fromPath(tgtPathInStore)
                self.client.upload_file(Bucket=tgtLocation.netloc,
                                        Key=tgtLocation.relativeToPathRoot,
                                        Filename=srcUri.ospath)
                if transfer == "move":
                    os.remove(srcUri.ospath)
            else:
                # Source is another S3 bucket (the only other scheme that
                # passes the existence check above).
                relpath = srcUri.relativeToPathRoot
                copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
                self.client.copy(copySrc, self.locationFactory.netloc, relpath)
                if transfer == "move":
                    # https://github.com/boto/boto3/issues/507 - there is no
                    # way of knowing if the file was actually deleted except
                    # for checking all the keys again, response is HTTP 204 OK
                    # response all the time
                    self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)
                p = pathlib.PurePosixPath(relpath)
                relativeToDatastoreRoot = str(
                    p.relative_to(rootUri.relativeToPathRoot))
                tgtLocation = self.locationFactory.fromPath(
                    relativeToDatastoreRoot)
        else:
            raise NotImplementedError(
                f"Transfer type '{transfer}' not supported.")

        # The file should exist on the bucket by now; only its size is needed
        # for the registry entry.
        _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                    bucket=tgtLocation.netloc,
                                    client=self.client)

        # Update the registry
        self._register_dataset_file(ref, formatter, tgtLocation.pathInStore,
                                    size, None)
Esempio n. 30
0
 def testWrite(self):
     """Write text to a new S3 URI and check it reads back unchanged."""
     payload = "abcdefghijklmnopqrstuv\n"
     target = ButlerURI(self.makeS3Uri("created.txt"))

     target.write(payload.encode())

     roundTripped = target.read().decode()
     self.assertEqual(roundTripped, payload)