def testQuoting(self):
    """Verify that special characters survive quoting in an S3 URI."""
    root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)
    unquoted = "rootdir/dir1+/file?.txt"
    quoted_uri = self.makeS3Uri(urllib.parse.quote(unquoted))
    leaf = ButlerURI(quoted_uri)

    # Names derived from the URI are expected back in unquoted form.
    self.assertEqual(leaf.relative_to(root), "dir1+/file?.txt")
    self.assertEqual(leaf.basename(), "file?.txt")
    self.assertEqual(leaf.relativeToPathRoot, unquoted)

    # The raw path is expected to retain percent-encoding, while the
    # unquoted path matches the original string.
    self.assertIn("%", leaf.path)
    self.assertEqual(leaf.unquoted_path, "/" + unquoted)
def testButlerUriSerialization(self):
    """Check that a URI survives a pickle round trip.

    Only pickling is exercised here, for both a file-like and a
    directory-like URI.
    """
    cases = (
        ({}, self.assertFalse),
        ({"forceDirectory": True}, self.assertTrue),
    )
    for extra_kwargs, check_dir_like in cases:
        original = ButlerURI("a/b/c/d", **extra_kwargs)
        restored = pickle.loads(pickle.dumps(original))
        self.assertEqual(original, restored)
        # The directory flag must be preserved by the round trip.
        check_dir_like(restored.dirLike)
def _extractIngestInfo(self, path: str, ref: DatasetRef, *,
                       formatter: Union[Formatter, Type[Formatter]],
                       transfer: Optional[str] = None) -> StoredFileInfo:
    # Docstring inherited from FileLikeDatastore._extractIngestInfo.
    srcUri = ButlerURI(path)
    if transfer is None:
        # No transfer requested: the file is used in place, so its
        # location in the store is its path relative to the datastore root.
        rootUri = ButlerURI(self.root)
        p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
        pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
        tgtLocation = self.locationFactory.fromPath(pathInStore)
    else:
        assert transfer == "move" or transfer == "copy", "Should be guaranteed by _standardizeIngestPath"

        # Work out the name we want this ingested file to have
        # inside the datastore
        tgtLocation = self._calculate_ingested_datastore_name(srcUri, ref, formatter)

        if srcUri.scheme == "file":
            # source is on local disk.
            with open(srcUri.ospath, 'rb') as f:
                self.client.put_object(Bucket=tgtLocation.netloc,
                                       Key=tgtLocation.relativeToPathRoot, Body=f)
            if transfer == "move":
                os.remove(srcUri.ospath)
        elif srcUri.scheme == "s3":
            # source is another S3 Bucket
            relpath = srcUri.relativeToPathRoot
            copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
            self.client.copy(copySrc, self.locationFactory.netloc,
                             tgtLocation.relativeToPathRoot)
            if transfer == "move":
                # https://github.com/boto/boto3/issues/507 - there is no
                # way of knowing if the file was actually deleted except
                # for checking all the keys again, response is HTTP 204 OK
                # response all the time.
                # The S3 client API spells this ``delete_object``; the
                # client has no ``delete`` method.
                self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)

    # the file should exist on the bucket by now
    _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                bucket=tgtLocation.netloc,
                                client=self.client)

    return StoredFileInfo(formatter=formatter, path=tgtLocation.pathInStore,
                          storageClass=ref.datasetType.storageClass,
                          component=ref.datasetType.component(),
                          file_size=size, checksum=None)
def testEnvVar(self):
    """Check environment variable expansion with and without
    ``forceAbsolute``.
    """
    template = "${MY_TEST_DIR}/d.txt"
    fake_env = {"MY_TEST_DIR": "/a/b/c"}
    with unittest.mock.patch.dict(os.environ, fake_env):
        expanded = ButlerURI(template)
        self.assertEqual(expanded.path, "/a/b/c/d.txt")
        self.assertEqual(expanded.scheme, "file")

        # With forceAbsolute=False the variable is left untouched
        literal = ButlerURI(template, forceAbsolute=False)
        self.assertEqual(literal.path, "${MY_TEST_DIR}/d.txt")
        self.assertFalse(literal.scheme)
def testRelative(self):
    """Exercise relative_to() between S3 URIs and against a local path."""
    root = ButlerURI(self.makeS3Uri("rootdir"), forceDirectory=True)

    inside = ButlerURI(self.makeS3Uri("rootdir/dir1/file.txt"))
    self.assertEqual(inside.relative_to(root), "dir1/file.txt")

    # A key under a different prefix is not relative to the root.
    outside = ButlerURI(self.makeS3Uri("/a/b/dir1/file.txt"))
    self.assertFalse(outside.relative_to(root))

    # Neither is anything relative to a URI that is not on S3.
    local_path = os.path.join(self.tmpdir, "dir1", "file2.txt")
    local = ButlerURI(local_path)
    self.assertFalse(inside.relative_to(local))
def testFileExists(self):
    """Check s3CheckFileExists with every supported way of naming a key.

    Every other call in this test indexes the result with ``[0]`` to
    get the existence flag, so the result is a sequence. If it is a
    non-empty tuple (as the indexing suggests) it is always truthy, so
    each assertion must look at element ``[0]``.
    """
    # Explicit bucket name plus key.
    self.assertTrue(s3CheckFileExists(client=self.client, bucket=self.bucketName,
                                      path=self.fileName)[0])
    self.assertFalse(s3CheckFileExists(client=self.client, bucket=self.bucketName,
                                       path=self.fileName + "_NO_EXIST")[0])

    datastoreRootUri = f"s3://{self.bucketName}/"
    uri = f"s3://{self.bucketName}/{self.fileName}"

    buri = ButlerURI(uri)
    location = Location(datastoreRootUri, self.fileName)

    self.assertTrue(s3CheckFileExists(client=self.client, path=buri)[0])
    # just to make sure the overloaded keyword works correctly
    self.assertTrue(s3CheckFileExists(buri, client=self.client)[0])
    self.assertTrue(s3CheckFileExists(client=self.client, path=location)[0])

    # make sure supplying strings resolves correctly too; index the
    # result so the assertion can actually fail.
    self.assertTrue(s3CheckFileExists(uri, client=self.client)[0])
    self.assertTrue(s3CheckFileExists(uri)[0])
def saveUri(self, uri):
    """Save `QuantumGraph` to the specified URI.

    Parameters
    ----------
    uri : `ButlerURI` or `str`
        URI to where the graph should be saved.

    Raises
    ------
    TypeError
        Raised if the URI does not have a ``.qgraph`` extension.
    """
    buffer = self._buildSaveObject()
    butlerUri = ButlerURI(uri)
    # Compare for equality: ``x not in (".qgraph")`` is a substring test
    # against a plain string (the parentheses do not make a tuple) and
    # would accept "", ".q", "graph", etc.
    if butlerUri.getExtension() != ".qgraph":
        raise TypeError(f"Can currently only save a graph in qgraph format not {uri}")
    # Ignore because bytearray is safe to use in place of bytes
    butlerUri.write(buffer)  # type: ignore
def testResource(self):
    # Read a packaged resource, in full and truncated, then rebuild the
    # same URI by joining onto its parent directory.
    resource = ButlerURI("resource://lsst.daf.butler/configs/datastore.yaml")
    self.assertTrue(resource.exists(), f"Check {resource} exists")

    text = resource.read().decode()
    self.assertTrue(text.startswith("datastore:"))

    head = resource.read(size=9).decode()
    self.assertEqual(head, "datastore")

    config_dir = ButlerURI("resource://lsst.daf.butler/configs", forceDirectory=True)
    # NOTE(review): the message talks about the directory but the check
    # is made on the file URI -- confirm whether config_dir.exists() was
    # intended.
    self.assertTrue(resource.exists(), f"Check directory {config_dir} exists")

    joined = config_dir.join("datastore.yaml")
    self.assertEqual(resource, joined)
    self.assertFalse(joined.dirLike)

    missing = config_dir.join("not-there.yaml")
    self.assertFalse(missing.exists())
def testUriExtensions(self):
    """Check getExtension() for single and compound file extensions."""
    # Maps file name -> extension that should be reported for it.
    expectations = {
        "file.fits.gz": ".fits.gz",
        "file.fits": ".fits",
        "file.fits.xz": ".fits.xz",
        "file.fits.tar": ".tar",
        "file": "",
        "flat_i_sim_1.4_blah.fits.gz": ".fits.gz",
        "flat_i_sim_1.4_blah.txt": ".txt",
        "flat_i_sim_1.4_blah.fits.fz": ".fits.fz",
        "flat_i_sim_1.4_blah.fits.txt": ".txt",
    }

    for filename, extension in expectations.items():
        candidate = ButlerURI(f"a/b/{filename}")
        self.assertEqual(candidate.getExtension(), extension)
def testParents(self):
    """Check split(), dirname(), basename() and parent() agree."""
    top = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
    leaf = top.join("subdir/file.txt")
    self.assertFalse(leaf.isdir())

    # Splitting yields the containing directory and the file name.
    subdir, leaf_name = leaf.split()
    self.assertEqual(leaf_name, "file.txt")
    self.assertTrue(subdir.isdir())

    # The convenience accessors must match the result of split().
    self.assertEqual(leaf.dirname(), subdir)
    self.assertEqual(leaf.basename(), leaf_name)
    self.assertEqual(leaf.parent(), subdir)

    # Walking up from the sub-directory returns to the starting point.
    grandparent = subdir.parent()
    self.assertEqual(grandparent, top)
    self.assertTrue(grandparent.isdir())
    self.assertEqual(leaf.parent().parent(), top)
def guessCollectionNames(self, instrument: Instrument, root: str) -> None:
    """Fill in `runName` (and `chainName` if unset) with guessed values.

    Nothing is changed when `runName` is already set. Otherwise, if
    `chainName` is `None` it is derived from ``self.path``, and
    `runName` is then set to ``"{chainName}/direct"``.

    Parameters
    ----------
    instrument : `Instrument`
        Instrument object for the repository being converted.
    root : `str`
        Path to the root repository. When ``self.path`` is absolute
        the chain name is derived from its location relative to this
        root.

    Raises
    ------
    ValueError
        Raised if ``self.path`` is absolute but is not located inside
        ``root``.
    """
    if self.runName is not None:
        # An explicit run name always wins.
        return

    if self.chainName is None:
        if not os.path.isabs(self.path):
            guess = self.path
        else:
            guess = ButlerURI(self.path).relative_to(ButlerURI(root))
            if guess is None:
                raise ValueError(
                    f"Cannot guess run name collection for rerun at '{self.path}': "
                    f"no clear relationship to root '{root}'."
                )

        # Strip the leading path components in turn.
        guess, _ = _dropPrefix(guess, "rerun/")
        guess, personal = _dropPrefix(guess, "private/")
        if personal:
            guess = f"u/{guess}"
        else:
            guess, _ = _dropPrefix(guess, "shared/")
            guess = instrument.makeCollectionName("runs", guess)
        self.chainName = guess

    self.runName = f"{self.chainName}/direct"
def _extractIngestInfo(self, path: str, ref: DatasetRef, *, formatter: Type[Formatter],
                       transfer: Optional[str] = None) -> StoredFileInfo:
    # Docstring inherited from FileLikeDatastore._extractIngestInfo.
    srcUri = ButlerURI(path)
    # The datastore root is needed both for in-place ingest and for the
    # S3 -> S3 transfer branch, so build it once up front (previously it
    # was only defined when ``transfer is None``, giving a NameError on
    # S3 transfers).
    rootUri = ButlerURI(self.root)
    if transfer is None:
        # File is used in place: its location in the store is its path
        # relative to the datastore root.
        p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
        pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
        tgtLocation = self.locationFactory.fromPath(pathInStore)
    else:
        assert transfer == "move" or transfer == "copy", "Should be guaranteed by _standardizeIngestPath"
        if srcUri.scheme == "file":
            # source is on local disk.
            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            tgtPathInStore = formatter.predictPathFromLocation(location)
            tgtLocation = self.locationFactory.fromPath(tgtPathInStore)
            self.client.upload_file(Bucket=tgtLocation.netloc, Key=tgtLocation.relativeToPathRoot,
                                    Filename=srcUri.ospath)
            if transfer == "move":
                os.remove(srcUri.ospath)
        elif srcUri.scheme == "s3":
            # source is another S3 Bucket
            relpath = srcUri.relativeToPathRoot
            copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
            self.client.copy(copySrc, self.locationFactory.netloc, relpath)
            if transfer == "move":
                # https://github.com/boto/boto3/issues/507 - there is no
                # way of knowing if the file was actually deleted except
                # for checking all the keys again, response is HTTP 204 OK
                # response all the time.
                # The S3 client API spells this ``delete_object``; the
                # client has no ``delete`` method.
                self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)
            p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
            relativeToDatastoreRoot = str(p.relative_to(rootUri.relativeToPathRoot))
            tgtLocation = self.locationFactory.fromPath(relativeToDatastoreRoot)
        else:
            # Fail explicitly rather than with an unbound ``tgtLocation``.
            raise NotImplementedError(f"Scheme type {srcUri.scheme} not supported.")

    # the file should exist on the bucket by now
    _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                bucket=tgtLocation.netloc,
                                client=self.client)

    return StoredFileInfo(formatter=formatter, path=tgtLocation.pathInStore,
                          storageClass=ref.datasetType.storageClass,
                          file_size=size, checksum=None)
def testUriRoot(self):
    # Root URIs of every scheme must report "./" relative to the path
    # root; non-root URIs report their path without the leading root.
    fs_root = pathlib.Path(__file__).absolute().root

    for root_str in (fs_root, "s3://bucket", "file://localhost/", "https://a.b.com"):
        root_uri = ButlerURI(root_str, forceDirectory=True)
        self.assertEqual(root_uri.relativeToPathRoot, "./", f"Testing uri: {root_uri}")
        self.assertTrue(root_uri.is_root, f"Testing URI {root_uri} is a root URI")

    local_example = os.path.join(fs_root, "a", "b", "c")
    # Maps URI string -> expected path relative to the root.
    expected_relative = {
        "file://localhost/file.ext": "file.ext",
        local_example: os.path.join("a", "b", "c"),
        "s3://bucket/path/file.ext": "path/file.ext",
        "https://host.com/a/b/c.d": "a/b/c.d",
    }
    for uri_str, relative in expected_relative.items():
        self.assertEqual(ButlerURI(uri_str).relativeToPathRoot, relative)
def testDirect(self):
    # Ingest in "direct" mode, then confirm the datastore reports the
    # original file URI, i.e. one outside of the datastore.
    self._ingestRaws(transfer="direct")

    expected = ButlerURI(self.file)
    butler = Butler(self.root, run=self.outputRun)
    raws = list(butler.registry.queryDatasets("raw", collections=self.outputRun))
    reported = butler.getURI(raws[0])
    self.assertEqual(reported, expected)
def testFile(self):
    # Write, read back and size a local file through ButlerURI.
    local_path = os.path.join(self.tmpdir, "test.txt")
    target = ButlerURI(local_path)
    self.assertFalse(target.exists(), f"{target} should not exist")
    self.assertEqual(target.ospath, local_path)

    text = "abcdefghijklmnopqrstuv\n"
    target.write(text.encode())
    self.assertTrue(os.path.exists(local_path), "File should exist locally")
    self.assertTrue(target.exists(), f"{target} should now exist")
    self.assertEqual(target.read().decode(), text)
    self.assertEqual(target.size(), len(text.encode()))

    # Asking for the size of a missing file is an error.
    with self.assertRaises(FileNotFoundError):
        ButlerURI("file/not/there.txt").size()

    # Check that creating a URI from a URI returns the same thing
    same = ButlerURI(target)
    self.assertEqual(target, same)
    self.assertEqual(id(target), id(same))
def testGetFileURL(self):
    # _getFileURL must return the same URL string whether it is given a
    # str, a ButlerURI or a Location.
    relative = f"{self.existingfolderName}/{self.existingfileName}"
    expected = f"https://{self.serverRoot}/{self.existingfolderName}/{self.existingfileName}"

    as_uri = ButlerURI(expected)
    as_location = Location(f"https://{self.serverRoot}/", relative)

    for candidate in (expected, as_uri, as_location):
        self.assertEqual(_getFileURL(candidate), expected)
def testFile(self):
    # Write a local file through ButlerURI and read it back.
    local_path = os.path.join(self.tmpdir, "test.txt")
    target = ButlerURI(local_path)
    self.assertFalse(target.exists(), f"{target} should not exist")
    self.assertEqual(target.ospath, local_path)

    text = "abcdefghijklmnopqrstuv\n"
    target.write(text.encode())
    self.assertTrue(os.path.exists(local_path), "File should exist locally")
    self.assertTrue(target.exists(), f"{target} should now exist")

    round_tripped = target.read().decode()
    self.assertEqual(round_tripped, text)
def __init__(self, config, registry, butlerRoot=None):
    # Validate the root as a local location and create the directory if
    # the configuration allows it.
    super().__init__(config, registry, butlerRoot)

    # Check that root is a valid URI for this datastore
    rootUri = ButlerURI(self.root)
    is_local = not rootUri.scheme or rootUri.scheme == "file"
    if not is_local:
        raise ValueError(f"Root location must only be a file URI not {self.root}")

    # From here on the root is handled as a plain path.
    self.root = rootUri.path
    if not os.path.isdir(self.root):
        may_create = "create" in self.config and self.config["create"]
        if not may_create:
            raise ValueError(f"No valid root at: {self.root}")
        safeMakeDir(self.root)
def testUriExtensions(self):
    """Check getExtension() for bare file names and for full URIs."""
    # Maps file name or URI -> extension that should be reported.
    expectations = {
        "file.fits.gz": ".fits.gz",
        "file.fits": ".fits",
        "file.fits.xz": ".fits.xz",
        "file.fits.tar": ".tar",
        "file": "",
        "flat_i_sim_1.4_blah.fits.gz": ".fits.gz",
        "flat_i_sim_1.4_blah.txt": ".txt",
        "flat_i_sim_1.4_blah.fits.fz": ".fits.fz",
        "flat_i_sim_1.4_blah.fits.txt": ".txt",
        "s3://bucket/c/a.b/": "",
        "s3://bucket/c/a.b": ".b",
        "file://localhost/c/a.b.gz": ".b.gz",
    }

    for name, extension in expectations.items():
        # Entries containing ":" are used as given; bare names are
        # placed in a relative directory first.
        uri_string = name if ":" in name else f"a/b/{name}"
        self.assertEqual(ButlerURI(uri_string).getExtension(), extension)
def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
    # Docstring inherited from FileLikeDatastore._standardizeIngestPath.
    supported_modes = (None, "move", "copy")
    if transfer not in supported_modes:
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")

    # The source may be a local file (or schemeless path, which is
    # treated with os.path rules) or an S3 object; either way it has to
    # exist already. This mirrors the os.path.exists(fullPath) check in
    # PosixDatastore.
    srcUri = ButlerURI(path)
    scheme = srcUri.scheme
    if not scheme or scheme == 'file':
        found = os.path.exists(srcUri.ospath)
    elif scheme == 's3':
        found = s3CheckFileExists(srcUri, client=self.client)[0]
    else:
        raise NotImplementedError(f"Scheme type {srcUri.scheme} not supported.")
    if not found:
        raise FileNotFoundError(f"File at '{srcUri}' does not exist.")

    if transfer is None:
        # Without a transfer the source must already be inside the
        # repository root on S3.
        rootUri = ButlerURI(self.root)
        if scheme == "file":
            raise RuntimeError(f"'{srcUri}' is not inside repository root '{rootUri}'. "
                               "Ingesting local data to S3Datastore without upload "
                               "to S3 is not allowed.")
        if scheme == "s3" and not srcUri.path.startswith(rootUri.path):
            raise RuntimeError(f"'{srcUri}' is not inside repository root '{rootUri}'.")

    return path
def testExtension(self):
    # updatedExtension() returns the same object when nothing changes
    # and replaces a compound extension as a whole.
    text_uri = ButlerURI(os.path.join(self.tmpdir, "test.txt"))

    # No-op updates.
    self.assertEqual(text_uri.updatedExtension(None), text_uri)
    self.assertEqual(text_uri.updatedExtension(".txt"), text_uri)
    unchanged = text_uri.updatedExtension(".txt")
    self.assertEqual(id(unchanged), id(text_uri))

    # Real updates.
    compressed = text_uri.updatedExtension(".fits.gz")
    self.assertEqual(compressed.basename(), "test.fits.gz")
    image = compressed.updatedExtension(".jpeg")
    self.assertEqual(image.basename(), "test.jpeg")
def getURIs(self, ref: DatasetRef,
            predict: bool = False) -> Tuple[Optional[ButlerURI], Dict[str, ButlerURI]]:
    """Return URIs associated with dataset.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required dataset.
    predict : `bool`, optional
        If the datastore does not know about the dataset, should it
        return a predicted URI or not?

    Returns
    -------
    primary : `ButlerURI`
        The URI to the primary artifact associated with this dataset.
        If the dataset was disassembled within the datastore this
        may be `None`.
    components : `dict`
        URIs to any components associated with the dataset artifact.
        Can be empty if there are no components.

    Raises
    ------
    FileNotFoundError
        Raised if the dataset is not in this datastore and ``predict``
        is `False`.

    Notes
    -----
    The URIs returned for in-memory datastores are not usable but
    provide an indication of the associated dataset.
    """
    # Include the dataID as a URI query
    query = urlencode(ref.dataId)

    # if this has never been written then we have to guess
    if not self.exists(ref):
        if not predict:
            raise FileNotFoundError("Dataset {} not in this datastore".format(ref))
        name = f"{ref.datasetType.name}"
        fragment = "#predicted"
    else:
        realID, _ = self._get_dataset_info(ref)
        # Only the object identity goes in the name; the query string is
        # appended exactly once when the URI is assembled below.
        name = f"{id(self.datasets[realID])}"
        fragment = ""

    return ButlerURI(f"mem://{name}?{query}{fragment}"), {}
def testTransfer(self):
    # Copy local -> S3 -> S3 -> local and verify content, partial reads
    # and overwrite protection.
    text = "Content is some content\nwith something to say\n\n"
    source = ButlerURI(os.path.join(self.tmpdir, "test.txt"))
    source.write(text.encode())

    # Local file to S3.
    remote = ButlerURI(self.makeS3Uri("test.txt"))
    self.assertFalse(remote.exists())
    remote.transfer_from(source, transfer="copy")
    self.assertTrue(remote.exists())

    # S3 to S3.
    remote_copy = ButlerURI(self.makeS3Uri("copied.txt"))
    remote_copy.transfer_from(remote, transfer="copy")
    self.assertTrue(remote_copy.exists())

    # S3 back to a local file.
    downloaded = ButlerURI(os.path.join(self.tmpdir, "copied.txt"))
    downloaded.transfer_from(remote_copy, transfer="copy")
    with open(downloaded.ospath, "r") as stream:
        round_tripped = stream.read()
    self.assertEqual(round_tripped, text)

    # This transfer mode is expected to be rejected for an S3 target.
    with self.assertRaises(ValueError):
        remote_copy.transfer_from(downloaded, transfer="symlink")

    everything = remote.read()
    self.assertEqual(everything.decode(), round_tripped)

    # Partial read of the first few bytes.
    nbytes = 10
    prefix = remote.read(size=nbytes)
    self.assertEqual(len(prefix), nbytes)  # Extra byte comes back
    self.assertEqual(prefix.decode(), text[:nbytes])

    # Existing targets are protected unless overwrite is requested.
    with self.assertRaises(FileExistsError):
        remote.transfer_from(source, transfer="copy")
    remote.transfer_from(source, transfer="copy", overwrite=True)
def testEscapes(self):
    """Special characters in file paths"""
    # Start from a schemeless URI whose path contains characters that
    # need quoting in a URI.
    src = ButlerURI("bbb/???/test.txt", root=self.tmpdir, forceAbsolute=True)
    self.assertFalse(src.scheme)
    src.write(b"Some content")
    self.assertTrue(src.exists())

    # Use the internal API to force to a file
    file = src._force_to_file()
    self.assertTrue(file.exists())
    # The OS path keeps the raw characters, the URI path must not.
    self.assertIn("???", file.ospath)
    self.assertNotIn("???", file.path)

    file.updateFile("tests??.txt")
    self.assertNotIn("??.txt", file.path)
    file.write(b"Other content")
    self.assertEqual(file.read(), b"Other content")

    # Updating the schemeless URI the same way must address the same file.
    src.updateFile("tests??.txt")
    self.assertIn("??.txt", src.path)
    self.assertEqual(file.read(), src.read(), f"reading from {file.ospath} and {src.ospath}")

    # File URI and schemeless URI
    parent = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/"))
    child = ButlerURI("e/f/g.txt", forceAbsolute=False)
    self.assertEqual(child.relative_to(parent), "e/f/g.txt")

    child = ButlerURI("e/f??#/g.txt", forceAbsolute=False)
    self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")

    child = ButlerURI("file:" + urllib.parse.quote("/a/b/c/de/??/e/f??#/g.txt"))
    self.assertEqual(child.relative_to(parent), "e/f??#/g.txt")
    self.assertEqual(child.relativeToPathRoot, "a/b/c/de/??/e/f??#/g.txt")

    # Schemeless so should not quote
    dir = ButlerURI("bbb/???/", root=self.tmpdir, forceAbsolute=True, forceDirectory=True)
    self.assertIn("???", dir.ospath)
    self.assertIn("???", dir.path)
    self.assertFalse(dir.scheme)

    # dir.join() morphs into a file scheme
    new = dir.join("test_j.txt")
    self.assertIn("???", new.ospath, f"Checking {new}")
    new.write(b"Content")

    # Join a relative name that itself contains special characters.
    new2name = "###/test??.txt"
    new2 = dir.join(new2name)
    self.assertIn("???", new2.ospath)
    new2.write(b"Content")
    self.assertTrue(new2.ospath.endswith(new2name))
    self.assertEqual(new.read(), new2.read())

    # Repeat the joins starting from the file-scheme form of the directory.
    fdir = dir._force_to_file()
    self.assertNotIn("???", fdir.path)
    self.assertIn("???", fdir.ospath)
    self.assertEqual(fdir.scheme, "file")
    # NOTE(review): this joins onto ``dir`` rather than ``fdir`` --
    # confirm that is intended.
    fnew = dir.join("test_jf.txt")
    fnew.write(b"Content")

    fnew2 = fdir.join(new2name)
    fnew2.write(b"Content")
    self.assertTrue(fnew2.ospath.endswith(new2name))
    self.assertNotIn("###", fnew2.path)

    self.assertEqual(fnew.read(), fnew2.read())

    # Test that children relative to schemeless and file schemes
    # still return the same unquoted name
    self.assertEqual(fnew2.relative_to(fdir), new2name)
    self.assertEqual(fnew2.relative_to(dir), new2name)
    self.assertEqual(new2.relative_to(fdir), new2name, f"{new2} vs {fdir}")
    self.assertEqual(new2.relative_to(dir), new2name)

    # Check for double quoting
    plus_path = "/a/b/c+d/"
    # Constructing from an already-quoted schemeless path is expected to
    # log a warning.
    with self.assertLogs(level="WARNING"):
        uri = ButlerURI(urllib.parse.quote(plus_path), forceDirectory=True)
    self.assertEqual(uri.ospath, plus_path)

    # Check that # is not escaped for schemeless URIs
    hash_path = "/a/b#/c&d#xyz"
    # Everything after the last "#" is expected to become the fragment.
    hpos = hash_path.rfind("#")
    uri = ButlerURI(hash_path)
    self.assertEqual(uri.ospath, hash_path[:hpos])
    self.assertEqual(uri.fragment, hash_path[hpos + 1:])
class WebdavURITestCase(unittest.TestCase):
    """Exercise HTTP(S) ButlerURI operations against mocked responses."""

    def setUp(self):
        """Build the URIs under test and register the mocked endpoints."""
        host = "www.not-exists.orgx"
        folder = "existingFolder"
        present = "existingFile"
        absent = "notExistingFile"

        self.baseURL = ButlerURI(f"https://{host}", forceDirectory=True)
        self.existingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{present}")
        self.notExistingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{absent}")
        self.existingFolderButlerURI = ButlerURI(f"https://{host}/{folder}", forceDirectory=True)
        self.notExistingFolderButlerURI = ButlerURI(f"https://{host}/{absent}", forceDirectory=True)

        base_url = self.baseURL.geturl()
        file_url = self.existingFileButlerURI.geturl()
        missing_file_url = self.notExistingFileButlerURI.geturl()
        folder_url = self.existingFolderButlerURI.geturl()
        missing_folder_url = self.notExistingFolderButlerURI.geturl()

        # Need to declare the options
        responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

        # Used by ButlerHttpURI.exists()
        responses.add(responses.HEAD, file_url, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, missing_file_url, status=404)

        # Used by ButlerHttpURI.read()
        responses.add(responses.GET, file_url, status=200, body="It works!".encode())
        responses.add(responses.GET, missing_file_url, status=404)

        # Used by ButlerHttpURI.write()
        responses.add(responses.PUT, file_url, status=201)

        # Used by ButlerHttpURI.transfer_from()
        transfers = (
            ("COPY", file_url),
            ("COPY", missing_file_url),
            ("MOVE", missing_file_url),
        )
        for verb, destination in transfers:
            responses.add(responses.Response(url=file_url, method=verb,
                                             headers={"Destination": destination},
                                             status=201))

        # Used by ButlerHttpURI.remove()
        responses.add(responses.DELETE, file_url, status=200)
        responses.add(responses.DELETE, missing_file_url, status=404)

        # Used by ButlerHttpURI.mkdir()
        responses.add(responses.HEAD, folder_url, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, base_url, status=200, headers={'Content-Length': '1024'})
        responses.add(responses.HEAD, missing_folder_url, status=404)
        responses.add(responses.Response(url=missing_folder_url, method="MKCOL", status=201))
        responses.add(responses.Response(url=folder_url, method="MKCOL", status=403))

    @responses.activate
    def testExists(self):
        # HEAD is mocked as 200 for the existing file and 404 otherwise.
        present, absent = self.existingFileButlerURI, self.notExistingFileButlerURI
        self.assertTrue(present.exists())
        self.assertFalse(absent.exists())

    @responses.activate
    def testRemove(self):
        # Removing an existing file returns None; a missing one raises.
        outcome = self.existingFileButlerURI.remove()
        self.assertIsNone(outcome)
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.remove()

    @responses.activate
    def testMkdir(self):
        # The mock means that we can't check this now exists
        self.notExistingFolderButlerURI.mkdir()

        # This should do nothing
        self.existingFolderButlerURI.mkdir()

        # mkdir() on a URI that is not a directory is rejected.
        with self.assertRaises(ValueError):
            self.notExistingFileButlerURI.mkdir()

    @responses.activate
    def testRead(self):
        # The mocked GET body must come back unchanged.
        first = self.existingFileButlerURI.read().decode()
        self.assertEqual(first, "It works!")
        second = self.existingFileButlerURI.read().decode()
        self.assertNotEqual(second, "Nope.")
        with self.assertRaises(FileNotFoundError):
            self.notExistingFileButlerURI.read()

    @responses.activate
    def testWrite(self):
        # Writing succeeds by default; with overwrite=False it fails
        # because the file is mocked as existing.
        payload = "Some content.".encode()
        self.assertIsNone(self.existingFileButlerURI.write(data=payload))
        with self.assertRaises(FileExistsError):
            self.existingFileButlerURI.write(data="Some content.".encode(), overwrite=False)

    @responses.activate
    def testTransfer(self):
        source = self.existingFileButlerURI
        target = self.notExistingFileButlerURI

        # Default transfer mode and explicit move into a missing target.
        self.assertIsNone(target.transfer_from(src=source))
        self.assertIsNone(target.transfer_from(src=source, transfer="move"))

        # An existing target is protected.
        with self.assertRaises(FileExistsError):
            source.transfer_from(src=source)

        # Unknown transfer modes are rejected.
        with self.assertRaises(ValueError):
            target.transfer_from(src=source, transfer="unsupported")

    def testParent(self):
        # parent() of a file is its folder, parent() of the root is the
        # root itself, and parent() agrees with dirname() for files.
        folder_url = self.existingFolderButlerURI.geturl()
        self.assertEqual(folder_url, self.notExistingFileButlerURI.parent().geturl())

        base_url = self.baseURL.geturl()
        self.assertEqual(base_url, self.baseURL.parent().geturl())

        via_parent = self.existingFileButlerURI.parent().geturl()
        via_dirname = self.existingFileButlerURI.dirname().geturl()
        self.assertEqual(via_parent, via_dirname)
def loadUri(cls, uri: Union[ButlerURI, str], universe: DimensionUniverse,
            nodes: Optional[Iterable[int]] = None,
            graphID: Optional[BuildId] = None) -> QuantumGraph:
    """Read `QuantumGraph` from a URI.

    Parameters
    ----------
    uri : `ButlerURI` or `str`
        URI from where to load the graph.
    universe: `~lsst.daf.butler.DimensionUniverse`
        DimensionUniverse instance, not used by the method itself but
        needed to ensure that registry data structures are initialized.
    nodes: iterable of `int` or None
        Numbers that correspond to nodes in the graph. If specified, only
        these nodes will be loaded. Defaults to None, in which case all
        nodes will be loaded.
    graphID : `str` or `None`
        If specified this ID is verified against the loaded graph prior to
        loading any Nodes. This defaults to None in which case no
        validation is done.

    Returns
    -------
    graph : `QuantumGraph`
        Resulting QuantumGraph instance.

    Raises
    ------
    TypeError
        Raised if pickle contains instance of a type other than
        QuantumGraph.
    ValueError
        Raised if one or more of the nodes requested is not in the
        `QuantumGraph` or if graphID parameter does not match the graph
        being loaded or if the supplied uri does not point at a valid
        `QuantumGraph` save file.

    Notes
    -----
    Reading Quanta from pickle requires existence of singleton
    DimensionUniverse which is usually instantiated during Registry
    initialization. To make sure that DimensionUniverse exists this method
    accepts dummy DimensionUniverse argument.
    """
    uri = ButlerURI(uri)
    extension = uri.getExtension()
    # With ButlerURI we have the choice of always using a local file
    # or reading in the bytes directly. Reading in bytes can be more
    # efficient for reasonably-sized pickle files when the resource
    # is remote. For now use the local file variant. For a local file
    # as_local() does nothing.
    if extension in (".pickle", ".pkl"):
        with uri.as_local() as local, open(local.ospath, "rb") as fd:
            warnings.warn("Pickle graphs are deprecated, please re-save your graph with the save method")
            # NOTE(security): unpickling executes arbitrary code; only
            # load pickle graphs from trusted sources.
            qgraph = pickle.load(fd)
    elif extension == ".qgraph":
        # Equality, not ``in ('.qgraph')``: the latter is a substring test
        # on a plain string and would also match "" (no extension).
        with LoadHelper(uri) as loader:
            qgraph = loader.load(nodes, graphID)
    else:
        raise ValueError("Only know how to handle files saved as `pickle`, `pkl`, or `qgraph`")
    if not isinstance(qgraph, QuantumGraph):
        raise TypeError(f"QuantumGraph save file contains unexpected object type: {type(qgraph)}")
    return qgraph
def setUp(self):
    """Build the URIs under test and register the mocked endpoints."""
    host = "www.not-exists.orgx"
    folder = "existingFolder"
    present = "existingFile"
    absent = "notExistingFile"

    self.baseURL = ButlerURI(f"https://{host}", forceDirectory=True)
    self.existingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{present}")
    self.notExistingFileButlerURI = ButlerURI(f"https://{host}/{folder}/{absent}")
    self.existingFolderButlerURI = ButlerURI(f"https://{host}/{folder}", forceDirectory=True)
    self.notExistingFolderButlerURI = ButlerURI(f"https://{host}/{absent}", forceDirectory=True)

    base_url = self.baseURL.geturl()
    file_url = self.existingFileButlerURI.geturl()
    missing_file_url = self.notExistingFileButlerURI.geturl()
    folder_url = self.existingFolderButlerURI.geturl()
    missing_folder_url = self.notExistingFolderButlerURI.geturl()

    # Need to declare the options
    responses.add(responses.OPTIONS, base_url, status=200, headers={"DAV": "1,2,3"})

    # Used by ButlerHttpURI.exists()
    responses.add(responses.HEAD, file_url, status=200, headers={'Content-Length': '1024'})
    responses.add(responses.HEAD, missing_file_url, status=404)

    # Used by ButlerHttpURI.read()
    responses.add(responses.GET, file_url, status=200, body="It works!".encode())
    responses.add(responses.GET, missing_file_url, status=404)

    # Used by ButlerHttpURI.write()
    responses.add(responses.PUT, file_url, status=201)

    # Used by ButlerHttpURI.transfer_from()
    transfers = (
        ("COPY", file_url),
        ("COPY", missing_file_url),
        ("MOVE", missing_file_url),
    )
    for verb, destination in transfers:
        responses.add(responses.Response(url=file_url, method=verb,
                                         headers={"Destination": destination},
                                         status=201))

    # Used by ButlerHttpURI.remove()
    responses.add(responses.DELETE, file_url, status=200)
    responses.add(responses.DELETE, missing_file_url, status=404)

    # Used by ButlerHttpURI.mkdir()
    responses.add(responses.HEAD, folder_url, status=200, headers={'Content-Length': '1024'})
    responses.add(responses.HEAD, base_url, status=200, headers={'Content-Length': '1024'})
    responses.add(responses.HEAD, missing_folder_url, status=404)
    responses.add(responses.Response(url=missing_folder_url, method="MKCOL", status=201))
    responses.add(responses.Response(url=folder_url, method="MKCOL", status=403))
def testRelative(self):
    """Exercise relative_to() for absolute, relative and mixed URIs."""
    root = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
    inside = ButlerURI(os.path.join(self.tmpdir, "dir1", "file.txt"), forceAbsolute=True)
    self.assertEqual(inside.relative_to(root), "dir1/file.txt")

    # A path elsewhere on disk is not relative to the root.
    outside = ButlerURI("/a/b/dir1/file.txt")
    self.assertFalse(outside.relative_to(root))

    # A sibling file cannot act as a parent.
    sibling = ButlerURI(os.path.join(self.tmpdir, "dir1", "file2.txt"))
    self.assertFalse(inside.relative_to(sibling))

    # Relative URIs
    rel_parent = ButlerURI("a/b/", forceAbsolute=False)
    rel_child = ButlerURI("a/b/c/d.txt", forceAbsolute=False)
    self.assertFalse(rel_child.scheme)
    self.assertEqual(rel_child.relative_to(rel_parent), "c/d.txt")

    # File URI and schemeless URI
    file_parent = ButlerURI("file:/a/b/c/")

    # If the child is relative and the parent is absolute we assume
    # that the child is a child of the parent unless it uses ".."
    plain = ButlerURI("e/f/g.txt", forceAbsolute=False)
    self.assertEqual(plain.relative_to(file_parent), "e/f/g.txt")

    escaping = ButlerURI("../e/f/g.txt", forceAbsolute=False)
    self.assertFalse(escaping.relative_to(file_parent))

    # Going up and then back into the parent's own directory still counts.
    returning = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
    self.assertEqual(returning.relative_to(file_parent), "e/f/g.txt")
def ingest(self, path, ref, formatter=None, transfer=None):
    """Add an on-disk file with the given `DatasetRef` to the store,
    possibly transferring it.

    The caller is responsible for ensuring that the given (or predicted)
    Formatter is consistent with how the file was written; `ingest` will
    in general silently ignore incorrect formatters (as it cannot
    efficiently verify their correctness), deferring errors until ``get``
    is first called on the ingested dataset.

    Parameters
    ----------
    path : `str`
        File path or URI. Local (``file`` or schemeless) and ``s3``
        sources are supported.
    ref : `DatasetRef`
        Reference to the associated Dataset.
    formatter : `Formatter` (optional)
        Formatter that should be used to retrieve the Dataset.  If not
        provided, the formatter will be constructed according to
        Datastore configuration.
    transfer : str (optional)
        If not None, must be one of 'move' or 'copy' indicating how to
        transfer the file.  The new filename and location will be
        determined via template substitution, as with ``put``.  If the
        file is outside the datastore root, it must be transferred
        somehow.

    Raises
    ------
    DatasetTypeNotSupportedError
        Raised if ``ref`` is rejected by the datastore constraints.
    RuntimeError
        Raised if ``transfer is None`` and path is outside the repository
        root, or refers to a local file (local data can not be ingested
        into S3 without an upload).
    FileNotFoundError
        Raised if the file at ``path`` does not exist.
    NotImplementedError
        Raised if the URI scheme or the transfer mode is not supported.
    PermissionError
        NOTE(review): presumably propagated from ``s3CheckFileExists``
        when the IAM user lacks s3:GetObject or s3:ListBucket
        permissions; it is not raised directly by this method.

    Notes
    -----
    NOTE(review): no check is made here that the template-derived target
    is free before uploading, so an existing object at that key would be
    overwritten rather than reported with `FileExistsError`.
    """
    if not self.constraints.isAcceptable(ref):
        # Raise rather than use boolean return value.
        raise DatasetTypeNotSupportedError(
            f"Dataset {ref} has been rejected by this datastore via"
            " configuration.")

    if formatter is None:
        formatter = self.formatterFactory.getFormatterClass(ref)

    # ingest can occur from file->s3 and s3->s3 (source can be file or s3,
    # target will always be s3). The source file has to exist.
    # Schemeless URIs are assumed to obey os.path rules, so they are
    # treated as local files everywhere below, not only in this check.
    srcUri = ButlerURI(path)
    isLocal = srcUri.scheme == "file" or not srcUri.scheme
    if isLocal:
        if not os.path.exists(srcUri.ospath):
            raise FileNotFoundError(
                f"File at '{srcUri}' does not exist; note that paths to ingest are "
                "assumed to be relative to self.root unless they are absolute."
            )
    elif srcUri.scheme == "s3":
        if not s3CheckFileExists(srcUri, client=self.client)[0]:
            raise FileNotFoundError(
                f"File at '{srcUri}' does not exist; note that paths to ingest are "
                "assumed to be relative to self.root unless they are absolute."
            )
    else:
        raise NotImplementedError(
            f"Scheme type {srcUri.scheme} not supported.")

    # Transfer is generally None when put calls ingest. In that case the
    # file is uploaded in put, or already in proper location, so source
    # location must be inside repository. In other cases, created target
    # location must be inside root and source file must be deleted when
    # 'move'd.
    if transfer is None:
        rootUri = ButlerURI(self.root)
        if isLocal:
            raise RuntimeError(
                f"'{srcUri}' is not inside repository root '{rootUri}'. "
                "Ingesting local data to S3Datastore without upload "
                "to S3 is not allowed.")
        if not srcUri.path.startswith(rootUri.path):
            raise RuntimeError(
                f"'{srcUri}' is not inside repository root '{rootUri}'."
            )
        p = pathlib.PurePosixPath(srcUri.relativeToPathRoot)
        try:
            pathInStore = str(p.relative_to(rootUri.relativeToPathRoot))
        except ValueError as err:
            # A plain string-prefix match (e.g. "/root2" vs "/root") is
            # not a real parent/child relation; report it the documented
            # way instead of leaking ValueError.
            raise RuntimeError(
                f"'{srcUri}' is not inside repository root '{rootUri}'."
            ) from err
        tgtLocation = self.locationFactory.fromPath(pathInStore)
    elif transfer == "move" or transfer == "copy":
        # The name inside the datastore comes from the file template for
        # every kind of source, so the source does not need to live under
        # the datastore root.
        template = self.templates.getTemplate(ref)
        location = self.locationFactory.fromPath(template.format(ref))
        tgtPathInStore = formatter.predictPathFromLocation(location)
        tgtLocation = self.locationFactory.fromPath(tgtPathInStore)

        if isLocal:
            # source is on local disk.
            self.client.upload_file(Bucket=tgtLocation.netloc,
                                    Key=tgtLocation.relativeToPathRoot,
                                    Filename=srcUri.ospath)
            if transfer == "move":
                os.remove(srcUri.ospath)
        else:
            # source is another S3 Bucket
            relpath = srcUri.relativeToPathRoot
            copySrc = {"Bucket": srcUri.netloc, "Key": relpath}
            self.client.copy(copySrc, self.locationFactory.netloc,
                             tgtLocation.relativeToPathRoot)
            if transfer == "move":
                # https://github.com/boto/boto3/issues/507 - there is no
                # way of knowing if the file was actually deleted except
                # for checking all the keys again, response is HTTP 204 OK
                # response all the time
                self.client.delete_object(Bucket=srcUri.netloc, Key=relpath)
    else:
        raise NotImplementedError(
            f"Transfer type '{transfer}' not supported.")

    # the file should exist on the bucket by now
    _, size = s3CheckFileExists(path=tgtLocation.relativeToPathRoot,
                                bucket=tgtLocation.netloc,
                                client=self.client)

    # Update the registry
    self._register_dataset_file(ref, formatter, tgtLocation.pathInStore,
                                size, None)
def testWrite(self):
    """Write encoded text to a new URI and check it reads back intact."""
    target = ButlerURI(self.makeS3Uri("created.txt"))
    text = "abcdefghijklmnopqrstuv\n"

    payload = text.encode()
    target.write(payload)

    roundTripped = target.read().decode()
    self.assertEqual(roundTripped, text)