Ejemplo n.º 1
0
    def __init__(self, config, registry, butlerRoot=None):
        """Construct the datastore, validating its root directory."""
        super().__init__(config, registry, butlerRoot)

        # Ensure the root directory exists, creating it only when the
        # configuration explicitly allows it.
        if not os.path.isdir(self.root):
            mayCreate = "create" in self.config and self.config["create"]
            if not mayCreate:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)
Ejemplo n.º 2
0
    def __init__(self, config, registry):
        """Construct the datastore and prepare its supporting factories."""
        super().__init__(config, registry)

        # Resolve the root directory, creating it if the config permits.
        self.root = self.config['root']
        if not os.path.isdir(self.root):
            mayCreate = 'create' in self.config and self.config['create']
            if not mayCreate:
                raise ValueError("No valid root at: {0}".format(self.root))
            safeMakeDir(self.root)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()
        self.storageClassFactory = StorageClassFactory()

        # Associate each configured formatter with its storage class.
        for storageClassName, formatterName in self.config["formatters"].items():
            self.formatterFactory.registerFormatter(storageClassName,
                                                    formatterName)

        # File naming templates come straight from configuration.
        self.templates = FileTemplates(self.config["templates"])

        # Human-readable identity for this datastore instance.
        self.name = "POSIXDatastore@{}".format(self.root)

        # Per-dataset bookkeeping (path, formatter, storage class),
        # keyed by the integer dataset_id.
        columnTypes = {
            "path": str,
            "formatter": str,
            "storage_class": str,
            "dataset_id": int,
        }
        self.records = DatabaseDict.fromConfig(self.config["records"],
                                               types=columnTypes,
                                               value=self.RecordTuple,
                                               key="dataset_id",
                                               registry=registry)
Ejemplo n.º 3
0
    def __init__(self, config, registry, butlerRoot=None):
        """Construct the datastore from configuration, honouring relocation."""
        super().__init__(config, registry)
        if "root" not in self.config:
            raise ValueError("No root directory specified in configuration")

        # Prefer an explicit name; otherwise derive one from the
        # (unexpanded) root so relocation does not change the name.
        if "name" in self.config:
            self.name = self.config["name"]
        else:
            self.name = "POSIXDatastore@{}".format(self.config["root"])

        # Expand the configured root, supporting repository relocation.
        self.root = replaceRoot(self.config["root"], butlerRoot)

        if not os.path.isdir(self.root):
            mayCreate = "create" in self.config and self.config["create"]
            if not mayCreate:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()
        self.storageClassFactory = StorageClassFactory()

        # Associate formatters with storage classes.
        self.formatterFactory.registerFormatters(
            self.config["formatters"], universe=self.registry.dimensions)

        # Templates controlling output file naming.
        self.templates = FileTemplates(self.config["templates"],
                                       universe=self.registry.dimensions)

        # Constraints deciding which datasets this datastore accepts.
        self.constraints = Constraints(self.config.get("constraints"),
                                       universe=self.registry.dimensions)

        # Schema for the per-dataset record table, keyed by dataset_id.
        recordTypes = {
            "path": str,
            "formatter": str,
            "storage_class": str,
            "file_size": int,
            "checksum": str,
            "dataset_id": int,
        }
        recordLengths = {
            "path": 256,
            "formatter": 128,
            "storage_class": 64,
            "checksum": 128,
        }
        self.records = DatabaseDict.fromConfig(self.config["records"],
                                               types=recordTypes,
                                               value=self.RecordTuple,
                                               key="dataset_id",
                                               lengths=recordLengths,
                                               registry=registry)
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        datasetType = ref.datasetType
        typeName = datasetType.name
        storageClass = datasetType.storageClass

        # Sanity check: the object must match the declared storage class.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise ValueError("Inconsistency between supplied object ({}) "
                             "and storage class type ({})".format(
                                 type(inMemoryDataset), storageClass.pytype))

        # Expand the naming template into a concrete output location.
        location = self.locationFactory.fromPath(
            self.templates.getTemplate(typeName).format(ref))

        # Choose a formatter appropriate for this storage class and type.
        formatter = self.formatterFactory.getFormatter(
            datasetType.storageClass, typeName)

        # Make sure the destination directory exists before writing.
        outputDir = os.path.dirname(location.path)
        if not os.path.isdir(outputDir):
            safeMakeDir(outputDir)

        # Serialize the dataset to disk.
        path = formatter.write(
            inMemoryDataset, FileDescriptor(location,
                                            storageClass=storageClass))

        # Record storage information (checksum and size) in the registry.
        ospath = os.path.join(self.root, path)
        checksum = self.computeChecksum(ospath)
        size = os.stat(ospath).st_size
        info = StorageInfo(self.name, checksum, size)
        self.registry.addStorageInfo(ref, info)

        # Remember which formatter to use for subsequent reads.
        fileInfo = StoredFileInfo(formatter, path, storageClass)
        self.addStoredFileInfo(ref, fileInfo)

        # Components share the parent's storage and formatter information.
        for componentRef in ref.components.values():
            self.registry.addStorageInfo(componentRef, info)
            self.addStoredFileInfo(componentRef, fileInfo)
Ejemplo n.º 5
0
 def _extractIngestInfo(self,
                        path: str,
                        ref: DatasetRef,
                        *,
                        formatter: Type[Formatter],
                        transfer: Optional[str] = None) -> StoredFileInfo:
     # Docstring inherited from FileLikeDatastore._extractIngestInfo.
     # Relative paths are interpreted with respect to the datastore root.
     fullPath = os.path.normpath(os.path.join(self.root, path))
     if transfer is not None:
         # Compute the in-datastore destination from the naming template.
         template = self.templates.getTemplate(ref)
         location = self.locationFactory.fromPath(template.format(ref))
         newPath = formatter.predictPathFromLocation(location)
         newFullPath = os.path.join(self.root, newPath)
         if os.path.exists(newFullPath):
             raise FileExistsError(f"File '{newFullPath}' already exists.")
         storageDir = os.path.dirname(newFullPath)
         if not os.path.isdir(storageDir):
             # Register an undo so a rolled-back transaction removes the
             # directory we are about to create.
             with self._transaction.undoWith("mkdir", os.rmdir, storageDir):
                 safeMakeDir(storageDir)
         # Perform the requested transfer; each mode registers a matching
         # undo action so the transaction can restore the previous state.
         if transfer == "move":
             with self._transaction.undoWith("move", shutil.move,
                                             newFullPath, fullPath):
                 shutil.move(fullPath, newFullPath)
         elif transfer == "copy":
             with self._transaction.undoWith("copy", os.remove,
                                             newFullPath):
                 shutil.copy(fullPath, newFullPath)
         elif transfer == "hardlink":
             with self._transaction.undoWith("hardlink", os.unlink,
                                             newFullPath):
                 os.link(fullPath, newFullPath)
         elif transfer == "symlink":
             with self._transaction.undoWith("symlink", os.unlink,
                                             newFullPath):
                 os.symlink(fullPath, newFullPath)
         else:
             raise NotImplementedError(
                 "Transfer type '{}' not supported.".format(transfer))
         # From here on refer to the file at its new location.
         path = newPath
         fullPath = newFullPath
     # Checksumming is optional and controlled by configuration.
     if self.useChecksum:
         checksum = self.computeChecksum(fullPath)
     else:
         checksum = None
     stat = os.stat(fullPath)
     size = stat.st_size
     return StoredFileInfo(formatter=formatter,
                           path=path,
                           storageClass=ref.datasetType.storageClass,
                           file_size=size,
                           checksum=checksum)
Ejemplo n.º 6
0
    def __init__(self, config, registry, butlerRoot=None):
        """Construct the datastore, insisting on a plain filesystem root."""
        super().__init__(config, registry, butlerRoot)

        # The root must be a local filesystem location; reject any
        # non-file URI scheme up front.
        uri = ButlerURI(self.root)
        if uri.scheme and uri.scheme != "file":
            raise ValueError(
                f"Root location must only be a file URI not {self.root}")

        self.root = uri.path
        if not os.path.isdir(self.root):
            mayCreate = "create" in self.config and self.config["create"]
            if not mayCreate:
                raise ValueError(f"No valid root at: {self.root}")
            safeMakeDir(self.root)
Ejemplo n.º 7
0
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`.  The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this directory on rollback: a concurrent
            # butler ingest may have created it (or rely on it) in the
            # meantime, and os.rmdir would fail on a non-empty directory.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(
                f"Cannot write file for ref {ref} as "
                f"output file {predictedFullPath} already exists")

        def _removeFileIfExists(path):
            """Remove a file and do not complain if it is absent.

            The formatter may fail before the output file is created; a
            bare ``os.remove`` undo would then raise `FileNotFoundError`
            and mask the original failure.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        with self._transaction.undoWith("write", _removeFileIfExists,
                                        predictedFullPath):
            path = formatter.write(inMemoryDataset)
            # The formatter must honour its own prediction.
            assert predictedFullPath == os.path.join(self.root, path)
            log.debug("Wrote file to %s", path)

        # Register the freshly written file via the ingest machinery.
        self.ingest(path, ref, formatter=formatter)
Ejemplo n.º 8
0
    def setUp(self):
        """Create a repository and a relocated copy of its configuration."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create a fresh repository under the first directory.
        self.dir1 = os.path.join(self.root, "dir1")
        Butler.makeRepo(self.dir1, config=Config(self.configFile))

        # Relocate the YAML config into a second directory, recording an
        # explicit "root" that points back at the original repository.
        self.dir2 = os.path.join(self.root, "dir2")
        safeMakeDir(self.dir2)
        configFile1 = os.path.join(self.dir1, "butler.yaml")
        relocated = Config(configFile1)
        relocated["root"] = self.dir1
        configFile2 = os.path.join(self.dir2, "butler2.yaml")
        relocated.dumpToFile(configFile2)
        os.remove(configFile1)
        self.tmpConfigFile = configFile2
Ejemplo n.º 9
0
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        datasetType = ref.datasetType
        typeName = datasetType.name
        storageClass = datasetType.storageClass

        # The supplied object must be an instance of the storage class type.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise ValueError("Inconsistency between supplied object ({}) "
                             "and storage class type ({})".format(
                                 type(inMemoryDataset), storageClass.pytype))

        # Expand the naming template into a concrete output location.
        location = self.locationFactory.fromPath(
            self.templates.getTemplate(typeName).format(ref))

        # Choose a formatter appropriate for this storage class and type.
        formatter = self.formatterFactory.getFormatter(
            datasetType.storageClass, typeName)

        # Ensure the parent directory exists before writing.
        outputDir = os.path.dirname(location.path)
        if not os.path.isdir(outputDir):
            safeMakeDir(outputDir)

        # Serialize the dataset, then register it via the ingest path.
        path = formatter.write(
            inMemoryDataset, FileDescriptor(location,
                                            storageClass=storageClass))

        self.ingest(path, ref, formatter=formatter)
Ejemplo n.º 10
0
    def __init__(self, config, registry):
        """Construct the datastore and prepare its root directory."""
        super().__init__(config, registry)

        # Resolve the root directory, creating it if the config permits.
        self.root = self.config['root']
        if not os.path.isdir(self.root):
            mayCreate = 'create' in self.config and self.config['create']
            if not mayCreate:
                raise ValueError("No valid root at: {0}".format(self.root))
            safeMakeDir(self.root)

        self.locationFactory = LocationFactory(self.root)
        self.formatterFactory = FormatterFactory()
        self.storageClassFactory = StorageClassFactory()

        # Associate each configured formatter with its storage class.
        for storageClassName, formatterName in self.config["formatters"].items():
            self.formatterFactory.registerFormatter(storageClassName,
                                                    formatterName)

        # File naming templates come straight from configuration.
        self.templates = FileTemplates(self.config["templates"])

        # Human-readable identity for this datastore instance.
        self.name = "POSIXDatastore@{}".format(self.root)

        # Temporary in-memory map from dataset to formatter, used for reads.
        self.internalRegistry = {}
Ejemplo n.º 11
0
    def ingest(self, path, ref, formatter=None, transfer=None):
        """Add an on-disk file with the given `DatasetRef` to the store,
        possibly transferring it.

        The caller is responsible for ensuring that the given (or predicted)
        Formatter is consistent with how the file was written; `ingest` will
        in general silently ignore incorrect formatters (as it cannot
        efficiently verify their correctness), deferring errors until ``get``
        is first called on the ingested dataset.

        Parameters
        ----------
        path : `str`
            File path.  Treated as relative to the repository root if not
            absolute.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        formatter : `Formatter`, optional
            Formatter that should be used to retrieve the Dataset.  If not
            provided, the formatter will be constructed according to
            Datastore configuration.  Can be the Formatter class or an
            instance.
        transfer : str (optional)
            If not None, must be one of 'move', 'copy', 'hardlink', or
            'symlink' indicating how to transfer the file.  The new
            filename and location will be determined via template substitution,
            as with ``put``.  If the file is outside the datastore root, it
            must be transferred somehow.

        Raises
        ------
        RuntimeError
            Raised if ``transfer is None`` and path is outside the repository
            root.
        FileNotFoundError
            Raised if the file at ``path`` does not exist.
        FileExistsError
            Raised if ``transfer is not None`` but a file already exists at the
            location computed from the template.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.
        """

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        # Fall back to the configured formatter class for this dataset.
        if formatter is None:
            formatter = self.formatterFactory.getFormatterClass(ref)

        # Relative paths are resolved against the datastore root.
        fullPath = os.path.normpath(os.path.join(self.root, path))
        if not os.path.exists(fullPath):
            raise FileNotFoundError(
                "File at '{}' does not exist; note that paths to ingest are "
                "assumed to be relative to self.root unless they are absolute."
                .format(fullPath))

        if transfer is None:
            # No transfer requested: the file must already live inside the
            # repository root; normalize its path to be root-relative.
            if os.path.isabs(path):
                absRoot = os.path.abspath(self.root)
                if os.path.commonpath([absRoot, path]) != absRoot:
                    raise RuntimeError(
                        "'{}' is not inside repository root '{}'".format(
                            path, self.root))
                path = os.path.relpath(path, absRoot)
            elif path.startswith(os.path.pardir):
                # A relative path beginning with ".." escapes the root.
                raise RuntimeError(
                    f"'{path}' is outside repository root '{self.root}'")
        else:
            # Compute the in-datastore destination from the naming template.
            template = self.templates.getTemplate(ref)
            location = self.locationFactory.fromPath(template.format(ref))
            newPath = formatter.predictPathFromLocation(location)
            newFullPath = os.path.join(self.root, newPath)
            if os.path.exists(newFullPath):
                raise FileExistsError(
                    "File '{}' already exists".format(newFullPath))
            storageDir = os.path.dirname(newFullPath)
            if not os.path.isdir(storageDir):
                # Register an undo so a rolled-back transaction removes
                # the directory we are about to create.
                with self._transaction.undoWith("mkdir", os.rmdir, storageDir):
                    safeMakeDir(storageDir)
            # Perform the requested transfer; each mode registers a
            # matching undo action to restore the previous state.
            if transfer == "move":
                with self._transaction.undoWith("move", shutil.move,
                                                newFullPath, fullPath):
                    shutil.move(fullPath, newFullPath)
            elif transfer == "copy":
                with self._transaction.undoWith("copy", os.remove,
                                                newFullPath):
                    shutil.copy(fullPath, newFullPath)
            elif transfer == "hardlink":
                with self._transaction.undoWith("hardlink", os.unlink,
                                                newFullPath):
                    os.link(fullPath, newFullPath)
            elif transfer == "symlink":
                with self._transaction.undoWith("symlink", os.unlink,
                                                newFullPath):
                    os.symlink(fullPath, newFullPath)
            else:
                raise NotImplementedError(
                    "Transfer type '{}' not supported.".format(transfer))
            # From here on refer to the file at its new location.
            path = newPath
            fullPath = newFullPath

        # Create Storage information in the registry
        checksum = self.computeChecksum(fullPath)
        stat = os.stat(fullPath)
        size = stat.st_size

        # Update the registry
        self._register_dataset_file(ref, formatter, path, size, checksum)
Ejemplo n.º 12
0
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`.  The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        datasetType = ref.datasetType
        storageClass = datasetType.storageClass

        # Sanity check: the object must match the declared storage class.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(
                                type(inMemoryDataset), storageClass.pytype))

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via"
                " configuration.")

        # Work out output file name
        try:
            template = self.templates.getTemplate(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find template for {ref}") from e

        location = self.locationFactory.fromPath(template.format(ref))

        # Get the formatter based on the storage class
        try:
            formatter = self.formatterFactory.getFormatter(ref)
        except KeyError as e:
            raise DatasetTypeNotSupportedError(
                f"Unable to find formatter for {ref}") from e

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this directory on rollback: a concurrent
            # butler ingest may have created it (or rely on it) in the
            # meantime, and os.rmdir would fail on a non-empty directory.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root,
                                         formatter.predictPath(location))

        if os.path.exists(predictedFullPath):
            raise FileExistsError(
                f"Cannot write file for ref {ref} as "
                f"output file {predictedFullPath} already exists")

        def _removeFileIfExists(path):
            """Remove a file and do not complain if it is absent.

            The formatter may fail before the output file is created; a
            bare ``os.remove`` undo would then raise `FileNotFoundError`
            and mask the original failure.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        with self._transaction.undoWith("write", _removeFileIfExists,
                                        predictedFullPath):
            path = formatter.write(
                inMemoryDataset,
                FileDescriptor(location, storageClass=storageClass))
            # The formatter must honour its own prediction.
            assert predictedFullPath == os.path.join(self.root, path)
            log.debug("Wrote file to %s", path)

        # Register the freshly written file via the ingest machinery.
        self.ingest(path, ref, formatter=formatter)
Ejemplo n.º 13
0
    def put(self, inMemoryDataset, ref):
        """Write a InMemoryDataset with a given `DatasetRef` to the store.

        Parameters
        ----------
        inMemoryDataset : `object`
            The Dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Supplied object and storage class are inconsistent.
        DatasetTypeNotSupportedError
            The associated `DatasetType` is not handled by this datastore.

        Notes
        -----
        If the datastore is configured to reject certain dataset types it
        is possible that the put will fail and raise a
        `DatasetTypeNotSupportedError`.  The main use case for this is to
        allow `ChainedDatastore` to put to multiple datastores without
        requiring that every datastore accepts the dataset.
        """
        location, formatter = self._prepare_for_put(inMemoryDataset, ref)

        storageDir = os.path.dirname(location.path)
        if not os.path.isdir(storageDir):
            # Never try to remove this after creating it since there might
            # be a butler ingest process running concurrently that will
            # already think this directory exists.
            safeMakeDir(storageDir)

        # Write the file
        predictedFullPath = os.path.join(self.root, formatter.predictPath())

        if os.path.exists(predictedFullPath):
            raise FileExistsError(
                f"Cannot write file for ref {ref} as "
                f"output file {predictedFullPath} already exists")

        def _removeFileExists(path):
            """Remove a file and do not complain if it is not there.

            This is important since a formatter might fail before the file
            is written and we should not confuse people by writing spurious
            error messages to the log.
            """
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        # NOTE(review): the formatter error is captured here and re-raised
        # only after the undoWith block has exited — presumably so the
        # transaction machinery completes its bookkeeping for the failed
        # write before the exception propagates; confirm undoWith semantics.
        formatter_exception = None
        with self._transaction.undoWith("write", _removeFileExists,
                                        predictedFullPath):
            try:
                path = formatter.write(inMemoryDataset)
                log.debug("Wrote file to %s", path)
            except Exception as e:
                formatter_exception = e

        if formatter_exception:
            raise formatter_exception

        # The formatter must honour its own path prediction.
        assert predictedFullPath == os.path.join(self.root, path)

        # Extract size/checksum bookkeeping and record the dataset.
        info = self._extractIngestInfo(path, ref, formatter=formatter)
        self._register_datasets([(ref, info)])