def _prepare_for_get(self, ref, parameters=None):
    """Validate a ``get`` request and collect what is needed to read it.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required Dataset.
    parameters : `dict`, optional
        `StorageClass`-specific parameters that specify, for example,
        a slice of the Dataset to be loaded.

    Returns
    -------
    getInfo : `DatastoreFileGetInformation`
        Bundle holding the file location, the formatter instance, the
        stored file record, the assembler parameters, the requested
        component and the read storage class.

    Raises
    ------
    FileNotFoundError
        No location is known for the requested dataset.

    Notes
    -----
    Whatever ``validateParameters`` raises for unsuitable ``parameters``
    is propagated unchanged.
    """
    log.debug("Retrieve %s from %s with parameters %s", ref, self.name, parameters)

    # Look up where the file lives together with its stored metadata.
    fileLocation, fileRecord = self._get_dataset_location_info(ref)
    if fileLocation is None:
        raise FileNotFoundError(f"Could not retrieve Dataset {ref}.")

    # The storage class used when writing can differ from the one being
    # read back (concrete composites), so both are tracked separately.
    storageClassForRead = ref.datasetType.storageClass
    storageClassForWrite = fileRecord.storageClass

    # Parameters must make sense for the type that is being read.
    storageClassForRead.validateParameters(parameters)

    # Component of the dataset type being requested, as reported by it.
    requestedComponent = ref.datasetType.component()

    formatterSpec = fileRecord.formatter
    descriptor = FileDescriptor(
        fileLocation,
        readStorageClass=storageClassForRead,
        storageClass=storageClassForWrite,
        parameters=parameters,
    )
    fileFormatter = getInstanceOf(formatterSpec, descriptor, ref.dataId)

    # Only the assembler half of the segregated parameters is forwarded.
    _, assemblerArgs = fileFormatter.segregateParameters()

    return DatastoreFileGetInformation(
        fileLocation,
        fileFormatter,
        fileRecord,
        assemblerArgs,
        requestedComponent,
        storageClassForRead,
    )
def get(self, ref, parameters=None):
    """Load an InMemoryDataset from the store.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required Dataset.
    parameters : `dict`, optional
        `StorageClass`-specific parameters that specify, for example,
        a slice of the Dataset to be loaded.  They are handed to the
        formatter through the `FileDescriptor`.

    Returns
    -------
    inMemoryDataset : `object`
        Requested Dataset or slice thereof as an InMemoryDataset.

    Raises
    ------
    FileNotFoundError
        Requested dataset can not be retrieved: either the metadata
        lookup raised `KeyError` or the file could not be found at its
        expected location.
    RuntimeError
        Size of the file on disk differs from the size recorded for it.
    TypeError
        Return value from formatter has unexpected type.
    ValueError
        Formatter failed to process the dataset.
    """
    # Get file metadata and internal metadata
    try:
        storageInfo = self.registry.getStorageInfo(ref, self.name)
        storedFileInfo = self.getStoredFileInfo(ref)
    except KeyError as err:
        raise FileNotFoundError(f"Could not retrieve Dataset {ref}") from err

    # Use the path to determine the location
    location = self.locationFactory.fromPath(storedFileInfo.path)

    # Too expensive to recalculate the checksum on fetch but we can check
    # size and existence.  A single stat call covers both and avoids the
    # window between an existence test and a later stat in which the file
    # could disappear.  The caught exceptions mirror those that
    # os.path.exists treats as "does not exist".
    try:
        size = os.stat(location.path).st_size
    except (OSError, ValueError) as err:
        raise FileNotFoundError(
            f"Dataset with Id {ref.id} does not seem to exist at"
            f" expected location of {location.path}"
        ) from err

    if size != storageInfo.size:
        raise RuntimeError(
            f"Integrity failure in Datastore. Size of file {location.path} ({size}) does not"
            f" match recorded size of {storageInfo.size}"
        )

    # We have a write storage class and a read storage class and they
    # can be different for concrete composites.
    readStorageClass = ref.datasetType.storageClass
    writeStorageClass = storedFileInfo.storageClass

    # Is this a component request?
    comp = ref.datasetType.component()

    formatter = getInstanceOf(storedFileInfo.formatter)
    try:
        # The descriptor is built inside the guard so that a failure to
        # construct it is reported in the same way as a read failure.
        result = formatter.read(
            FileDescriptor(location, readStorageClass=readStorageClass,
                           storageClass=writeStorageClass, parameters=parameters),
            comp)
    except Exception as err:
        raise ValueError(
            f"Failure from formatter for Dataset {ref.id}: {err}"
        ) from err

    # Validate the returned data type matches the expected data type
    pytype = readStorageClass.pytype
    if pytype and not isinstance(result, pytype):
        raise TypeError(
            f"Got type {type(result)} from formatter but expected {pytype}"
        )

    return result