def _checkOverwrite(self, outputDirPath):
    """
    Check that outputDirPath does not already exist.

    Delegates to lambdaimage.utils.common.raiseErrorIfPathExists, handing it
    this object's `awsCredentialsOverride` attribute so the check can use those
    credentials. Any error signalling an existing path is raised by that helper.

    Parameters
    ----------
    outputDirPath: string
        Path of the output directory to check.
    """
    # Imported at call time, as in the rest of this code base.
    from lambdaimage.utils.common import raiseErrorIfPathExists

    credentials = self.awsCredentialsOverride
    raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=credentials)
def convertImagesToSeries(self, dataPath, outputDirPath, dims=None, inputFormat='stack', ext=None,
                          dtype='int16', blockSize="150M", blockSizeUnits="pixels", startIdx=None,
                          stopIdx=None, overwrite=False, recursive=False, nplanes=None,
                          npartitions=None, renumber=False, confFilename='conf.json'):
    """
    Convert Images data to Series data and write it out in a flat binary format.

    The resulting files may subsequently be read in using lambdaimageContext.loadSeries().
    Loading Series data directly will likely be faster than converting image data to a
    Series object through loadImagesAsSeries().

    Parameters
    ----------
    dataPath: string
        Path to data files or directory, as either a local filesystem path or a URI.
        May include a single '*' wildcard in the filename. Examples of valid dataPaths
        include "local/directory/*.stack", "s3n:///my-s3-bucket/data/", or
        "file:///mnt/another/directory/".

    outputDirPath: string
        Path to the directory to write Series file output. May be either a path on the
        local file system or a URI-like format, such as "local/directory",
        "s3n:///my-s3-bucket/data/", or "file:///mnt/another/directory/". If the directory
        exists and 'overwrite' is True, the existing directory and all its contents will
        be deleted and overwritten. If 'overwrite' is False, the path is checked with
        raiseErrorIfPathExists before any data is loaded.

    dims: tuple of positive int, optional (required if inputFormat is 'stack')
        Image dimensions. Binary stack data will be interpreted as a multidimensional
        array with the given dimensions, and should be stored in column-major order
        (Fortran or Matlab convention), where the first dimension changes most rapidly.
        Only passed to the 'stack' loader; for tif data, dimensions will be read from
        the image file headers.

    inputFormat: str, optional, default = 'stack'
        Expected format of the input data: 'stack', 'tif', or 'tif-stack' (the values
        accepted by the parameter check in this method). 'stack' indicates flat binary
        stacks and is loaded with ImagesLoader.fromStack; the other values are loaded
        with ImagesLoader.fromTif. Pages of a multipage tif file will be extended along
        the third dimension. Separate files are interpreted as distinct records, with
        ordering given by lexicographic sorting of file names.

    ext: string, optional, default = None
        File extension. If not given, it is looked up in DEFAULT_EXTENSIONS using the
        lower-cased inputFormat (expected to be "bin" for 'stack' and "tif" for 'tif';
        TODO confirm against DEFAULT_EXTENSIONS).

    dtype: string or numpy dtype, optional, default 'int16'
        Data type of the image files to be loaded, specified as a numpy "dtype" string.
        Only passed to the 'stack' loader; for tif data the type is inferred from the
        image files.

    blockSize: string or positive int, optional, default "150M"
        Requested size of blocks (e.g. "64M", "512k", "2G"), passed through to
        Images.toBlocks. A tuple of int giving the number of pixels or splits per
        dimension may also be accepted there (see blockSizeUnits). Indirectly controls
        the number of Spark partitions, with one partition per block.

    blockSizeUnits: string, either "pixels" or "splits", default "pixels"
        Units for interpreting a tuple passed as blockSize.

    startIdx: nonnegative int, optional, default = None
        Convenience parameter to read only a subset of input files. Uses python slice
        conventions (zero-based indexing with exclusive final position). startIdx and
        stopIdx give the starting and final index after lexicographic sorting.

    stopIdx: nonnegative int, optional, default = None
        See startIdx.

    overwrite: boolean, optional, default False
        If true, the directory specified by outputDirPath will be deleted (recursively)
        if it already exists. (Use with caution.)

    recursive: boolean, optional, default = False
        If true, will recursively descend directories rooted at dataPath, loading all
        files in the tree with an appropriate extension.

    nplanes: positive integer, optional, default = None
        Subdivide individual image files. Every `nplanes` planes from each file will be
        considered a new record. With nplanes=None (the default), a single file will be
        considered as representing a single record. If the number of records per file is
        not the same across all files, then `renumber` should be set to True to ensure
        consistent keys.

    npartitions: positive int, optional, default = None
        Number of partitions for the RDD; if unspecified, 1 partition per image is used.

    renumber: boolean, optional, default = False
        Recalculate keys for records after images are loaded. Only necessary if
        different files contain different numbers of records (e.g. due to specifying
        nplanes). See Images.renumber().

    confFilename: string, optional, default = 'conf.json'
        Name of the conf file, if one is used to specify parameters for binary stack
        data. Only passed to the 'stack' loader.
    """
    checkParams(inputFormat, ['stack', 'tif', 'tif-stack'])

    if not overwrite:
        raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=self._credentials)
        # The path was just verified to be absent, so let the writer skip its own
        # existence check for this path.
        overwrite = True

    if not ext:
        ext = DEFAULT_EXTENSIONS.get(inputFormat.lower(), None)

    from lambdaimage.rdds.fileio.imagesloader import ImagesLoader
    loader = ImagesLoader(self._sc)

    # Keyword arguments understood by both the stack loader and the tif loader.
    sharedArgs = {
        'ext': ext,
        'startIdx': startIdx,
        'stopIdx': stopIdx,
        'recursive': recursive,
        'nplanes': nplanes,
        'npartitions': npartitions,
    }

    if inputFormat.lower() == 'stack':
        images = loader.fromStack(dataPath, dims, dtype=dtype, confFilename=confFilename,
                                  **sharedArgs)
    else:
        # 'tif' or 'tif-stack'
        images = loader.fromTif(dataPath, **sharedArgs)

    if renumber:
        images = images.renumber()

    blocks = images.toBlocks(blockSize, units=blockSizeUnits)
    blocks.saveAsBinarySeries(outputDirPath, overwrite=overwrite)
def _checkOverwrite(self, outputDirPath):
    """
    Checks for existence of outputDirPath, raising ValueError if it already exists.

    AWS credentials are built from the Spark context behind this object's RDD
    (`self.rdd.ctx`) via AWSCredentials.fromContext and handed to
    raiseErrorIfPathExists, which performs the actual check.

    Parameters
    ----------
    outputDirPath: string
        Path of the output directory to check.
    """
    # Imported at call time, as in the rest of this code base.
    from lambdaimage.utils.aws import AWSCredentials
    from lambdaimage.utils.common import raiseErrorIfPathExists

    credentials = AWSCredentials.fromContext(self.rdd.ctx)
    raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=credentials)