Ejemplo n.º 1
0
 def _checkOverwrite(self, outputDirPath):
     from lambdaimage.utils.common import raiseErrorIfPathExists
     raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=self.awsCredentialsOverride)
Ejemplo n.º 2
0
    def convertImagesToSeries(self, dataPath, outputDirPath, dims=None, inputFormat='stack', ext=None,
                              dtype='int16', blockSize="150M", blockSizeUnits="pixels", startIdx=None, stopIdx=None,
                              overwrite=False, recursive=False, nplanes=None, npartitions=None,
                              renumber=False, confFilename='conf.json'):
        """
        Write out Images data as Series data, saved in a flat binary format.

        The resulting files may subsequently be read in using lambdaimageContext.loadSeries().
        Loading Series data directly will likely be faster than converting image data
        to a Series object through loadImagesAsSeries().

        Parameters
        ----------
        dataPath: string
            Path to data files or directory, as either a local filesystem path or a URI.
            May include a single '*' wildcard in the filename. Examples of valid dataPaths include
            'local/directory/*.stack", "s3n:///my-s3-bucket/data/", or "file:///mnt/another/directory/".

        outputDirPath: string
            Path to directory to write Series file output. May be either a path on the local file system
            or a URI-like format, such as "local/directory", "s3n:///my-s3-bucket/data/",
            or "file:///mnt/another/directory/". If the directory exists and 'overwrite' is True,
            the existing directory and all its contents will be deleted and overwritten.

        dims: tuple of positive int, optional (required if inputFormat is 'stack')
            Image dimensions. Binary stack data will be interpreted as a multidimensional array
            with the given dimensions, and should be stored in row-major order (Fortran or Matlab convention),
            where the first dimension changes most rapidly. For 'png' or 'tif' data dimensions
            will be read from the image file headers.

        inputFormat: str, optional, default = 'stack'
            Expected format of the input data: 'stack', 'png', or 'tif'. 'stack' indicates flat binary stacks.
            'png' or 'tif' indicate image formats. Page of a multipage tif file will be extend along
            the third dimension. Separate files interpreted as distinct records, with ordering
            given by lexicographic sorting of file names.

        ext: string, optional, default = None
            File extension, default will be "bin" if inputFormat=="stack", "tif" for inputFormat=='tif',
            and 'png' for inputFormat=="png".

        dtype: string or numpy dtype. optional, default 'int16'
            Data type of the image files to be loaded, specified as a numpy "dtype" string.
            Ignored for 'tif' or 'png' (data will be inferred from image formats).

        blockSize: string or positive int, optional, default "150M"
            Requested size of blocks (e.g "64M", "512k", "2G"). If shuffle=True, can also be a
            tuple of int specifying the number of pixels or splits per dimension. Indirectly
            controls the number of Spark partitions, with one partition per block.

        blockSizeUnits: string, either "pixels" or "splits", default "pixels"
            Units for interpreting a tuple passed as blockSize when shuffle=True.

        startIdx: nonnegative int, optional, default = None
            Convenience parameters to read only a subset of input files. Uses python slice conventions
            (zero-based indexing with exclusive final position). These parameters give the starting
            and final index after lexicographic sorting.

        stopIdx: nonnegative int, optional, default = None
            See startIdx.

        overwrite: boolean, optional, default False
            If true, the directory specified by outputDirPath will be deleted (recursively) if it
            already exists. (Use with caution.)

        recursive: boolean, optional, default = False
            If true, will recursively descend directories rooted at dataPath, loading all files
            in the tree with an appropriate extension.

        nplanes: positive integer, optional, default = None
            Subdivide individual image files. Every `nplanes` from each file will be considered a new record.
            With nplanes=None (the default), a single file will be considered as representing a single record.
            If the number of records per file is not the same across all files, then `renumber` should be set
            to True to ensure consistent keys.

        npartitions: positive int, optional, default = None
            Specify number of partitions for the RDD, if unspecified will use 1 partition per image.

        renumber: boolean, optional, default = False
            Recalculate keys for records after images are loading. Only necessary if different files contain
            different number of records (e.g. due to specifying nplanes). See Images.renumber().

        confFilename : string, optional, default = 'conf.json'
            Name of conf file if using to specify parameters for binary stack data

        """
        checkParams(inputFormat, ['stack', 'tif', 'tif-stack'])

        if not overwrite:
            raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=self._credentials)
            overwrite = True  # prevent additional downstream checks for this path

        if not ext:
            ext = DEFAULT_EXTENSIONS.get(inputFormat.lower(), None)

        from lambdaimage.rdds.fileio.imagesloader import ImagesLoader
        loader = ImagesLoader(self._sc)
        if inputFormat.lower() == 'stack':
            images = loader.fromStack(dataPath, dims, ext=ext, dtype=dtype, startIdx=startIdx, stopIdx=stopIdx,
                                      recursive=recursive, nplanes=nplanes, npartitions=npartitions,
                                      confFilename=confFilename)
        else:
            # 'tif' or 'tif-stack'
            images = loader.fromTif(dataPath, ext=ext, startIdx=startIdx, stopIdx=stopIdx,
                                    recursive=recursive, nplanes=nplanes, npartitions=npartitions)
        if renumber:
            images = images.renumber()
        images.toBlocks(blockSize, units=blockSizeUnits).saveAsBinarySeries(outputDirPath, overwrite=overwrite)
Ejemplo n.º 3
0
 def _checkOverwrite(self, outputDirPath):
     """ Checks for existence of outputDirPath, raising ValueError if it already exists """
     from lambdaimage.utils.aws import AWSCredentials
     from lambdaimage.utils.common import raiseErrorIfPathExists
     awsCredentialOverride = AWSCredentials.fromContext(self.rdd.ctx)
     raiseErrorIfPathExists(outputDirPath, awsCredentialsOverride=awsCredentialOverride)