def loadImagesFromArray(self, values, npartitions=None):
    """
    Load Images data from a local array

    Parameters
    ----------
    values : list or ndarray
        A list of 2d or 3d numpy arrays, or a single 3d or 4d numpy array

    npartitions : positive int, optional, default = None
        Number of partitions for RDD, if unspecified will use
        default parallelism.

    Returns
    -------
    The result of ImagesLoader.fromArrays for the given values.
    """
    from numpy import ndarray, asarray
    from lambdaimage.rdds.fileio.imagesloader import ImagesLoader

    imagesLoader = ImagesLoader(self._sc)

    # a list is first stacked into one array, so that list and ndarray
    # inputs go through the same splitting step below
    arrays = asarray(values) if isinstance(values, list) else values

    # arrays with more than 2 dimensions are split along their first axis
    if isinstance(arrays, ndarray) and arrays.ndim > 2:
        arrays = list(arrays)

    # any falsy npartitions (None, 0) falls back to the context's default
    nparts = npartitions if npartitions else self._sc.defaultParallelism
    return imagesLoader.fromArrays(arrays, npartitions=nparts)
def test_toSeriesWithPack(self):
    # a single 2x4 int16 image, blocked with a "150M" size and converted to Series
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    image = ImagesLoader(self.sc).fromArrays(sourceAry)
    series = image.toBlocks("150M").toSeries()

    records = series.collect()
    packed = series.pack()
    packedTransposed = series.pack(transpose=True)

    # check ordering of keys
    expectedKeys = [(0, 0), (1, 0), (0, 1), (1, 1),
                    (0, 2), (1, 2), (0, 3), (1, 3)]
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, records[position][0])

    # check dimensions tuple matches numpy shape
    assert_equals(image.dims.count, series.dims.count)
    assert_equals(sourceAry.shape, series.dims.count)

    # check that values are in Fortran-convention order
    flatVals = array([record[1] for record in records], dtype=dtypeFunc('int16')).ravel()
    fortranOrdered = sourceAry.ravel(order='F')
    assert_true(array_equal(fortranOrdered, flatVals))

    # check that packing returns original array
    assert_true(array_equal(sourceAry, packed))
    assert_true(array_equal(sourceAry.T, packedTransposed))
def test_toSeriesWithInefficientSplitAndSortedPack(self):
    # a single 4x2 int16 image, split with (2, 1) splits per dimension
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((4, 2))
    image = ImagesLoader(self.sc).fromArrays(sourceAry)
    series = image.toBlocks((2, 1), units="s").toSeries()

    records = series.collect()
    sortedPack = series.pack(sorting=True)

    # check ordering of keys
    expectedKeys = [
        # first block
        (0, 0), (1, 0), (0, 1), (1, 1),
        # second block
        (2, 0), (3, 0), (2, 1), (3, 1),
    ]
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, records[position][0])

    # check dimensions tuple matches numpy shape
    assert_equals(sourceAry.shape, series.dims.count)

    # check that values are in expected order
    flatVals = array([record[1] for record in records], dtype=dtypeFunc('int16')).ravel()
    firstBlock = sourceAry[:2, :].ravel(order='F')
    assert_true(array_equal(firstBlock, flatVals[:4]))
    secondBlock = sourceAry[2:4, :].ravel(order='F')
    assert_true(array_equal(secondBlock, flatVals[4:]))

    # check that packing returns original array (after sort)
    assert_true(array_equal(sourceAry, sortedPack))
def test_fromStackToSeriesWithPack(self):
    # write a 2x4 int16 array to a binary stack file, then read it back with dims=(4, 2)
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    stackPath = os.path.join(self.outputdir, "test.stack")
    sourceAry.tofile(stackPath)

    image = ImagesLoader(self.sc).fromStack(stackPath, dims=(4, 2))
    blockingStrategy = SimpleBlockingStrategy.generateFromBlockSize(image, "150M")
    series = image.toBlocks(blockingStrategy).toSeries()

    records = series.collect()
    packed = series.pack()

    # check ordering of keys
    expectedKeys = [(0, 0), (1, 0), (2, 0), (3, 0),
                    (0, 1), (1, 1), (2, 1), (3, 1)]
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, records[position][0])

    # check dimensions tuple is reversed from numpy shape
    reversedShape = sourceAry.shape[::-1]
    assert_equals(reversedShape, series.dims.count)

    # check that values are in original order
    flatVals = array([record[1] for record in records], dtype=dtypeFunc('int16')).ravel()
    assert_true(array_equal(sourceAry.ravel(), flatVals))

    # check that packing returns transpose of original array
    assert_true(array_equal(sourceAry.T, packed))
def setUp(self):
    super(TestBlockKeys, self).setUp()

    # fixture: three 30x30 images of ones, cut into blocks with size=(10, 10)
    imageShape = (30, 30)
    onesImages = [ones(imageShape) for _ in range(3)]
    images = ImagesLoader(self.sc).fromArrays(onesImages)

    self.blocks = images.toBlocks(size=(10, 10)).collect()
    # keep the keys of the collected (key, value) block records for the tests
    self.keys = [blockKey for blockKey, _ in self.blocks]
def test_fromMultiTimepointStacks(self):
    # two stack files, each holding a 4x2x2 uint8 array, read back with nplanes=2
    firstStack = arange(16, dtype=dtypeFunc('uint8')).reshape((4, 2, 2))
    secondStack = arange(16, 32, dtype=dtypeFunc('uint8')).reshape((4, 2, 2))
    firstStack.tofile(os.path.join(self.outputdir, "test01.stack"))
    secondStack.tofile(os.path.join(self.outputdir, "test02.stack"))

    image = ImagesLoader(self.sc).fromStack(self.outputdir, dtype="uint8", dims=(2, 2, 4), nplanes=2)
    collected = image.collect()

    # we don't expect to have nrecords cached, since we get an unknown number of images per file
    assert_true(image._nrecords is None)
    assert_equals(4, image.nrecords)
    assert_equals(4, len(collected))

    # check keys:
    for expectedKey in range(4):
        assert_equals(expectedKey, collected[expectedKey][0])

    # check values:
    expectedValues = [firstStack[:2].T, firstStack[2:].T,
                      secondStack[:2].T, secondStack[2:].T]
    for position, expectedValue in enumerate(expectedValues):
        assert_true(array_equal(expectedValue, collected[position][1]))

    # 3 planes does not divide 4
    assert_raises(ValueError, ImagesLoader(self.sc).fromStack, self.outputdir, dtype="uint8",
                  dims=(2, 2, 4), nplanes=3)
def test_castToFloat(self):
    # casting uint8 images with astype("smallfloat") is expected to yield float16,
    # both in the reported dtype and in the first record's actual array
    arys, shape, size = _generateTestArrays(2, 'uint8')
    images = ImagesLoader(self.sc).fromArrays(arys)
    castImages = images.astype("smallfloat")

    reportedDtype = str(castImages.dtype)
    assert_equals('float16', reportedDtype)

    firstValueDtype = str(castImages.first()[1].dtype)
    assert_equals('float16', firstValueDtype)
def test_mean(self):
    from test_utils import elementwiseMean

    # mean over two uint8 images should match the elementwise mean and come back as float64
    arys, shape, size = _generateTestArrays(2, 'uint8')
    images = ImagesLoader(self.sc).fromArrays(arys)
    actualMean = images.mean()

    expectedMean = elementwiseMean(arys).astype('float16')
    assert_true(allclose(expectedMean, actualMean))

    actualDtype = str(actualMean.dtype)
    assert_equals('float64', actualDtype)
def test_stdev(self):
    from test_utils import elementwiseStdev

    # stdev over two uint8 images should match the elementwise stdev and come back as float64
    arys, shape, size = _generateTestArrays(2, 'uint8')
    images = ImagesLoader(self.sc).fromArrays(arys)
    actualStdev = images.stdev()

    floatArys = [ary.astype('float16') for ary in arys]
    expectedStdev = elementwiseStdev(floatArys)
    assert_true(allclose(expectedStdev, actualStdev))

    actualDtype = str(actualStdev.dtype)
    assert_equals('float64', actualDtype)
def test_toSeries(self):
    # create 3 arrays of 4x3x3 images (C-order), containing sequential integers
    arrayCount = 3
    arys, sh, sz = _generateTestArrays(arrayCount)

    images = ImagesLoader(self.sc).fromArrays(arys)
    blocks = images.toBlocks((4, 1, 1), units="s")
    collectedSeries = blocks.toSeries().collect()

    # shared checker compares the collected series against the source arrays
    self.evaluateSeries(arys, collectedSeries, sz)
def _run_tstSaveAsBinarySeries(self, testIdx, narys_, valDtype, groupingDim_):
    """Pseudo-parameterized test fixture, allows reusing existing spark context

    Saves generated test arrays as a binary series split along `groupingDim_`,
    then reads back every "*.bin" file and conf.json to check their contents.
    """
    paramStr = "(groupingdim=%d, valuedtype='%s')" % (groupingDim_, valDtype)
    arys, aryShape, _ = _generateTestArrays(narys_, dtype_=valDtype)
    dims = aryShape[:]
    outdir = os.path.join(self.outputdir, "anotherdir%02d" % testIdx)

    images = ImagesLoader(self.sc).fromArrays(arys)

    # one split everywhere except the grouping dimension, which is split fully
    firstAry = arys[0]
    slicesPerDim = [1] * firstAry.ndim
    slicesPerDim[groupingDim_] = firstAry.shape[groupingDim_]
    images.toBlocks(slicesPerDim, units="splits").saveAsBinarySeries(outdir)

    ndims = len(aryShape)
    # prevent padding to 4-byte boundaries: "=" specifies no alignment
    recordFormat = '=' + 'h' * ndims + dtypeFunc(valDtype).char * narys_
    unpacker = struct.Struct(recordFormat)

    # each file should hold one record per position in the non-grouping dimensions
    remainingShape = list(dims[:])
    del remainingShape[groupingDim_]
    expectedNKeys = prod(remainingShape)

    outFilenames = glob.glob(os.path.join(outdir, "*.bin"))
    assert_equals(dims[groupingDim_], len(outFilenames))

    for outFilename in outFilenames:
        with open(outFilename, 'rb') as f:
            nkeys = 0
            # read fixed-size records until the file is exhausted
            while True:
                rec = f.read(unpacker.size)
                if not rec:
                    break
                allRecVals = unpacker.unpack(rec)
                keys = allRecVals[:ndims]
                vals = allRecVals[ndims:]

                nkeys += 1
                assert_equals(narys_, len(vals))
                for valIdx, val in enumerate(vals):
                    expectedVal = arys[valIdx][keys]
                    assert_equals(expectedVal, val, "Expected %g, got %g, for test %d %s" %
                                  (expectedVal, val, testIdx, paramStr))
            assert_equals(expectedNKeys, nkeys)

    confName = os.path.join(outdir, "conf.json")
    assert_true(os.path.isfile(confName))
    with open(confName, 'r') as fconf:
        import json
        conf = json.load(fconf)
        assert_equals(outdir, conf['input'])
        assert_equals(len(aryShape), conf['nkeys'])
        assert_equals(narys_, conf['nvalues'])
        assert_equals(valDtype, conf['valuetype'])
        assert_equals('int16', conf['keytype'])

    assert_true(os.path.isfile(os.path.join(outdir, 'SUCCESS')))
def test_toTimeSeries(self):
    # create 3 arrays of 4x3x3 images (C-order), containing sequential integers
    arrayCount = 3
    arys, sh, sz = _generateTestArrays(arrayCount)

    images = ImagesLoader(self.sc).fromArrays(arys)
    timeSeries = images.toTimeSeries()

    # only the returned type is checked here
    assert isinstance(timeSeries, TimeSeries)
def test_sum(self):
    from numpy import add

    # summing two uint8 images with dtype='uint32' should match an elementwise uint32 sum
    arys, shape, size = _generateTestArrays(2, 'uint8')
    images = ImagesLoader(self.sc).fromArrays(arys)
    actualSum = images.sum(dtype='uint32')

    widenedArys = [ary.astype('uint32') for ary in arys]
    expectedSum = reduce(add, widenedArys)
    assert_true(array_equal(expectedSum, actualSum))

    actualDtype = str(actualSum.dtype)
    assert_equals('uint32', actualDtype)
def _run_tst_roundtripThroughBlocks(self, strategy):
    # helper: images -> blocks (using `strategy`) -> images should reproduce the originals
    imagePath = findSourceTreeDir("utils/data/fish/images")
    images = ImagesLoader(self.sc).fromTif(imagePath)
    recombined = images.toBlocks(strategy).toImages()

    originalRecords = images.collect()
    roundtrippedRecords = recombined.collect()

    # compare values pairwise; zip stops at the shorter of the two collections
    for originalRecord, roundtrippedRecord in zip(originalRecords, roundtrippedRecords):
        assert_true(array_equal(originalRecord[1], roundtrippedRecord[1]))
def test_fromArrays(self):
    # a single 2d array should load as exactly one image record with key 0
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    image = ImagesLoader(self.sc).fromArrays(sourceAry)
    records = image.collect()

    assert_equals(1, len(records))
    assert_equals(sourceAry.shape, image.dims.count)

    firstRecord = records[0]
    assert_equals(0, firstRecord[0])  # check key
    assert_true(array_equal(sourceAry, firstRecord[1]))  # check value
def test_meanWithSingleRegionIndices3D(self):
    # two 2x2x3 int32 images; the second is the first plus 10 at every position
    firstAry = array([[[3, 5, 3], [6, 8, 6]],
                      [[3, 5, 3], [6, 8, 6]]], dtype='int32')
    secondAry = array([[[13, 15, 13], [16, 18, 16]],
                       [[13, 15, 13], [16, 18, 16]]], dtype='int32')
    images = ImagesLoader(self.sc).fromArrays([firstAry, secondAry])

    # one region with two indices
    regionIndices = [[(1, 1, 1), (0, 0, 0)]]
    regionMeans = images.meanByRegions(regionIndices)
    self.__checkAttrPropagation(regionMeans, (1, 1))

    records = regionMeans.collect()
    # check values
    for position, expectedMean in enumerate([5, 15]):
        assert_equals(expectedMean, records[position][1][0])
def test_toBlocksWithSplit(self):
    # a 2x4 image split (1, 2) ways should give two blocks, keyed (0, 0) and (0, 2)
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    image = ImagesLoader(self.sc).fromArrays(sourceAry)
    collectedBlocks = image.toBlocks((1, 2), units="s").collect()

    # (expected spatial key, expected slice of the source array) per block
    expectedBlocks = [
        ((0, 0), sourceAry[:, :2]),
        ((0, 2), sourceAry[:, 2:]),
    ]
    for position, (expectedKey, expectedAry) in enumerate(expectedBlocks):
        blockRecord = collectedBlocks[position]
        assert_equals(expectedKey, blockRecord[0].spatialKey)
        assert_true(array_equal(expectedAry.ravel(), blockRecord[1].ravel()))
def test_planes(self):
    # selecting planes 0..2 should match slicing the last axis of the source array
    dims = (2, 2, 4)
    elementCount = reduce(lambda x, y: x*y, dims)
    sourceAry = arange(elementCount, dtype=dtypeFunc('int16')).reshape(dims)
    images = ImagesLoader(self.sc).fromArrays([sourceAry])

    planedImages = images.planes(0, 2)
    actualPlanes = planedImages.collect()[0][1]

    expectedPlanes = squeeze(sourceAry[:, :, 0:2])
    assert_true(array_equal(expectedPlanes, actualPlanes))

    # cached dims and dtype attributes must agree with the expected array
    assert_equals(tuple(expectedPlanes.shape), planedImages._dims.count)
    assert_equals(str(expectedPlanes.dtype), planedImages._dtype)
def test_subtract(self):
    arrayCount = 3
    arys, sh, sz = _generateTestArrays(arrayCount)
    # subtract both a scalar and a full array with the same shape as the images
    subtrahends = [1, arange(sz, dtype=dtypeFunc('int16')).reshape(sh)]

    images = ImagesLoader(self.sc).fromArrays(arys)
    for subtrahend in subtrahends:
        subtractedRecords = images.subtract(subtrahend).collect()
        expectedArys = [ary - subtrahend for ary in arys]
        for actualRecord, expectedAry in zip(subtractedRecords, expectedArys):
            assert_true(allclose(expectedAry, actualRecord[1]))
def test_stats(self):
    from test_utils import elementwiseMean, elementwiseVar

    arys, shape, size = _generateTestArrays(2, 'uint8')
    images = ImagesLoader(self.sc).fromArrays(arys)
    statsCounter = images.stats()

    floatArys = [ary.astype('float16') for ary in arys]

    # StatsCounter contains a few different measures, only test a couple:
    expectedMean = elementwiseMean(floatArys)
    expectedVariance = elementwiseVar(floatArys)

    actualMean = statsCounter.mean()
    assert_true(allclose(expectedMean, actualMean))
    actualVariance = statsCounter.variance()
    assert_true(allclose(expectedVariance, actualVariance))
def test_fromStack(self):
    # write a 2x4 int16 array to a binary stack file and read it back with dims=(4, 2)
    sourceAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    stackPath = os.path.join(self.outputdir, "test.stack")
    sourceAry.tofile(stackPath)

    image = ImagesLoader(self.sc).fromStack(stackPath, dims=(4, 2))
    records = image.collect()

    assert_equals(1, len(records))
    firstRecord = records[0]
    assert_equals(0, firstRecord[0])  # check key

    # assert that image shape *matches* that in image dimensions:
    assert_equals(image.dims.count, firstRecord[1].shape)
    assert_true(array_equal(sourceAry.T, firstRecord[1]))  # check value
def test_fromPng(self):
    # load a single greyscale png and check its key, type, shape and value range
    pngPath = os.path.join(self.testResourcesDir, "singlelayer_png", "dot1_grey.png")
    pngImages = ImagesLoader(self.sc).fromPng(pngPath)
    firstRecord = pngImages.first()

    assert_equals(0, firstRecord[0],
                  "Key error; expected first image key to be 0, was "+str(firstRecord[0]))

    expectedShape = (70, 75)
    assert_true(isinstance(firstRecord[1], ndarray),
                "Value type error; expected first image value to be numpy ndarray, was " +
                str(type(firstRecord[1])))
    assert_equals(expectedShape, firstRecord[1].shape)
    assert_equals(expectedShape, pngImages.dims.count)

    flatPixels = firstRecord[1].ravel()
    assert_almost_equal(0.937, flatPixels.max(), places=2)  # integer val 239
    assert_almost_equal(0.00392, firstRecord[1].ravel().min(), places=2)  # integer val 1
def test_fromTif(self):
    # load a single greyscale LZW tif and check its key, type, shape and value range
    tifPath = os.path.join(self.testResourcesDir, "singlelayer_tif", "dot1_grey_lzw.tif")
    tifImages = ImagesLoader(self.sc).fromTif(tifPath)
    firstRecord = tifImages.first()

    assert_equals(0, firstRecord[0],
                  "Key error; expected first image key to be 0, was "+str(firstRecord[0]))

    expectedShape = (70, 75)
    assert_true(isinstance(firstRecord[1], ndarray),
                "Value type error; expected first image value to be numpy ndarray, was " +
                str(type(firstRecord[1])))
    assert_equals(expectedShape, firstRecord[1].shape)
    assert_equals(expectedShape, tifImages.dims.count)

    # unlike the png test, pixel values here are compared as integers
    assert_equals(239, firstRecord[1].ravel().max())
    assert_equals(1, firstRecord[1].ravel().min())
def test_saveAsBinaryImages(self):
    # Saving three images is expected to produce one "image-NNNNN.bin" file per image,
    # plus a SUCCESS marker and a conf.json, inside the output directory.
    narys = 3
    arys, aryShape, _ = _generateTestArrays(narys)

    outdir = os.path.join(self.outputdir, "binary-images")

    images = ImagesLoader(self.sc).fromArrays(arys)
    images.saveAsBinaryImages(outdir)

    outFilenames = sorted(glob.glob(os.path.join(outdir, "*.bin")))
    # Build a real list rather than using map(): on Python 3 map() returns an
    # iterator, which never compares equal to the list returned by sorted().
    trueFilenames = [os.path.join(outdir, f) for f in
                     ['image-00000.bin', 'image-00001.bin', 'image-00002.bin']]

    assert_true(os.path.isfile(os.path.join(outdir, 'SUCCESS')))
    assert_true(os.path.isfile(os.path.join(outdir, "conf.json")))
    assert_equals(outFilenames, trueFilenames)
def test_toSeriesBySlices(self):
    arrayCount = 3
    arys, sh, sz = _generateTestArrays(arrayCount)

    images = ImagesLoader(self.sc).fromArrays(arys)
    # the same images are re-blocked for every parameter set below
    images.cache()

    # all splits-per-dimension combinations: 1-2 along the first axis and 1-3 along the
    # other two, enumerated with the last axis varying fastest
    splitCombinations = [(first, second, third)
                         for first in (1, 2)
                         for second in (1, 2, 3)
                         for third in (1, 2, 3)]

    for splitsPerDim in splitCombinations:
        collectedSeries = images.toBlocks(splitsPerDim, units="s").toSeries().collect()
        self.evaluateSeries(arys, collectedSeries, sz)
def test_fromStacks(self):
    # two stack files in one directory should load as two images, keyed 0 and 1
    firstAry = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    secondAry = arange(8, 16, dtype=dtypeFunc('int16')).reshape((2, 4))
    firstAry.tofile(os.path.join(self.outputdir, "test01.stack"))
    secondAry.tofile(os.path.join(self.outputdir, "test02.stack"))

    image = ImagesLoader(self.sc).fromStack(self.outputdir, dims=(4, 2))
    records = image.collect()
    assert_equals(2, len(records))

    firstRecord = records[0]
    assert_equals(0, firstRecord[0])  # check key
    assert_equals(image.dims.count, firstRecord[1].shape)
    assert_true(array_equal(firstAry.T, firstRecord[1]))  # check value

    secondRecord = records[1]
    assert_equals(1, secondRecord[0])  # check image 2
    assert_true(array_equal(secondAry.T, secondRecord[1]))
def test_fromSignedIntTif(self):
    # a multi-page signed-int tif loaded with nplanes=1 should give two int16 images
    tifPath = os.path.join(self.testResourcesDir, "multilayer_tif", "test_signed.tif")
    tifImages = ImagesLoader(self.sc).fromTif(tifPath, nplanes=1)
    assert_equals(2, tifImages.nrecords)
    assert_equals('int16', tifImages.dtype)

    records = tifImages.collect()
    assert_equals(2, len(records), "Expected 2 images, got %d" % len(records))

    expectedSums = [1973201, 2254767]
    # keys must be sequential from 0; each key also selects its expected pixel sum
    for expectedIdx, (idx, tifAry) in enumerate(records):
        assert_equals((120, 120), tifAry.shape)
        assert_equals('int16', str(tifAry.dtype))
        assert_equals(expectedIdx, idx)
        assert_equals(expectedSums[idx], tifAry.ravel().sum())
def loadImagesOCP(self, bucketName, resolution, server='ocp.me', startIdx=None, stopIdx=None,
                  minBound=None, maxBound=None):
    """
    Load Images from OCP (Open Connectome Project).

    The OCP is a web service for access to EM brain images and other neural image data.
    The web-service can be accessed at http://www.openconnectomeproject.org/.

    Parameters
    ----------
    bucketName: string
        Token name for the project in OCP. This name should exist on the server from
        which data is loaded.

    resolution: nonnegative int
        Resolution of the data in OCP

    server: string, optional, default = 'ocp.me'
        Name of the OCP server with the specified token.

    startIdx: nonnegative int, optional, default = None
        Convenience parameters to read only a subset of input files. Uses python slice
        conventions (zero-based indexing with exclusive final position).

    stopIdx: nonnegative int, optional
        See startIdx.

    minBound, maxBound: tuple of nonnegative int, optional, default = None
        X,Y,Z bounds of the data to fetch from OCP. minBound contains the
        (xMin,yMin,zMin) while maxBound contains (xMax,yMax,zMax).

    Returns
    -------
    data: lambdaimage.rdds.Images
        An Images object, wrapping an RDD of (int) : (numpy array) pairs

    Raises
    ------
    ValueError
        If both startIdx and stopIdx are given and startIdx is larger than stopIdx.
    """
    # Validate the arguments before importing or constructing the loader, so that a bad
    # index range fails fast. ValueError is a subclass of Exception, so callers that
    # catch the previously raised generic Exception keep working.
    if startIdx is not None and stopIdx is not None and startIdx > stopIdx:
        raise ValueError("Error. startIdx {} is larger than stopIdx {}".format(startIdx, stopIdx))

    from lambdaimage.rdds.fileio.imagesloader import ImagesLoader
    loader = ImagesLoader(self._sc)

    return loader.fromOCP(bucketName, resolution=resolution, server=server,
                          startIdx=startIdx, stopIdx=stopIdx,
                          minBound=minBound, maxBound=maxBound)
def test_fromMultiTimepointTif(self):
    # a multi-page tif loaded with nplanes=1 should give three images
    tifPath = os.path.join(self.testResourcesDir, "multilayer_tif", "dotdotdot_lzw.tif")
    tifImages = ImagesLoader(self.sc).fromTif(tifPath, nplanes=1)

    # we don't expect to have nrecords cached, since the driver doesn't know how many
    # images there are per file
    assert_true(tifImages._nrecords is None)
    assert_equals(3, tifImages.nrecords)

    records = tifImages.collect()
    assert_equals(3, len(records), "Expected 3 images, got %d" % len(records))

    expectedSums = [1140006, 1119161, 1098917]
    # keys must be sequential from 0; each key also selects its expected pixel sum
    for expectedIdx, (idx, tifAry) in enumerate(records):
        assert_equals((70, 75), tifAry.shape)
        assert_equals(expectedIdx, idx)
        assert_equals(expectedSums[idx], tifAry.ravel().sum())
def _run_tst_crop(self, minBounds, maxBounds):
    # Helper: crop a (2, 2, 4) int16 image with the given bounds and compare the result
    # against the same region taken directly from the source array.
    dims = (2, 2, 4)
    sz = reduce(lambda x, y: x*y, dims)
    origAry = arange(sz, dtype=dtypeFunc('int16')).reshape(dims)
    imageData = ImagesLoader(self.sc).fromArrays([origAry])
    croppedData = imageData.crop(minBounds, maxBounds)
    crop = croppedData.collect()[0][1]

    # skip the bounds-checking that we do in actual function; assume minb is <= maxb.
    # Equal bounds select a single index along that axis rather than a range.
    slices = [slice(minb, maxb) if minb < maxb else minb
              for minb, maxb in zip(minBounds, maxBounds)]

    # Index with a tuple: multidimensional indexing with a list of slices is deprecated
    # since NumPy 1.15 and is rejected by current NumPy releases.
    expected = squeeze(origAry[tuple(slices)])
    assert_true(array_equal(expected, crop))
    assert_equals(tuple(expected.shape), croppedData._dims.count)
    assert_equals(str(expected.dtype), croppedData._dtype)