def _run_tstSaveAsBinarySeries(self, testIdx, narys_, valDtype, groupingDim_):
    """Pseudo-parameterized test fixture, allows reusing existing spark context.

    Saves a blocked Images RDD as binary series files, then re-reads the
    emitted *.bin files record-by-record and validates keys, values, the
    conf.json metadata, and the SUCCESS marker file.

    Parameters
    ----------
    testIdx : int
        Index used to build a unique output subdirectory per invocation.
    narys_ : int
        Number of generated test arrays (and thus values per record).
    valDtype : str
        Numpy dtype name for the generated array values.
    groupingDim_ : int
        Dimension split one-slice-per-plane when forming blocks.
    """
    paramStr = "(groupingdim=%d, valuedtype='%s')" % (groupingDim_, valDtype)
    arys, aryShape, arySize = _generateTestArrays(narys_, dtype_=valDtype)
    dims = aryShape[:]
    outdir = os.path.join(self.outputdir, "anotherdir%02d" % testIdx)

    images = ImagesLoader(self.sc).fromArrays(arys)
    # one block per plane along the grouping dimension, single slice elsewhere
    slicesPerDim = [1] * arys[0].ndim
    slicesPerDim[groupingDim_] = arys[0].shape[groupingDim_]
    images.toBlocks(slicesPerDim, units="splits").saveAsBinarySeries(outdir)

    ndims = len(aryShape)
    # prevent padding to 4-byte boundaries: "=" specifies no alignment
    unpacker = struct.Struct('=' + 'h' * ndims + dtypeFunc(valDtype).char * narys_)

    def calcExpectedNKeys():
        # records per file: product of all dims except the grouping dimension
        tmpShape = list(dims[:])
        del tmpShape[groupingDim_]
        return prod(tmpShape)
    expectedNKeys = calcExpectedNKeys()

    def byrec(f_, unpacker_, nkeys_):
        # yield (keys, values) tuples from fixed-size binary records until EOF
        rec = True
        while rec:
            rec = f_.read(unpacker_.size)
            if rec:
                allRecVals = unpacker_.unpack(rec)
                yield allRecVals[:nkeys_], allRecVals[nkeys_:]

    outFilenames = glob.glob(os.path.join(outdir, "*.bin"))
    # expect one output file per plane along the grouping dimension
    assert_equals(dims[groupingDim_], len(outFilenames))
    for outFilename in outFilenames:
        with open(outFilename, 'rb') as f:
            nkeys = 0
            for keys, vals in byrec(f, unpacker, ndims):
                nkeys += 1
                assert_equals(narys_, len(vals))
                for valIdx, val in enumerate(vals):
                    assert_equals(arys[valIdx][keys], val,
                                  "Expected %g, got %g, for test %d %s" %
                                  (arys[valIdx][keys], val, testIdx, paramStr))
            assert_equals(expectedNKeys, nkeys)

    # reuse confName instead of re-joining the identical path a second time
    confName = os.path.join(outdir, "conf.json")
    assert_true(os.path.isfile(confName))
    import json
    with open(confName, 'r') as fconf:
        conf = json.load(fconf)
    assert_equals(outdir, conf['input'])
    assert_equals(len(aryShape), conf['nkeys'])
    assert_equals(narys_, conf['nvalues'])
    assert_equals(valDtype, conf['valuetype'])
    assert_equals('int16', conf['keytype'])

    assert_true(os.path.isfile(os.path.join(outdir, 'SUCCESS')))
def test_toSeriesWithPack(self):
    """Blocked 2x4 image converts to Series with Fortran-ordered keys and packs back intact."""
    ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    image = ImagesLoader(self.sc).fromArrays(ary)
    series = image.toBlocks("150M").toSeries()
    seriesVals = series.collect()
    seriesAry = series.pack()
    seriesAry_xpose = series.pack(transpose=True)

    # keys should come back in Fortran (column-major) order
    expectedKeys = [(0, 0), (1, 0), (0, 1), (1, 1),
                    (0, 2), (1, 2), (0, 3), (1, 3)]
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, seriesVals[position][0])

    # dimensions tuple matches numpy shape
    assert_equals(image.dims.count, series.dims.count)
    assert_equals(ary.shape, series.dims.count)

    # collected values follow Fortran-convention ordering
    collectedVals = array([record[1] for record in seriesVals],
                          dtype=dtypeFunc('int16')).ravel()
    assert_true(array_equal(ary.ravel(order='F'), collectedVals))

    # packing reproduces the original array (and its transpose when requested)
    assert_true(array_equal(ary, seriesAry))
    assert_true(array_equal(ary.T, seriesAry_xpose))
def test_fromStackToSeriesWithPack(self):
    """Series loaded from a binary stack file has reversed dims and packs as the transpose."""
    ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    filename = os.path.join(self.outputdir, "test.stack")
    ary.tofile(filename)

    image = ImagesLoader(self.sc).fromStack(filename, dims=(4, 2))
    strategy = SimpleBlockingStrategy.generateFromBlockSize(image, "150M")
    series = image.toBlocks(strategy).toSeries()
    seriesVals = series.collect()
    seriesAry = series.pack()

    # keys should come back in Fortran order over the (4, 2) stack dims
    expectedKeys = [(0, 0), (1, 0), (2, 0), (3, 0),
                    (0, 1), (1, 1), (2, 1), (3, 1)]
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, seriesVals[position][0])

    # dimensions tuple is reversed relative to the numpy shape
    assert_equals(ary.shape[::-1], series.dims.count)

    # collected values retain the original file ordering
    collectedVals = array([record[1] for record in seriesVals],
                          dtype=dtypeFunc('int16')).ravel()
    assert_true(array_equal(ary.ravel(), collectedVals))

    # packing yields the transpose of the original array
    assert_true(array_equal(ary.T, seriesAry))
def test_toSeriesWithInefficientSplitAndSortedPack(self):
    """A row-wise block split yields per-block key runs; sorted pack restores the array."""
    ary = arange(8, dtype=dtypeFunc('int16')).reshape((4, 2))
    image = ImagesLoader(self.sc).fromArrays(ary)
    series = image.toBlocks((2, 1), units="s").toSeries()
    seriesVals = series.collect()
    seriesAry = series.pack(sorting=True)

    # keys arrive Fortran-ordered within each block: first block, then second
    expectedKeys = [(0, 0), (1, 0), (0, 1), (1, 1),   # first block
                    (2, 0), (3, 0), (2, 1), (3, 1)]   # second block
    for position, expectedKey in enumerate(expectedKeys):
        assert_equals(expectedKey, seriesVals[position][0])

    # dimensions tuple matches numpy shape
    assert_equals(ary.shape, series.dims.count)

    # values are grouped by block, Fortran-ordered inside each block
    collectedVals = array([record[1] for record in seriesVals],
                          dtype=dtypeFunc('int16')).ravel()
    assert_true(array_equal(ary[:2, :].ravel(order='F'), collectedVals[:4]))   # first block
    assert_true(array_equal(ary[2:4, :].ravel(order='F'), collectedVals[4:]))  # second block

    # sorted packing reproduces the original array
    assert_true(array_equal(ary, seriesAry))
def setUp(self):
    """Build three 30x30 all-ones images, block them into 10x10 tiles, and keep the keys."""
    super(TestBlockKeys, self).setUp()
    shape = (30, 30)
    # range(3) rather than range(0, 3); discard unused block values with `_`
    arys = [ones(shape) for _ in range(3)]
    data = ImagesLoader(self.sc).fromArrays(arys)
    self.blocks = data.toBlocks(size=(10, 10)).collect()
    self.keys = [k for k, _ in self.blocks]
def test_toSeries(self):
    """Blocked conversion of sequential-integer test images produces a valid Series."""
    # create 3 arrays of 4x3x3 images (C-order), containing sequential integers
    numArrays = 3
    testArys, testShape, testSize = _generateTestArrays(numArrays)
    loadedImages = ImagesLoader(self.sc).fromArrays(testArys)
    collectedSeries = loadedImages.toBlocks((4, 1, 1), units="s").toSeries().collect()
    self.evaluateSeries(testArys, collectedSeries, testSize)
def _run_tst_roundtripThroughBlocks(self, strategy):
    """Images -> blocks -> images must reproduce every original image exactly."""
    imagePath = findSourceTreeDir("utils/data/fish/images")
    originalImages = ImagesLoader(self.sc).fromTif(imagePath)
    rebuiltImages = originalImages.toBlocks(strategy).toImages()
    originalRecords = originalImages.collect()
    rebuiltRecords = rebuiltImages.collect()
    # compare pixel data pairwise; keys are positional so zip aligns them
    for originalRecord, rebuiltRecord in zip(originalRecords, rebuiltRecords):
        assert_true(array_equal(originalRecord[1], rebuiltRecord[1]))
def test_toBlocksWithSplit(self):
    """Splitting a 2x4 image into (1, 2) blocks yields two blocks with correct keys and data.

    Removed a stale commented-out line that referenced an undefined `blocks`
    variable.
    """
    ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
    image = ImagesLoader(self.sc).fromArrays(ary)
    groupedblocks = image.toBlocks((1, 2), units="s")
    collectedgroupedblocks = groupedblocks.collect()
    # first block covers columns 0-1, keyed at origin (0, 0)
    assert_equals((0, 0), collectedgroupedblocks[0][0].spatialKey)
    assert_true(array_equal(ary[:, :2].ravel(), collectedgroupedblocks[0][1].ravel()))
    # second block covers columns 2-3, keyed at its top-left corner (0, 2)
    assert_equals((0, 2), collectedgroupedblocks[1][0].spatialKey)
    assert_true(array_equal(ary[:, 2:].ravel(), collectedgroupedblocks[1][1].ravel()))
def test_toSeriesBySlices(self):
    """toSeries must produce identical results for every splits-per-dimension choice.

    The hand-enumerated 18-tuple parameter list is replaced by
    itertools.product, which generates exactly the same tuples in the same
    order: (1..2) x (1..3) x (1..3).
    """
    from itertools import product

    narys = 3
    arys, sh, sz = _generateTestArrays(narys)
    imageData = ImagesLoader(self.sc).fromArrays(arys)
    imageData.cache()

    for bpd in product((1, 2), (1, 2, 3), (1, 2, 3)):
        series = imageData.toBlocks(bpd, units="s").toSeries().collect()
        self.evaluateSeries(arys, series, sz)