def loadSeriesFromArray(self, values, index=None, npartitions=None):
    """
    Load Series data from a local array

    Parameters
    ----------
    values : list or ndarray
        A list of 1d numpy arrays, or a single 2d numpy array

    index : array-like, optional, default = None
        Index to set for Series object, if None will use linear indices.

    npartitions : positive int, optional, default = None
        Number of partitions for RDD, if unspecified will use default parallelism.

    Returns
    -------
    The object produced by SeriesLoader.fromArrays, with its ``index``
    attribute overwritten when an index is supplied.
    """
    from numpy import ndarray, asarray
    from thunder.rdds.fileio.seriesloader import SeriesLoader

    loader = SeriesLoader(self._sc)

    # None (or 0) means "unspecified": fall back to the context's default parallelism.
    if not npartitions:
        npartitions = self._sc.defaultParallelism

    # Normalise a list input to an ndarray so the dimensionality check below applies to it too.
    if isinstance(values, list):
        values = asarray(values)

    # Split a multi-dimensional array along its first axis into a list of sub-arrays.
    if isinstance(values, ndarray) and values.ndim > 1:
        values = list(values)

    data = loader.fromArrays(values, npartitions=npartitions)

    # Test against None explicitly: the truth value of a multi-element ndarray
    # is ambiguous and raises ValueError, so `if index:` breaks for array indices.
    if index is not None:
        data.index = index

    return data
def loadSeriesFromArray(self, values, index=None, npartitions=None):
    """
    Load Series data from a local array

    Parameters
    ----------
    values : list or ndarray
        A list of 1d numpy arrays, or a single 2d numpy array

    index : array-like, optional, default = None
        Index to set for Series object, if None will use linear indices.

    npartitions : positive int, optional, default = None
        Number of partitions for RDD, if unspecified will use default parallelism.

    Returns
    -------
    The object produced by SeriesLoader.fromArrays, with its ``index``
    attribute overwritten when an index is supplied.
    """
    from numpy import ndarray, asarray
    from thunder.rdds.fileio.seriesloader import SeriesLoader

    loader = SeriesLoader(self._sc)

    # None (or 0) means "unspecified": fall back to the context's default parallelism.
    if not npartitions:
        npartitions = self._sc.defaultParallelism

    # Normalise a list input to an ndarray so the dimensionality check below applies to it too.
    if isinstance(values, list):
        values = asarray(values)

    # Split a multi-dimensional array along its first axis into a list of sub-arrays.
    if isinstance(values, ndarray) and values.ndim > 1:
        values = list(values)

    data = loader.fromArrays(values, npartitions=npartitions)

    # Test against None explicitly: the truth value of a multi-element ndarray
    # is ambiguous and raises ValueError, so `if index:` breaks for array indices.
    if index is not None:
        data.index = index

    return data
def _run_roundtrip_exception_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
    """
    Check that converting a generated Series to blocks with `sizeSpec` raises ValueError.

    The Series is built from test images generated for the given number of
    images, array shape and dtype specification.
    """
    images = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    generatedSeries = SeriesLoader(self.sc).fromArrays(images)
    # toBlocks is passed uncalled so assert_raises can invoke it with sizeSpec itself.
    assert_raises(ValueError, generatedSeries.toBlocks, sizeSpec)
def _run_roundtrip_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
    """
    Check that a Series survives a Series -> blocks -> Series round trip.

    A Series is built from generated test images, split into blocks using
    `sizeSpec`, converted back to a Series with the original dtype, and the
    packed forms of the original and the result are asserted equal.
    """
    images = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    original = SeriesLoader(self.sc).fromArrays(images)

    restored = original.toBlocks(sizeSpec).toSeries(newDType=original.dtype)

    assert_true(array_equal(original.pack(), restored.pack()))
def _run_roundtrip_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
    """
    Check that a Series survives a Series -> blocks -> Series round trip.

    A Series is built from generated test images, split into blocks using
    `sizeSpec`, converted back to a Series with the original dtype, and the
    packed forms of the original and the result are asserted equal.
    """
    images = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    original = SeriesLoader(self.sc).fromArrays(images)

    restored = original.toBlocks(sizeSpec).toSeries(newDType=original.dtype)

    assert_true(array_equal(original.pack(), restored.pack()))
def _run_roundtrip_tst(self, testIdx, nimages, aryShape, dtypeSpec, npartitions):
    """
    Check that a Series saved with saveAsBinarySeries can be loaded back unchanged.

    Builds a Series from generated test images, repartitions it, saves it under
    ``<self.outputdir>/save<testIdx>``, reloads it with SeriesLoader.fromBinary
    and verifies the data, the number of ``*.bin`` files written, the contents
    of ``conf.json`` and the dtype of the reloaded Series.
    """
    testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    loader = SeriesLoader(self.sc)
    series = loader.fromArrays(testArrays)

    saveDirPath = os.path.join(self.outputdir, 'save%d' % testIdx)
    # note: this does an elementwise shuffle! won't be in sorted order
    # NOTE(review): the return value is discarded, so this relies on repartition
    # modifying `series` in place -- confirm against the Series implementation.
    series.repartition(npartitions)
    series.saveAsBinarySeries(saveDirPath)

    # Count the partitions that actually hold records. Summing over a generator
    # avoids the NameError a trailing `del` of the loop variable would raise
    # if collect() returned no partitions at all.
    nnonemptyPartitions = sum(1 for partitionList in series.rdd.glom().collect() if partitionList)
    nsaveFiles = len(glob.glob(os.path.join(saveDirPath, "*.bin")))

    roundtrippedSeries = loader.fromBinary(saveDirPath)

    with open(os.path.join(saveDirPath, "conf.json"), 'r') as fp:
        conf = json.load(fp)

    # sorting is required here b/c of the randomization induced by the repartition.
    # orig and roundtripped will in general be different from each other, since roundtripped
    # will have (0, 0, 0) index as first element (since it will be the lexicographically first
    # file) while orig has only a 1 in npartitions chance of starting with (0, 0, 0) after repartition.
    expectedPackedAry = series.pack(sorting=True)
    actualPackedAry = roundtrippedSeries.pack(sorting=True)

    assert_true(array_equal(expectedPackedAry, actualPackedAry))

    # the test expects one .bin file per non-empty partition
    assert_equals(nnonemptyPartitions, nsaveFiles)

    assert_equals(len(aryShape), conf["nkeys"])
    assert_equals(nimages, conf["nvalues"])
    assert_equals("int16", conf["keytype"])
    assert_equals(str(series.dtype), conf["valuetype"])
    # check that we have converted ourselves to an appropriate float after reloading
    assert_equals(str(smallestFloatType(series.dtype)), str(roundtrippedSeries.dtype))
def _run_roundtrip_tst(self, testIdx, nimages, aryShape, dtypeSpec, npartitions):
    """
    Check that a Series saved with saveAsBinarySeries can be loaded back unchanged.

    Builds a Series from generated test images, repartitions it, saves it under
    ``<self.outputdir>/save<testIdx>``, reloads it with SeriesLoader.fromBinary
    and verifies the data, the number of ``*.bin`` files written, the contents
    of ``conf.json`` and the dtype of the reloaded Series.
    """
    testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    loader = SeriesLoader(self.sc)
    series = loader.fromArrays(testArrays)

    saveDirPath = os.path.join(self.outputdir, 'save%d' % testIdx)
    # note: this does an elementwise shuffle! won't be in sorted order
    # NOTE(review): the return value is discarded, so this relies on repartition
    # modifying `series` in place -- confirm against the Series implementation.
    series.repartition(npartitions)
    series.saveAsBinarySeries(saveDirPath)

    # Count the partitions that actually hold records. Summing over a generator
    # avoids the NameError a trailing `del` of the loop variable would raise
    # if collect() returned no partitions at all.
    nnonemptyPartitions = sum(1 for partitionList in series.rdd.glom().collect() if partitionList)
    nsaveFiles = len(glob.glob(os.path.join(saveDirPath, "*.bin")))

    roundtrippedSeries = loader.fromBinary(saveDirPath)

    with open(os.path.join(saveDirPath, "conf.json"), 'r') as fp:
        conf = json.load(fp)

    # sorting is required here b/c of the randomization induced by the repartition.
    # orig and roundtripped will in general be different from each other, since roundtripped
    # will have (0, 0, 0) index as first element (since it will be the lexicographically first
    # file) while orig has only a 1 in npartitions chance of starting with (0, 0, 0) after repartition.
    expectedPackedAry = series.pack(sorting=True)
    actualPackedAry = roundtrippedSeries.pack(sorting=True)

    assert_true(array_equal(expectedPackedAry, actualPackedAry))

    # the test expects one .bin file per non-empty partition
    assert_equals(nnonemptyPartitions, nsaveFiles)

    assert_equals(len(aryShape), conf["nkeys"])
    assert_equals(nimages, conf["nvalues"])
    assert_equals("int16", conf["keytype"])
    assert_equals(str(series.dtype), conf["valuetype"])
    # check that we have converted ourselves to an appropriate float after reloading
    assert_equals(str(smallestFloatType(series.dtype)), str(roundtrippedSeries.dtype))
def _run_roundtrip_exception_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
    """
    Check that converting a generated Series to blocks with `sizeSpec` raises ValueError.

    The Series is built from test images generated for the given number of
    images, array shape and dtype specification.
    """
    images = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
    generatedSeries = SeriesLoader(self.sc).fromArrays(images)
    # toBlocks is passed uncalled so assert_raises can invoke it with sizeSpec itself.
    assert_raises(ValueError, generatedSeries.toBlocks, sizeSpec)