def test_split_hdfsubreadset(self):
    """Split an HdfSubreadSet of 3 bax.h5 files into 2 chunks.

    Expects a 2/1 split of the external resources across the chunks.

    NOTE(review): fixed `upstreamData` -> `upstreamdata` to match the
    sibling test below (the capitalized form would raise NameError).
    This definition is also duplicated by the identically named test
    that follows it, which shadows this one — confirm intent and
    remove one of the two.
    """
    hdfds = HdfSubreadSet(*upstreamdata.getBaxH5_v23())
    self.assertEqual(len(hdfds.toExternalFiles()), 3)
    hdfdss = hdfds.split(chunks=2, ignoreSubDatasets=True)
    self.assertEqual(len(hdfdss), 2)
    self.assertEqual(len(hdfdss[0].toExternalFiles()), 2)
    self.assertEqual(len(hdfdss[1].toExternalFiles()), 1)
def test_split_hdfsubreadset(self):
    """Verify that splitting an HdfSubreadSet of three bax.h5 files into
    two chunks distributes the external resources as two-and-one."""
    dataset = HdfSubreadSet(*upstreamdata.getBaxH5_v23())
    self.assertEqual(len(dataset.toExternalFiles()), 3)
    split_sets = dataset.split(chunks=2, ignoreSubDatasets=True)
    self.assertEqual(len(split_sets), 2)
    # The 3 resources cannot split evenly: first chunk gets 2, second gets 1.
    for split_set, expected_count in zip(split_sets, (2, 1)):
        self.assertEqual(len(split_set.toExternalFiles()), expected_count)
def to_chunked_hdfsubreadset_files(hdfsubreadset_path, max_total_nchunks,
                                   chunk_key, dir_name, base_name, ext):
    """Split an HdfSubreadSet into up to ``max_total_nchunks`` chunk files.

    Each chunk dataset is written to ``dir_name`` as ``<base_name>_<i>.<ext>``
    and yielded as a PipelineChunk whose ``chunk_key`` entry is the absolute
    path of the written file.

    :param hdfsubreadset_path: path to the input HdfSubreadSet XML
    :param max_total_nchunks: maximum number of chunks to produce
    :param chunk_key: key under which each chunk path is registered
    :param dir_name: output directory for the chunk files
    :param base_name: file-name stem for the chunk files
    :param ext: file-name extension for the chunk files (without dot)
    :return: generator of PipelineChunk instances, one per chunk
    """
    dset = HdfSubreadSet(hdfsubreadset_path, strict=True)
    dset_chunks = dset.split(chunks=max_total_nchunks, ignoreSubDatasets=True)
    # Loop variable renamed from `dset` to avoid shadowing the parent dataset.
    for i, chunk_dset in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        chunk_dset.write(chunk_path)
        # Build a fresh dict per iteration instead of mutating one shared
        # dict across the loop (the old code worked only because ``**d``
        # copies at call time).
        yield PipelineChunk(chunk_id, **{chunk_key: os.path.abspath(chunk_path)})