def test(self):
    # Check that Starmap.submit_split distributes the elements into
    # subtasks and the reduced sum matches the expected checksum.
    rng = numpy.random.default_rng(42)
    elements = rng.random(size=100)
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    print('Creating', tmp)
    duration = .5
    outs_per_task = 5
    timefactor = .2
    # NOTE: the previous version kept an alternative Starmap.apply_split
    # implementation as a dead triple-quoted string; it has been removed
    try:
        with hdf5.File(tmp, 'w') as h5:
            performance.init_performance(h5)
            smap = parallel.Starmap(process_elements, h5=h5)
            smap.submit_split((elements, timefactor), duration, outs_per_task)
            res = smap.reduce(acc=0)
            self.assertAlmostEqual(res, 48.6718458266)
    finally:
        # clean up even if an assertion above fails
        shutil.rmtree(tmpdir)
def __init__(self, oqparam, calc_id):
    """
    Open the datastore associated to `calc_id`, initialize its
    performance tracking, build the run monitor and store the
    calculation parameters.
    """
    self.oqparam = oqparam
    ds = datastore.DataStore(calc_id)
    init_performance(ds.hdf5)
    self.datastore = ds
    # NB: the monitor is attached to the DataStore wrapper rather than to
    # self.datastore.hdf5: using the raw hdf5 handle would mean losing the
    # performance info about Calculator.run, since that file is closed
    # later on
    self._monitor = Monitor(
        '%s.run' % self.__class__.__name__, measuremem=True,
        h5=self.datastore)
def hdf5new(datadir=None):
    """
    Return a new `hdf5.File` instance with a name determined by the last
    calculation in the datadir (plus one). The generated filename is
    stored in the `.path` attribute of the returned object.
    """
    if not datadir:
        datadir = get_datadir()
    os.makedirs(datadir, exist_ok=True)
    new_id = get_last_calc_id(datadir) + 1
    path = os.path.join(datadir, 'calc_%d.hdf5' % new_id)
    h5file = hdf5.File(path, 'w')
    h5file.path = path
    performance.init_performance(h5file)
    return h5file
def __init__(self, path, ppath=None, mode=None):
    """
    Wrap a calculation .hdf5 file.

    :param path: path to the calculation file (a calc_XXX.hdf5)
    :param ppath: optional path to a parent file, stored as-is
    :param mode: 'r', 'r+', 'w' or 'a'; if None, 'r+' is used when the
        file already exists, otherwise 'w'
    """
    self.filename = path
    self.ppath = ppath
    self.calc_id, datadir = extract_calc_id_datadir(path)
    # NOTE(review): assumes `path` ends with '.hdf5' (5 chars stripped)
    self.tempname = self.filename[:-5] + '_tmp.hdf5'
    # create the data directory, except in read-only mode
    if not os.path.exists(datadir) and mode != 'r':
        os.makedirs(datadir)
    self.parent = ()  # can be set later
    self.datadir = datadir
    # default mode: 'r+' if the file already exists, otherwise 'w'
    self.mode = mode or ('r+' if os.path.exists(self.filename) else 'w')
    if self.mode == 'r' and not os.path.exists(self.filename):
        raise IOError('File not found: %s' % self.filename)
    self.hdf5 = ()  # so that `key in self.hdf5` is valid
    self.open(self.mode)
    # performance tracking is initialized only when the file is writable
    if mode != 'r':  # w, a or r+
        performance.init_performance(self.hdf5)
def test_supertask(self):
    # 4 supertasks spawning 4 + 5 + 3 + 5 = 17 subtasks
    words = ['aaaaeeeeiii', 'uuuuaaaaeeeeiii',
             'aaaaaaaaeeeeiii', 'aaaaeeeeiiiiiooooooo']
    allargs = [(w,) for w in words]
    numchars = sum(len(w) for w in words)  # 61
    workdir = tempfile.mkdtemp()
    fname = os.path.join(workdir, 'calc_1.hdf5')
    performance.init_performance(fname, swmr=True)
    smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(fname, 'a'))
    result = smap.reduce()
    smap.h5.close()
    self.assertEqual(result, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(fname, 'r') as h5:
        by_op = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(by_op[b'waiting'], 4)
        self.assertEqual(by_op[b'total supertask'], 4)  # tasks
        self.assertEqual(by_op[b'total get_length'], 17)  # subtasks
        self.assertGreater(len(h5['task_info']), 0)
    shutil.rmtree(workdir)
def setUpClass(cls):
    # build a scratch calc_1.hdf5 holding a 100-element array and
    # initialize its performance datasets
    cls.tmp = os.path.join(tempfile.mkdtemp(), 'calc_1.hdf5')
    with hdf5.File(cls.tmp, 'w') as f:
        f['array'] = numpy.arange(100)
    performance.init_performance(cls.tmp, swmr=True)