def combine():
    with h5.HDFStore('samples.h5', mode='a') as store:
        # (model, SAT, sample) that are already in `store`.
        store_idx = store.get_index().droplevel(_index).unique()
        for model in os.listdir(_path):
            path_model = os.path.join(_path, model)
            for SAT in map(int, sorted(os.listdir(path_model))):
                path_SAT = os.path.join(path_model, str(SAT))
                # Sort in integer order.
                for filename in sorted(os.listdir(path_SAT),
                                       key=_get_sample_number):
                    sample = _get_sample_number(filename)
                    if (model, SAT, sample) not in store_idx:
                        path_sample = os.path.join(path_SAT, filename)
                        recarray = numpy.load(path_sample)
                        df = pandas.DataFrame.from_records(recarray,
                                                           index=_index)
                        run._prepend_index_levels(df, model=model,
                                                  SAT=SAT, sample=sample)
                        print('Inserting '
                              + ', '.join((f'model={model}',
                                           f'SAT={SAT}',
                                           f'sample={sample}'))
                              + '.')
                        store.put(df, min_itemsize=run._min_itemsize)
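# `_get_sample_number()` is not defined in this file. A minimal sketch,
# assuming sample filenames end in their integer sample number, e.g.
# '123.npy' or 'sample_123.npy' (the exact naming scheme is a guess):

import os
import re


def _get_sample_number(filename):
    '''Extract the integer sample number from a sample filename.'''
    (base, _) = os.path.splitext(filename)
    return int(re.search(r'(\d+)$', base).group(1))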
def build_downsampled(filename_in, t_min=0, t_max=10, t_step=1/365,
                      by=None):
    # `arange` here must include the endpoint, so it is presumably a
    # local helper, not `numpy.arange()`, which takes no `endpoint`
    # argument.
    t = arange(t_min, t_max, t_step, endpoint=True)
    base, ext = os.path.splitext(filename_in)
    filename_out = base + '_downsampled' + ext
    with h5.HDFStore(filename_in, mode='r') as store_in, \
         h5.HDFStore(filename_out, mode='w') as store_out:
        if by is None:
            by = [n for n in store_in.get_index_names() if n != t_name]
        for (ix, group) in store_in.groupby(by):
            downsampled = _build_downsampled_group(group, t, t_step, by)
            # Prepend `ix` to the index levels.
            downsampled = pandas.concat({ix: downsampled}, names=by,
                                        copy=False)
            store_out.put(downsampled.dropna(), index=False,
                          min_itemsize=run._min_itemsize)
        store_out.create_table_index()
        store_out.repack()
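# `_build_downsampled_group()` is not defined in this file. A minimal
# sketch, assuming each group is a time series indexed by `t_name`
# (plus the `by` levels) and that the event-driven output is sampled
# onto the regular grid `t` by carrying the last recorded state
# forward (`t_step` is unused in this sketch):


def _build_downsampled_group(group, t, t_step, by):
    '''Downsample one group onto the regular time grid `t`.'''
    # Drop the grouping levels, leaving a plain time index.
    ts = group.reset_index(by, drop=True)
    # Shift the grid to start at this group's initial time.
    t_group = t + ts.index.min()
    # The state at each grid time is the last state recorded at or
    # before that time.
    return ts.reindex(t_group, method='ffill')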
def _copy_run(model, SAT, population_size, nruns, hdfstore_out):
    '''Copy the data from 'run.h5'.'''
    filename = 'run.h5'
    where = f'model={model} & SAT={SAT} & run<{nruns}'
    with h5.HDFStore(filename, mode='r') as hdfstore_in:
        for chunk in hdfstore_in.select(where=where, iterator=True):
            run._insert_index_levels(chunk, 2,
                                     population_size=population_size)
            hdfstore_out.put(chunk, min_itemsize=run._min_itemsize)
def _copy_run(model, SAT, bscov, nruns, hdfstore_out):
    '''Copy the data from 'run.h5'.'''
    filename = 'run.h5'
    where = f'model={model} & SAT={SAT} & run<{nruns}'
    with h5.HDFStore(filename, mode='r') as hdfstore_in:
        for chunk in hdfstore_in.select(where=where, iterator=True):
            run._insert_index_levels(
                chunk, 2,
                birth_seasonal_coefficient_of_variation=bscov)
            hdfstore_out.put(chunk, min_itemsize=run._min_itemsize)
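# The two `_copy_run()` variants above share a pattern: for the
# baseline value of the swept parameter, copy the existing runs from
# 'run.h5' (tagging them with a new index level at position 2) rather
# than re-simulating. A hypothetical driver for the `population_size`
# variant; `build()`, `population_sizes` and `default_size` are
# assumptions, not names from the source:


def build(filename, population_sizes, default_size, nruns):
    with h5.HDFStore(filename, mode='w') as hdfstore_out:
        for model in ('acute', 'chronic'):
            for SAT in (1, 2, 3):
                for size in population_sizes:
                    if size == default_size:
                        # Reuse the baseline runs.
                        _copy_run(model, SAT, size, nruns, hdfstore_out)
                    else:
                        ...  # Simulate new runs at this size.
        hdfstore_out.repack()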
def _build_extinction_time(filename, filename_out, by=None):
    # Only the infected columns.
    columns = ['exposed', 'infectious', 'chronic']
    extinction = {}
    with h5.HDFStore(filename, mode='r') as store:
        if by is None:
            by = [n for n in store.get_index_names() if n != t_name]
        for (ix, group) in store.groupby(by, columns=columns):
            infected = group.sum(axis='columns')
            extinction[ix] = _build_extinction_time_group(infected)
    extinction = pandas.DataFrame.from_dict(extinction, orient='index')
    extinction.index.names = by
    extinction.sort_index(level=by, inplace=True)
    h5.dump(extinction, filename_out, mode='w',
            min_itemsize=run._min_itemsize)
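# `_build_extinction_time_group()` is not defined in this file. A
# minimal sketch, consistent with the check script at the end of this
# section: extinction is observed if there are no infecteds at the
# last recorded time, and `time` is then the extinction time
# (otherwise the censoring time):


def _build_extinction_time_group(infected):
    '''Get the extinction time for one simulation run.'''
    t = infected.index.get_level_values(t_name)
    time = t.max() - t.min()
    observed = (infected.iloc[-1] == 0)
    return {'time': time, 'observed': observed}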
    return h.run(tmax)


def run_many(parameters, tmax, nruns, *args, **kwargs):
    '''Run many simulations in parallel.'''
    results = Parallel(n_jobs=-1)(
        delayed(run_one)(parameters, tmax, i, *args, **kwargs)
        for i in range(nruns))
    # Make 'run' the outer row index.
    return pandas.concat(results, keys=range(nruns), names=['run'],
                         copy=False)


def run(model, SAT, tmax, nruns, store):
    p = herd.Parameters(model=model, SAT=SAT)
    logging_prefix = ', '.join((f'model {model}', f'SAT {SAT}')) + ', '
    df = run_many(p, tmax, nruns, logging_prefix=logging_prefix)
    _prepend_index_levels(df, model=model, SAT=SAT)
    store.put(df, min_itemsize=_min_itemsize)


if __name__ == '__main__':
    nruns = 1000
    tmax = 10
    filename = 'run.h5'
    with h5.HDFStore(filename) as store:
        for model in ('acute', 'chronic'):
            for SAT in (1, 2, 3):
                run(model, SAT, tmax, nruns, store)
        store.repack()
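# The file above starts inside `run_one()`: only its final
# `return h.run(tmax)` survives. (`Parallel` and `delayed` come from
# `joblib`.) A minimal sketch of the missing part, assuming
# `herd.Herd` takes the parameters, a run number used to seed the RNG,
# and the logging prefix; the constructor signature is a guess:


def run_one(parameters, tmax, run_number, *args, **kwargs):
    '''Run one simulation.'''
    h = herd.Herd(parameters, *args, run_number=run_number, **kwargs)
    return h.run(tmax)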
def get_downsampled(filename, by=None):
    base, ext = os.path.splitext(filename)
    filename_ds = base + '_downsampled' + ext
    if not os.path.exists(filename_ds):
        build_downsampled(filename, by=by)
    return h5.HDFStore(filename_ds, mode='r')
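# Hypothetical usage of `get_downsampled()` above: it builds the
# downsampled file on first use, then returns a read-only store that
# the caller should close, e.g. by using it as a context manager:
#
# with get_downsampled('run.h5') as store:
#     for (ix, group) in store.groupby(['model', 'SAT', 'run']):
#         ...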
#!/usr/bin/python3

import sys

sys.path.append('..')
import h5
sys.path.pop()


filename = '../population_size.h5'
by = ['model', 'SAT', 'population_size', 'run']
columns = ['exposed', 'infectious', 'chronic']
where = dict(model='acute', SAT=1)
# Build the query string, e.g. 'model=acute & SAT=1'.
where = ' & '.join(f'{k}={v}' for (k, v) in where.items())

with h5.HDFStore(filename, mode='r') as store:
    for (_, group) in store.groupby(by, columns=columns,
                                    debug=True, where=where):
        infected = group.sum(axis='columns')
        # Either the run went extinct (no infecteds at its last
        # recorded time) ...
        observed = (infected.iloc[-1] == 0)
        t = group.index.get_level_values('time (y)')
        time = t.max() - t.min()
        # ... or it ran for the full 10 y.
        assert observed or (time == 10)