def run(algorithm, path, label, pre_simulation_hook=None):
    """
    Simulate a reconstruction algorithm over the delta-rho grid.

    Nothing is returned; the simulation results are written to a HDF5
    database. Intermediate results are kept in a temporary backup database
    which is removed once the results have been copied to `path`.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the simulation results.
    pre_simulation_hook : callable
        A handle to a callable which should be run *just* before the call to
        the reconstruction algorithm (the default is None, which implies that
        no pre hook is run).

    """

    backup_dir = _split_path(path)[0] + '.tmp' + os.sep
    backup_file = backup_dir + label.replace('/', '#') + '.hdf5'

    if not os.path.isdir(backup_dir):
        os.mkdir(backup_dir)

    if not os.path.isfile(backup_file):
        _backup.create(backup_file)

    # Points already marked as done in the backup are not simulated again.
    completed = _backup.get(backup_file)

    grid_shape = [len(_conf['delta']), len(_conf['rho'])]
    random.seed(_conf['seed'])
    n_points = grid_shape[0] * grid_shape[1]
    seeds = np.array(random.sample(iter_range, n_points)).reshape(grid_shape)

    tasks = []
    for i in range(grid_shape[0]):
        for j in range(grid_shape[1]):
            if completed[i, j]:
                continue

            tasks.append((algorithm, (i, j), seeds[i, j], backup_file,
                          pre_simulation_hook))

    _process(_simulate, args_list=tasks, maxtasks=_conf['maxpoints'])

    # Copy every result array from the backup to the final database.
    array_names = ('time', 'dist', 'mse', 'norm')

    with _File(backup_file, 'r') as f:
        results = [getattr(f.root, array_name)[:]
                   for array_name in array_names]

    with _File(path, 'a') as f:
        for array_name, data in zip(array_names, results):
            f.create_array('/' + label, array_name, data, createparents=True)

    os.remove(backup_file)

    if not os.listdir(backup_dir):
        os.removedirs(backup_dir)
def run(algorithm, path, label):
    """
    Simulate a reconstruction algorithm over the delta-rho grid.

    Nothing is returned; the simulation results are written to a HDF5
    database. Intermediate results are kept in a temporary backup database
    which is removed once the results have been copied to `path`.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the simulation results.

    """

    backup_dir = _split_path(path)[0] + '.tmp' + os.sep
    backup_file = backup_dir + label.replace('/', '#') + '.hdf5'

    if not os.path.isdir(backup_dir):
        os.mkdir(backup_dir)

    if not os.path.isfile(backup_file):
        _backup.create(backup_file)

    # Points already marked as done in the backup are not simulated again.
    completed = _backup.get(backup_file)

    config = _config.get()
    shape = [len(config['delta']), len(config['rho']), config['monte_carlo']]
    np.random.seed(_config.get('seed'))
    seeds = np.random.randint(0, 2**30, shape)

    tasks = []
    for i in range(shape[0]):
        for j in range(shape[1]):
            if completed[i, j]:
                continue

            tasks.append((algorithm, (i, j), seeds[i, j], backup_file))

    _process(_simulate, args_list=tasks)

    # Copy every result array from the backup to the final database.
    array_names = ('time', 'dist')

    with _File(backup_file, 'r') as f:
        results = [getattr(f.root, array_name)[:]
                   for array_name in array_names]

    with _File(path, 'a') as f:
        for array_name, data in zip(array_names, results):
            f.create_array('/' + label, array_name, data, createparents=True)

    os.remove(backup_file)

    if not os.listdir(backup_dir):
        os.removedirs(backup_dir)
def create(path):
    """
    Create the HDF5 backup database with the required arrays.

    The required arrays are an array for the simulation results, an array for
    the simulation timings, and an array for tracking the status.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.

    See Also
    --------
    magni.cs.phase_transition.config : Configuration options.

    Notes
    -----
    If anything goes wrong while writing the database, a partially written
    file at `path` is removed before the exception is re-raised.

    """

    shape = [len(_conf['delta']), len(_conf['rho']), _conf['monte_carlo']]
    time = np.zeros(shape)
    dist = np.zeros(shape)
    # np.bool_ is used since the np.bool8 alias is not available in NumPy 2.
    status = np.zeros(shape[:2], np.bool_)

    try:
        with _File(path, 'w') as f:
            f.create_array('/', 'time', time)
            f.create_array('/', 'dist', dist)
            f.create_array('/', 'status', status)
    except BaseException:
        # BaseException is caught on purpose so that e.g. a keyboard
        # interrupt does not leave a half-written backup behind.
        if os.path.exists(path):
            os.remove(path)

        raise
def set(path, ij_tuple, stat_time, stat_dist):
    """
    Store the simulation data of a single point of the delta-rho grid.

    Besides the data, the status entry of the point is set to True.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.
    ij_tuple : tuple
        A tuple (i, j) containing the parameters i, j as listed below.
    i : int
        The delta-index of the point in the delta-rho grid.
    j : int
        The rho-index of the point in the delta-rho grid.
    stat_time : ndarray
        The simulation timings of the specified point.
    stat_dist : ndarray
        The simulation results of the specified point.

    """

    delta_index, rho_index = ij_tuple

    with _File(path, "a") as f:
        root = f.root
        root.time[delta_index, rho_index] = stat_time
        root.dist[delta_index, rho_index] = stat_dist
        # The point is flagged as done only after its data has been written.
        root.status[delta_index, rho_index] = True
def create(path):
    """
    Create the HDF5 backup database with the required arrays.

    The required arrays are an array for the simulation results, an array for
    the simulation timings, and an array for tracking the status.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.

    See Also
    --------
    magni.cs.phase_transition.config : Configuration options.

    Notes
    -----
    If anything goes wrong while writing the database, a partially written
    file at `path` is removed before the exception is re-raised.

    """

    shape = [_config.get(key) for key in ["delta", "rho", "monte_carlo"]]
    shape = [len(shape[0]), len(shape[1]), shape[2]]
    time = np.zeros(shape)
    dist = np.zeros(shape)
    # np.bool_ is used since the np.bool8 alias is not available in NumPy 2.
    status = np.zeros(shape[:2], np.bool_)

    try:
        with _File(path, "w") as f:
            f.create_array("/", "time", time)
            f.create_array("/", "dist", dist)
            f.create_array("/", "status", status)
    except BaseException:
        # BaseException is caught on purpose so that e.g. a keyboard
        # interrupt does not leave a half-written backup behind.
        if os.path.exists(path):
            os.remove(path)

        raise
def set(path, ij_tuple, stat_time, stat_dist, stat_mse, stat_norm):
    """
    Store the simulation data of a single point of the delta-rho grid.

    Besides the data, the status entry of the point is set to True.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.
    ij_tuple : tuple
        A tuple (i, j) containing the parameters i, j as listed below.
    i : int
        The delta-index of the point in the delta-rho grid.
    j : int
        The rho-index of the point in the delta-rho grid.
    stat_time : ndarray
        The simulation timings of the specified point.
    stat_dist : ndarray
        The simulation results of the specified point.
    stat_mse : ndarray
        The simulation mean squared error of the specified point.
    stat_norm : ndarray
        The simulation true vector 2-norm.

    """

    delta_index, rho_index = ij_tuple

    with _File(path, 'a') as f:
        root = f.root
        root.time[delta_index, rho_index] = stat_time
        root.dist[delta_index, rho_index] = stat_dist
        root.mse[delta_index, rho_index] = stat_mse
        root.norm[delta_index, rho_index] = stat_norm
        # The point is flagged as done only after its data has been written.
        root.status[delta_index, rho_index] = True
def create(path):
    """
    Create the HDF5 backup database with the required arrays.

    The required arrays are arrays for the simulation results ('dist', 'mse',
    and 'norm'), an array for the simulation timings ('time'), and an array
    for tracking the status ('status').

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.

    See Also
    --------
    magni.cs.phase_transition.config : Configuration options.

    Notes
    -----
    If anything goes wrong while writing the database, a partially written
    file at `path` is removed before the exception is re-raised.

    """

    shape = [len(_conf['delta']), len(_conf['rho']), _conf['monte_carlo']]
    time = np.zeros(shape)
    dist = np.zeros(shape)
    mse = np.zeros(shape)
    norm = np.zeros(shape)
    # np.bool_ is used since the np.bool8 alias is not available in NumPy 2.
    status = np.zeros(shape[:2], np.bool_)

    try:
        with _File(path, 'w') as f:
            f.create_array('/', 'time', time)
            f.create_array('/', 'dist', dist)
            f.create_array('/', 'mse', mse)
            f.create_array('/', 'norm', norm)
            f.create_array('/', 'status', status)
    except BaseException:
        # BaseException is caught on purpose so that e.g. a keyboard
        # interrupt does not leave a half-written backup behind.
        if os.path.exists(path):
            os.remove(path)

        raise
def run(path, label):
    """
    Determine the phase transition from the simulation results.

    The simulation results should be present in the HDF5 database specified by
    `path` in the pytables group specified by `label` in an array named 'dist'.
    The determined phase transition is stored in the same HDF5 database, in the
    same pytables group in an array named 'phase_transition'. If that array
    already exists, nothing is done.

    Parameters
    ----------
    path : str
        The path of the HDF5 database.
    label : str
        The path of the pytables group in the HDF5 database.

    See Also
    --------
    _estimate_PT : The actual phase transition estimation.

    Notes
    -----
    A simulation is considered successful if the simulation result is less than
    10 to the power of -4.

    """

    conf = _config.get()

    with _File(path, 'a') as f:
        if '/' + label + '/phase_transition' in f:
            return

        monte_carlo = conf['monte_carlo']
        points = len(conf['rho']) * monte_carlo
        z = np.zeros(points)
        y = np.zeros(points)
        rho = np.zeros(len(conf['delta']))

        dist = f.get_node('/' + label + '/dist')[:]

        for i, delta_i in enumerate(conf['delta']):
            n = np.round(delta_i * conf['n'])

            for j, rho_j in enumerate(conf['rho']):
                # The rho value is snapped to the grid given by n; it is
                # taken to be zero when n is not positive.
                var = np.round(rho_j * n) / n if n > 0 else 0.
                offset = j * monte_carlo

                for m in range(monte_carlo):
                    z[offset + m] = var
                    y[offset + m] = dist[i, j, m] < 1e-4

            rho[i] = _estimate_PT(z, y)

        f.create_array('/' + label, 'phase_transition', rho)
def run(path, label):
    """
    Determine the phase transition from the simulation results.

    The simulation results should be present in the HDF5 database specified by
    `path` in the pytables group specified by `label` in an array named 'dist'.
    The determined phase transition (50% curve) is stored in the same HDF5
    database, in the same HDF group in an array named 'phase_transition'.
    Additionally, the 10%, 25%, 75%, and 90% percentiles are stored in an array
    named 'phase_transition_percentiles'. If the 'phase_transition' array
    already exists, nothing is done.

    Parameters
    ----------
    path : str
        The path of the HDF5 database.
    label : str
        The path of the pytables group in the HDF5 database.

    See Also
    --------
    _estimate_PT : The actual phase transition estimation.

    Notes
    -----
    A simulation is considered successful if the simulation result is less than
    a normalised mean squared error tolerance computed as 10^(-SNR/10) with SNR
    configured in the configuration module.

    """

    # The first entry is the phase transition itself, the rest are the
    # additional percentiles.
    percentiles = [0.5, 0.1, 0.25, 0.75, 0.9]
    group = '/' + label

    with _File(path, 'a') as f:
        if group + '/phase_transition' in f:
            return

        dist = f.get_node(group + '/dist')[:]

        # NaNs are replaced by twice the NMSE tolerance, i.e. counted as
        # failures.
        failure_value = 10**(-_conf['SNR'] / 10) * 2
        dist[np.isnan(dist)] = failure_value

        if _conf['logit_solver'] == 'built-in':
            # The "simple" built-in solver
            rho = _built_in_logit_solver(dist, percentiles)
        elif _conf['logit_solver'] == 'sklearn':
            # The scikit learn solver
            rho = _sklearn_logit_solver(dist, percentiles)

        f.create_array(group, 'phase_transition', rho[0])
        f.create_array(group, 'phase_transition_percentiles', rho[1:])
def _save_output(output_path, name, fig, fig_ext, datasets):
    """
    Save a figure together with its underlying data.

    The figure is saved as an image file and the datasets are saved in a newly
    created HDF database next to it. If `output_path` ends with a path
    separator, the files are named after `name` only; otherwise the file name
    part of `output_path` followed by an underscore is used as a prefix.

    Parameters
    ----------
    output_path : str
        The output_path to save to.
    name : str
        The 'fixed' part of the file name saved to.
    fig : matplotlib.figure.Figure
        The figure instance to save.
    fig_ext : str
        The file extension to use for the saved figure.
    datasets : dict
        The dict of dicts for datasets to save in a HDF database.

    """

    @_decorate_validation
    def validate_input():
        _generic('output_path', 'string')
        _generic('name', 'string')
        _generic('fig', mpl.figure.Figure)
        _generic('fig_ext', 'string')
        _levels('datasets', (_generic(None, 'mapping'),
                             _generic(None, 'mapping')))

    validate_input()

    if output_path[-1] != os.sep:
        directory, stem, _ = _split_path(output_path)
        base = directory + stem + '_' + name
    else:
        base = output_path + name

    fig.savefig(base + os.path.extsep + fig_ext)

    db_path = base + '.hdf5'
    _io.create_database(db_path)

    with _File(db_path, mode='a') as h5file:
        data_group = h5file.create_group('/', 'data', __name__ + ': ' + name)

        # One group per dataset, one array per entry of the dataset.
        for set_name, arrays in datasets.items():
            set_group = h5file.create_group(data_group, set_name, set_name)

            for array_name, values in arrays.items():
                h5file.create_array(set_group, array_name, values)
def determine(algorithm, path, label='default', overwrite=False):
    """
    Determine the phase transition of a reconstruction algorithm.

    The phase transition is determined from a number of monte carlo simulations
    on a delta-rho grid for a given problem suite.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the phase transition (the default is 'default').
    overwrite : bool
        A flag indicating if an existing phase transition should be overwritten
        if it has the same path and label (the default is False).

    Raises
    ------
    IOError
        If the database already uses `label` and `overwrite` is False.

    See Also
    --------
    magni.cs.phase_transition.config : Configuration options.
    magni.cs.phase_transition._simulation.run : The actual simulation.
    magni.cs.phase_transition._analysis.run : The actual phase determination.

    Examples
    --------
    An example of how to use this function is provided in the `examples` folder
    in the `cs-phase_transition.ipynb` ipython notebook file.

    """

    _validate_determine(algorithm, path, label, overwrite)

    if os.path.isfile(path):
        # The database must be opened writable since an existing group may
        # have to be removed; node removal fails on a read-only file.
        with _File(path, 'a') as f:
            if '/' + label in f:
                if overwrite:
                    f.remove_node('/' + label, recursive=True)
                else:
                    raise IOError("{!r} already uses the label {!r}."
                                  .format(path, label))

    _simulation.run(algorithm, path, label)
    _analysis.run(path, label)
def load_phase_transition(path, label='default'):
    """
    Load the coordinates of a phase transition from a HDF5 file.

    This function is used to load the phase transition from the output file
    generated by `magni.cs.phase_transition.determine`. The delta values are
    not read from the file; they are uniformly spaced in the interval (0, 1]
    with one value for each loaded rho value.

    Parameters
    ----------
    path : str
        The path of the HDF5 file where the phase transition is stored.
    label : str
        The label assigned to the phase transition (the default is 'default').

    Returns
    -------
    delta : np.ndarray
        The delta values of the phase transition points.
    rho : np.ndarray
        The rho values of the phase transition points.

    See Also
    --------
    magni.cs.phase_transition.determine : Phase transition determination.
    magni.cs.phase_transition.plotting : Phase transition plotting.

    Examples
    --------
    An example of how to use this function is provided in the `examples` folder
    in the `cs-phase_transition.ipynb` ipython notebook file.

    """

    @_decorate_validation
    def validate_input():
        _generic('path', 'string')
        _generic('label', 'string')

    validate_input()

    node_path = '/' + label + '/phase_transition'

    with _File(path, 'r') as f:
        rho = f.get_node(node_path)[:]

    # The leading 0 of the linspace is dropped to get values in (0, 1].
    n_points = len(rho)
    delta = np.linspace(0, 1, n_points + 1)[1:]

    return delta, rho
def create_database(path, overwrite=True):
    """
    Create a new HDF database that is annotated and chased.

    A new HDF database is created and it is annotated using
    `magni.reproducibility.io.annotate_database` and chased using
    `magni.reproducibility.io.chase_database`. If the `overwrite` flag is true,
    an existing database at `path` is overwritten.

    Parameters
    ----------
    path : str
        The path to the HDF file that is to be created.
    overwrite : bool
        The flag that indicates if an existing database should be overwritten.

    Raises
    ------
    IOError
        If `overwrite` is false and `path` already exists.

    See Also
    --------
    magni.reproducibility.io.annotate_database : Database annotation
    magni.reproducibility.io.chase_database : Database chase

    Examples
    --------
    Create a new database named 'new_db.hdf5':

    >>> from magni.reproducibility.io import create_database
    >>> create_database('new_db.hdf5')

    """

    @_decorate_validation
    def validate_input():
        _generic('path', 'string')
        _numeric('overwrite', 'boolean')

    validate_input()

    if not overwrite:
        if os.path.exists(path):
            raise IOError('{!r} already exists in filesystem.'.format(path))

    with _File(path, mode='w') as h5file:
        annotate_database(h5file)
        chase_database(h5file)
def get(path):
    """
    Return which of the results have been stored.

    The status array is read in full from the backup database, so the returned
    value is a copy which is independent of the file.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.

    Returns
    -------
    status : ndarray
        The boolean status array.

    """

    with _File(path, 'r') as f:
        # The full slice reads the whole array into memory before the file is
        # closed.
        return f.root.status[:]
def get(path):
    """
    Return which of the results have been stored.

    The status array is read in full from the backup database, so the returned
    value is a copy which is independent of the file.

    Parameters
    ----------
    path : str
        The path of the HDF5 backup database.

    Returns
    -------
    status : ndarray
        The boolean status array.

    """

    with _File(path, "r") as f:
        status_node = f.root.status
        # The full slice reads the whole array into memory before the file is
        # closed.
        status = status_node[:]

    return status
def load_phase_transition(path, label='default', delta=None):
    """
    Load the coordinates of a phase transition from a HDF5 file.

    This function is used to load the phase transition from the output file
    generated by `magni.cs.phase_transition.determine`.

    Parameters
    ----------
    path : str
        The path of the HDF5 file where the phase transition is stored.
    label : str
        The label assigned to the phase transition (the default is 'default').
    delta : ndarray
        The undersampling ratio values to use (the default is None which
        implies that a uniform spacing of values in the interval (0,1] is
        assumed.)

    Returns
    -------
    delta : ndarray
        The delta values of the phase transition points.
    rho : ndarray
        The rho values of the phase transition points.

    See Also
    --------
    magni.cs.phase_transition.determine : Phase transition determination.
    magni.cs.phase_transition.plotting : Phase transition plotting.

    Examples
    --------
    An example of how to use this function is provided in the `examples` folder
    in the `cs-phase_transition.ipynb` ipython notebook file.

    """

    @_decorate_validation
    def validate_input():
        _generic('path', 'string')
        _generic('label', 'string')
        _numeric('delta', ('floating', 'integer'), range_='[0;1]',
                 shape=(-1, ), ignore_none=True)

    @_decorate_validation
    def validate_output():
        _numeric('rho', ('floating', 'integer'), range_='[0;1]',
                 shape=(-1, ))
        _numeric('delta', ('floating', 'integer'), range_='[0;1]',
                 shape=rho.shape)

    validate_input()

    node_path = '/' + label + '/phase_transition'

    with _File(path, 'r') as f:
        rho = f.get_node(node_path)[:]

    if delta is None:
        # The leading 0 of the linspace is dropped to get values in (0, 1].
        delta = np.linspace(0, 1, len(rho) + 1)[1:]

    validate_output()

    return delta, rho
def determine(algorithm, path, label='default', overwrite=False,
              pre_simulation_hook=None):
    """
    Determine the phase transition of a reconstruction algorithm.

    The phase transition is determined from a number of monte carlo simulations
    on a delta-rho grid for a given problem suite.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the phase transition (the default is 'default').
    overwrite : bool
        A flag indicating if an existing phase transition should be overwritten
        if it has the same path and label (the default is False).
    pre_simulation_hook : callable
        A handle to a callable which should be run *just* before the call to
        the reconstruction algorithm (the default is None, which implies that
        no pre hook is run).

    See Also
    --------
    magni.cs.phase_transition._simulation.run : The actual simulation.
    magni.cs.phase_transition._analysis.run : The actual phase determination.

    Notes
    -----
    The `pre_simulation_hook` may be used to setup the simulation to match the
    specific simulation parameters, e.g. if an oracle estimator is used in the
    reconstruction algorithm. The `pre_simulation_hook` takes one argument
    which is the locals() dict.

    Examples
    --------
    An example of how to use this function is provided in the `examples` folder
    in the `cs-phase_transition.ipynb` ipython notebook file.

    """

    @_decorate_validation
    def validate_input():
        _generic('algorithm', 'function')
        _generic('path', 'string')
        _generic('label', 'string')

        # The first character (if any) which is not allowed in a label
        invalid = re.search(r'[^a-zA-Z0-9 ,.\-_/]', label)

        if invalid is not None:
            raise RuntimeError(
                'The value of >>label<<, {!r}, may not contain {!r}.'.format(
                    label, invalid.group()))

        # A label only matches if it has no empty path components
        if re.search(r'^([^/]+/)*[^/]+$', label) is None:
            raise RuntimeError(
                "The value of >>label<<, {!r}, may not contain '' folders."
                .format(label))

        _numeric('overwrite', 'boolean')

        if not (pre_simulation_hook is None or
                callable(pre_simulation_hook)):
            raise RuntimeError('The >>pre_simulation_hook<< must be callable')

    validate_input()

    if os.path.isfile(path):
        with _File(path, 'a') as f:
            if '/' + label in f:
                if not overwrite:
                    raise IOError("{!r} already uses the label {!r}.".format(
                        path, label))

                f.remove_node('/' + label, recursive=True)

    _simulation.run(algorithm, path, label,
                    pre_simulation_hook=pre_simulation_hook)
    _analysis.run(path, label)
def determine(algorithm, path, label='default', overwrite=False):
    """
    Determine the phase transition of a reconstruction algorithm.

    The phase transition is determined from a number of monte carlo simulations
    on a delta-rho grid for a given problem suite.

    Parameters
    ----------
    algorithm : function
        A function handle to the reconstruction algorithm.
    path : str
        The path of the HDF5 database where the results should be stored.
    label : str
        The label assigned to the phase transition (the default is 'default').
    overwrite : bool
        A flag indicating if an existing phase transition should be overwritten
        if it has the same path and label (the default is False).

    Raises
    ------
    RuntimeError
        If `label` contains an invalid character or an empty path component.
    IOError
        If the database already uses `label` and `overwrite` is False.

    See Also
    --------
    magni.cs.phase_transition._simulation.run : The actual simulation.
    magni.cs.phase_transition._analysis.run : The actual phase determination.

    Examples
    --------
    An example of how to use this function is provided in the `examples` folder
    in the `cs-phase_transition.ipynb` ipython notebook file.

    """

    @_decorate_validation
    def validate_input():
        _generic('algorithm', 'function')
        _generic('path', 'string')
        _generic('label', 'string')

        # regular expression matching invalid characters
        match = re.search(r'[^a-zA-Z0-9 ,.\-_/]', label)

        if match is not None:
            msg = 'The value of >>label<<, {!r}, may not contain {!r}.'
            raise RuntimeError(msg.format(label, match.group()))

        # regular expression matching labels without empty path components
        match = re.search(r'^([^/]+/)*[^/]+$', label)

        if match is None:
            msg = "The value of >>label<<, {!r}, may not contain '' folders."
            raise RuntimeError(msg.format(label))

        _numeric('overwrite', 'boolean')

    validate_input()

    if os.path.isfile(path):
        # The database must be opened writable since an existing group may
        # have to be removed; node removal fails on a read-only file.
        with _File(path, 'a') as f:
            if '/' + label in f:
                if overwrite:
                    f.remove_node('/' + label, recursive=True)
                else:
                    raise IOError("{!r} already uses the label {!r}."
                                  .format(path, label))

    _simulation.run(algorithm, path, label)
    _analysis.run(path, label)