def _configure_logger(self, o_log=None):
    """Configure the logging service.

    The behavior depends on the type of ``o_log``:

    * ``PyposmatLogFile`` -- the object is stored by reference.
    * ``str`` -- treated as a filename; a new ``PyposmatLogFile`` is
      created for that path.
    * ``None`` -- a ``PyposmatLogFile`` is created with no filename
      argument.

    The result is stored in ``self.obj_log``.

    Args:
        o_log (str, PyposmatLogFile, optional): default: None

    Raises:
        TypeError: if ``o_log`` is not a str, a PyposmatLogFile, or None.
    """
    # Validation is done with an explicit raise instead of an assert so the
    # documented TypeError is actually reachable and is not stripped by -O.
    if isinstance(o_log, PyposmatLogFile):
        self.obj_log = o_log
    elif isinstance(o_log, str):
        self.obj_log = PyposmatLogFile(filename=o_log)
    elif o_log is None:
        self.obj_log = PyposmatLogFile()
    else:
        m = "o_log must be str, PyposmatLogFile, or None"
        raise TypeError(m)
def configure_logger(self, o_log=None, log_to_stdout=True):
    """Configure the log object and the stdout flag.

    The behavior depends on the type of ``o_log``:

    * ``None`` -- ``self.obj_log`` is set to None (no log file).
    * ``str`` -- treated as a filename; a new ``PyposmatLogFile`` is
      created for that path.
    * ``PyposmatLogFile`` -- the object is stored by reference.

    Both arguments are validated before any attribute is assigned, so a
    failed call leaves ``self.obj_log`` and ``self.log_to_stdout`` untouched.

    Args:
        o_log (str, PyposmatLogFile, None): default: None
        log_to_stdout (bool): stored as ``self.log_to_stdout``. default: True

    Raises:
        TypeError: if ``o_log`` is not a str, PyposmatLogFile, or None, or
            if ``log_to_stdout`` is not a bool.
    """
    # Resolve the log object into a local first; nothing on self is changed
    # until every argument has been accepted.
    if o_log is None:
        obj_log = None
    elif isinstance(o_log, str):
        obj_log = PyposmatLogFile(filename=o_log)
    elif isinstance(o_log, PyposmatLogFile):
        obj_log = o_log
    else:
        m = "log object must be str, PyposmatLogFile, or None"
        raise TypeError(m)

    if not isinstance(log_to_stdout, bool):
        m = "log_to_stdout must be boolean"
        raise TypeError(m)

    self.obj_log = obj_log
    self.log_to_stdout = log_to_stdout
def initialize_logger(self, log_fn=None, log_to_stdout=None):
    """Create the log file object for this instance.

    When no filename is given, the log is placed at
    ``<root_directory>/<data_directory>/pyposmat.log``.  The resolved path
    is stored in ``self.log_fn``, the ``PyposmatLogFile`` built from it in
    ``self.o_log``, and the flag in ``self.log_to_stdout``.

    Args:
        log_fn (str, optional): path of the log file.
        log_to_stdout (bool, optional): stored unchanged on the instance.
    """
    assert log_fn is None or type(log_fn) is str
    assert log_to_stdout is None or type(log_to_stdout) is bool

    if log_fn is not None:
        self.log_fn = log_fn
    else:
        default_path = os.path.join(self.root_directory,
                                    self.data_directory,
                                    'pyposmat.log')
        self.log_fn = default_path

    self.o_log = PyposmatLogFile(filename=self.log_fn)
    self.log_to_stdout = log_to_stdout
class PyposmatMonteCarloSampler(PyposmatEngine):
    """Monte Carlo parameter sampler built on :obj:`PyposmatEngine`.

    Parameter sets are drawn from parametric distributions, from a KDE fitted
    to a previous data file, or read directly from a file.  Each set is
    passed to ``evaluate_parameter_set`` (not defined in this class;
    presumably inherited from the base class) and the outcome is written
    either to the results file or to the bad-parameters file.
    """

    def __init__(self,
                 filename_in='pyposmat.config.in',
                 filename_out='pyposmat.results.out',
                 o_log=None,
                 mpi_rank=None,
                 mpi_size=None,
                 base_directory=None):
        """Additional attributes are set by the base class :obj:PyposmatEngine

        Args:
            filename_in (str) - path of the configuration file
            filename_out (str) - path of the output file
            o_log (str, PyposmatLogFile, None) - a string is treated as the
                path of the log file; a PyposmatLogFile is stored by
                reference; None disables the log file.
            mpi_rank (int, optional) - defaults to 0 when None
            mpi_size (int, optional) - defaults to 1 when None
            base_directory (str, optional): Either the relative or full path
                which provides a unique drive addressing space for
                simultaneously running simulations.

        Attributes:
            mpi_rank (int)
            mpi_size (int)
            pyposmat_data_in_filename (str) - the path of the datafile to
                read in
            pyposmat_data_out_filename (str) - the path of the datafile to
                write simulation results to
            pyposmat_badparameters_filename (str) - the path of the file
                that records rejected parameter sets

        Raises:
            TypeError: if the logger cannot be configured from ``o_log``.
        """
        assert isinstance(filename_in, str)
        assert isinstance(filename_out, str)
        assert type(base_directory) in [str, type(None)]

        PyposmatEngine.__init__(self,
                                filename_in=filename_in,
                                filename_out=filename_out,
                                base_directory=base_directory,
                                fullauto=False)

        # The defaults must survive: the rank/size are used later in modulo
        # arithmetic and equality tests, which break on None.
        self.mpi_rank = 0 if mpi_rank is None else mpi_rank
        self.mpi_size = 1 if mpi_size is None else mpi_size
        assert self.mpi_rank < self.mpi_size

        self.pyposmat_data_in_filename = None
        self.pyposmat_data_out_filename = filename_out
        self.pyposmat_badparameters_filename = 'pyposmat.badparameters.out'

        try:
            self.configure_logger(o_log)
        except TypeError as e:
            m = "Unable to to configure obj_log based on attribute log:{}".format(
                str(o_log))
            raise TypeError(m) from e

    def configure_logger(self, o_log=None):
        """Configure the log object from ``o_log``.

        A PyposmatLogFile is stored by reference, a string is treated as the
        filename of a new PyposmatLogFile, and None leaves ``self.obj_log``
        as None, in which case :meth:`log` only prints.

        Args:
            o_log (str, PyposmatLogFile, None): default: None

        Raises:
            TypeError: for any other type.
        """
        if isinstance(o_log, PyposmatLogFile):
            self.obj_log = o_log
        elif isinstance(o_log, str):
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif o_log is None:
            self.obj_log = None
        else:
            m = "log object must be str, PyposmatLogFile, or None"
            raise TypeError(m)

    def log(self, str_msg):
        """Write a message to the log file (if any) and print it.

        Args:
            str_msg (str, list): a message, or a list of lines which are
                joined with newlines.

        Raises:
            TypeError: if ``str_msg`` is neither a str nor a list.
        """
        if isinstance(str_msg, str):
            m = str_msg
        elif isinstance(str_msg, list):
            m = "\n".join(str_msg)
        else:
            raise TypeError("str_msg must be a str or a list of str")

        if isinstance(self.obj_log, PyposmatLogFile):
            self.obj_log.write(m)
        print(m)

    def configure_pyposmat_datafile_in(self, filename):
        """Set the input data filename and create its PyposmatDataFile."""
        self.pyposmat_data_in_filename = filename
        self.pyposmat_datafile_in = PyposmatDataFile(filename)

    def configure_pyposmat_datafile_out(self, filename=None):
        """Create the output PyposmatDataFile.

        Args:
            filename (str, optional): when given, it also replaces
                ``self.pyposmat_data_out_filename``.
        """
        if filename is not None:
            assert type(filename) is str
            self.pyposmat_data_out_filename = filename
        self.pyposmat_datafile_out = PyposmatDataFile(filename)

    def configure_pyposmat_badparameters_file(self, filename=None):
        """Create the bad-parameters file object.

        Args:
            filename (str, optional): when given, it replaces
                ``self.pyposmat_badparameters_filename``.
        """
        if filename is not None:
            assert type(filename) is str
            self.pyposmat_badparameters_filename = filename
        self.pyposmat_badparameters = PyposmatBadParametersFile(
            filename=self.pyposmat_badparameters_filename,
            o_config=self.configuration)

    def read_configuration_file(self, filename=None):
        """Read the configuration and cache the sampling definitions.

        After the base class has read the file, this sets ``n_iterations``,
        ``parameter_names``, ``qoi_names``, ``error_names``,
        ``parameter_distribution_definition``, ``free_parameter_names``,
        ``parameter_constraints`` and ``constrained_parameter_names``.

        Args:
            filename (str, optional): passed through to the base class.
        """
        PyposmatEngine.read_configuration_file(self, filename=filename)

        self.n_iterations = self.configuration.sampling_type['n_iterations']
        self.parameter_names = list(self.configuration.sampling_distribution)
        self.qoi_names = list(self.configuration.qois)
        self.error_names = ['{}.err'.format(k) for k in self.qoi_names]
        self.parameter_distribution_definition =\
            self.configuration.sampling_distribution

        # a parameter is free unless its distribution type is 'equals'
        try:
            self.free_parameter_names = [
                k for k, v in self.parameter_distribution_definition.items()
                if v[0] != 'equals'
            ]
        except KeyError:
            print(self.parameter_distribution_definition.items())
            raise

        if self.configuration.sampling_constraints is not None:
            self.parameter_constraints = copy.deepcopy(
                self.configuration.sampling_constraints)
        else:
            self.parameter_constraints = OrderedDict()

        self.constrained_parameter_names = [
            p for p in self.parameter_names
            if p not in self.free_parameter_names
        ]

    def run_simulations(self, i_iteration, n_samples=None, filename=None):
        """Run one iteration using the sampling type from the configuration.

        Args:
            i_iteration(int): the iteration cycle we are on.
            n_samples(int,optional): the number of parameters to evaluate;
                overrides the value in the configuration when given.
            filename(str,optional): the filename; required for the 'kde'
                and 'from_file' sampling types.

        Raises:
            ValueError: if the sampling type is unknown, or a filename is
                required but missing.
        """
        assert type(i_iteration) is int
        assert type(n_samples) in [type(None), int]
        assert type(filename) in [type(None), str]

        i = i_iteration
        _sampling_type = self.configuration.sampling_type[i]['type']
        _n_samples = self.configuration.sampling_type[i]['n_samples']
        # apply the override first so the log line reports the count that
        # is actually attempted
        if n_samples is not None:
            _n_samples = n_samples

        if self.mpi_rank == 0:
            m = [
                "R{}: Starting iteration N={}".format(self.mpi_rank,
                                                      i_iteration)
            ]
            # string equality, not identity: `is` on a str literal is
            # implementation dependent
            if _sampling_type == "from_file":
                m += [
                    "R{}: Sampling parameters from {}".format(
                        self.mpi_rank, filename)
                ]
            else:
                m += [
                    "R{}: Attemping n_samples={} with sampling_type={}".format(
                        self.mpi_rank, _n_samples, _sampling_type)
                ]
            if filename is not None:
                m += ["R{}: Using file:{}".format(self.mpi_rank, filename)]
            self.log(m)

        if _sampling_type == 'parametric':
            self.run_parameteric_sampling(n_samples=_n_samples)
        elif _sampling_type == 'kde':
            if filename is None:
                raise ValueError('cannot do kde sampling with out filename')
            self.run_kde_sampling(n_samples=_n_samples, filename_in=filename)
        elif _sampling_type == 'from_file':
            if filename is None:
                raise ValueError('cannot do filesampling without file')
            self.run_file_sampling(filename)
        else:
            raise ValueError('unknown sampling type:{}'.format(_sampling_type))

    def write_badparameters_header(self):
        """Write the header section of the bad-parameters file."""
        self.pyposmat_badparameters.write_header_section(
            filename=self.pyposmat_badparameters_filename)

    def write_data_out_header(self):
        """Write the header section of the results file."""
        self.pyposmat_datafile_out.write_header_section(
            filename=self.pyposmat_data_out_filename,
            parameter_names=self.parameter_names,
            qoi_names=self.qoi_names,
            error_names=self.error_names)

    def get_sim_id(self, i, s=None):
        """Return the simulation id: ``s`` when given, otherwise ``str(i)``.

        Raises:
            TypeError: if ``s`` is None and ``i`` is not an int.
        """
        if s is not None:
            return s
        elif isinstance(i, int):
            return str(i)
        else:
            m = 'cannot determine sim_id from i:{} and s:{}'.format(i, s)
            raise TypeError(m)

    def _apply_equality_constraints(self, parameters):
        """Fill in (in place) the parameters fixed by 'equals' definitions."""
        for p in self.constrained_parameter_names:
            definition = self.parameter_distribution_definition[p]
            if definition[0] != 'equals':
                raise ValueError("oops")

            value = definition[1]
            if p.endswith('latticetype'):
                # fitting the EoS for an EAM function requires a reference
                # ground state crystal structure; the value is used verbatim
                parameters[p] = value
            elif type(value) is not list:
                # substitute free parameter values into the expression text
                expression = str(value)
                for fp in self.free_parameter_names:
                    if fp in expression:
                        expression = expression.replace(
                            fp, str(parameters[fp]))
                # NOTE(review): eval() runs text taken from the
                # configuration; only use with trusted configuration files.
                parameters[p] = eval(expression)

    def _apply_equilibrium_density(self, parameters):
        """Fill in (in place) parameters defined as an equilibrium density."""
        for p in self.constrained_parameter_names:
            definition = self.parameter_distribution_definition[p]
            if definition[0] != 'equals':
                continue
            value = definition[1]
            # list definitions look like ['equilibrium_density', a0, latt]
            if type(value) is list and value[0] == 'equilibrium_density':
                parameters[p] = self.calculate_equilibrium_density(
                    value[1], value[2], parameters)

    def _check_inequality_constraints(self, parameters, message_format):
        """Raise PyposmatBadParameterError on the first failed constraint.

        Args:
            parameters (OrderedDict): parameter name -> value.
            message_format (str): format string receiving the name of the
                failed constraint.
        """
        for k, v in self.parameter_constraints.items():
            _eval_str = v
            for pn, pv in parameters.items():
                _eval_str = _eval_str.replace(pn, str(pv))
            # NOTE(review): eval() runs text taken from the configuration;
            # only use with trusted configuration files.
            if eval(_eval_str) is False:
                raise PyposmatBadParameterError(message_format.format(k),
                                                parameters=parameters)

    def _log_progress(self, n_samples_completed, time_start_iteration,
                      n_errors):
        """Log the running totals for the current iteration."""
        time_total = time.time() - time_start_iteration
        avg_time = time_total / n_samples_completed
        _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
            self.mpi_rank, n_samples_completed, time_total, avg_time,
            n_errors)
        self.log(_str_msg)

    def run_parameteric_sampling(self, n_samples):
        """Sample free parameters from their configured distributions.

        Supported distribution types are 'uniform' (keys 'a', 'b') and
        'normal' (keys 'mu', 'sigma').

        Args:
            n_samples (int): number of parameter sets to draw and evaluate.

        Raises:
            ValueError: for an unknown distribution type.
        """
        # create random number generators
        _rv_generators = OrderedDict()
        for p in self.free_parameter_names:
            definition = self.parameter_distribution_definition[p]
            distribution_type = definition[0]
            if distribution_type == 'uniform':
                _a = definition[1]['a']
                _b = definition[1]['b']
                # scipy parameterizes the interval as [loc, loc + scale]
                _rv_generators[p] = scipy.stats.uniform(loc=_a,
                                                        scale=_b - _a)
            elif distribution_type == 'normal':
                _mu = definition[1]['mu']
                _sigma = definition[1]['sigma']
                _rv_generators[p] = scipy.stats.norm(loc=_mu, scale=_sigma)
            else:
                raise ValueError(
                    'unknown distribution type: {}'.format(distribution_type))

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            sim_id = self.get_sim_id(i=i_sample)

            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])
            for p in self.free_parameter_names:
                _parameters[p] = _rv_generators[p].rvs(size=1)[0]

            self._apply_equality_constraints(_parameters)
            self._apply_equilibrium_density(_parameters)

            try:
                self._check_inequality_constraints(
                    _parameters, "failed parameter constraint, {}")
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except (PyposmatBadParameterError, LammpsSimulationError,
                    PypospackTaskManagerError, PypospackBadEamEosError) as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    self._log_progress(i_sample + 1, time_start_iteration,
                                       _n_errors)

    def get_options_kde_bandwidth(self):
        """Describe the available KDE bandwidth estimators.

        Returns:
            OrderedDict: option name -> OrderedDict of bibliographic fields.
        """
        # NOTE(review): the Silverman entry is keyed 'silverman1984' here
        # while determine_kde_bandwidth() tests for 'silverman1985' and the
        # reference is dated 1986 -- confirm which key callers use.
        kde_options = OrderedDict()
        kde_options['chiu1999'] = OrderedDict()
        kde_options['chiu1999'][
            'reference'] = 'Chiu, S.T. Ann. Stat. 1991, Vol. 19, No 4. 1883-1905'
        kde_options['chiu1999']['doi'] = '10.1214/aos/1176348376'
        kde_options['chiu1999']['description'] = ""
        kde_options['silverman1984'] = OrderedDict()
        kde_options['silverman1984'][
            'reference'] = 'Silverman, B.W. (1986). Density Estimation for Statistics and Data Analysis. London: Chapman & Hall/CRC. p. 48'
        kde_options['silverman1984']['isbn'] = '0-412-24620-1'
        return kde_options

    def determine_kde_bandwidth(self, X, kde_bw_type):
        """Determine the KDE bandwidth and store it on the instance.

        Sets ``self.kde_bw_type`` and ``self.kde_bw``.

        Args:
            X(np.ndarray): array of data to determine the KDE bandwidth
            kde_bw_type(str): the method of estimating the optimal bandwidth

        Returns:
            the bandwidth stored in ``self.kde_bw``.

        Raises:
            PypospackBadKdeBandwidthType: for an unknown ``kde_bw_type``.
        """
        if self.mpi_rank == 0:
            self.log('determine kde bandwidth...')

        if kde_bw_type == 'chiu1999':
            try:
                h = Chiu1999_h(X)
            except ValueError:
                print(X)
                raise
        elif kde_bw_type == 'silverman1985':
            # NOTE(review): this stores the Silverman1986 object itself, not
            # a value computed from X; presumably it should be called like
            # Chiu1999_h(X) -- confirm its signature before changing.
            h = Silverman1986
        else:
            m = 'kde_bw_type, {}, is not an implemented bandwidth type'
            raise PypospackBadKdeBandwidthType(m.format(kde_bw_type))

        if self.mpi_rank == 0:
            self.log('{}:{}'.format(kde_bw_type, h))

        self.kde_bw_type = kde_bw_type
        self.kde_bw = h
        return self.kde_bw

    def run_kde_sampling(self,
                         n_samples,
                         filename_in,
                         cluster_id=None,
                         kde_bw_type='chiu1999'):
        """Sample from a KDE distribution.

        Args:
            n_samples(int): the number of samples to draw from the KDE
                distribution
            filename_in(str): the path to the datafile from which the
                parameters will be drawn from
            cluster_id(int): if we need to use a specific cluster_id, we
                specify it here.  otherwise, it will be drawn from all
                parameters contained within the set.
            kde_bw_type(str): the method of estimating the optimal bandwidth
        """
        _datafile_in = PyposmatDataFile()
        _datafile_in.read(filename_in)

        if cluster_id is None:
            _free_parameter_names = [str(v)
                                     for v in self.free_parameter_names]
            _X = _datafile_in.df[_free_parameter_names].values.T
        else:
            # subselect the dataframe by the cluster_id of interest
            _datafile_in.df = _datafile_in.df.loc[
                _datafile_in.df['cluster_id'] == cluster_id]
            _X = _datafile_in.df[self.free_parameter_names].values.T

        kde_bw = self.determine_kde_bandwidth(X=_X, kde_bw_type=kde_bw_type)
        _rv_generator = scipy.stats.gaussian_kde(_X, kde_bw)

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            sim_id = self.get_sim_id(i=i_sample)

            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])

            # one draw; rows follow the order of free_parameter_names
            _free_parameters = _rv_generator.resample(1)
            for i, v in enumerate(self.free_parameter_names):
                _parameters[v] = float(_free_parameters[i, 0])

            self._apply_equality_constraints(_parameters)
            # some EAM potentials have a normalizing equilibrium density
            # which has to be determined from the parameterization of the
            # electron density function
            self._apply_equilibrium_density(_parameters)

            try:
                self._check_inequality_constraints(
                    _parameters, 'parameter constraint failed, {}')
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except (PyposmatBadParameterError, LammpsSimulationError,
                    PypospackTaskManagerError, PypospackBadEamEosError) as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=i_sample,
                    cluster_id=cluster_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    self._log_progress(i_sample + 1, time_start_iteration,
                                       _n_errors)

    def run_file_sampling(self, filename_in):
        """Evaluate the parameter sets listed in a data file.

        Rows are distributed round-robin over the MPI ranks: a rank handles
        the rows whose zero-based position modulo ``mpi_size`` equals its
        ``mpi_rank``.

        Args:
            filename_in (str): path of the data file to read.
        """
        _datafile_in = PyposmatDataFile(filename=filename_in)
        _datafile_in.read()

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0
        # rows handled by this rank; kept separate from the row position so
        # that counting progress cannot disturb the rank assignment
        n_samples_completed = 0

        for i_row, (_, row) in enumerate(_datafile_in.df.iterrows()):
            if self.mpi_rank != i_row % self.mpi_size:
                continue

            _parameters = OrderedDict([(p, row[p])
                                       for p in self.parameter_names])
            _sim_id = row['sim_id']

            self._apply_equilibrium_density(_parameters)

            try:
                self._check_inequality_constraints(
                    _parameters, "failed parameter constraint, {}")
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except (PyposmatBadParameterError, LammpsSimulationError,
                    PypospackTaskManagerError, PypospackBadEamEosError) as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=_sim_id, exception=e)
                _n_errors += 1
            else:
                # ids read back as whole-number floats are written as ints
                if isinstance(_sim_id, float) and float(_sim_id).is_integer():
                    _sim_id = int(_sim_id)
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=_sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                n_samples_completed += 1
                if n_samples_completed % 10 == 0:
                    time_total = time.time() - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = '{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        n_samples_completed, time_total, avg_time, _n_errors)
                    print('rank{}:'.format(self.mpi_rank) + _str_msg)

    def calculate_equilibrium_density(self, a0, latt, parameters):
        """Sum the electron density over neighbor shells inside the cutoff.

        Parameters whose names start with ``d_`` are passed (with the prefix
        removed) to the density potential named by
        ``configuration.potential['density_type']``.

        Args:
            a0 (float): lattice parameter.
            latt (str): lattice type; only 'fcc' is implemented.
            parameters (OrderedDict): parameter name -> value.

        Returns:
            the summed density.

        Raises:
            ValueError: if ``latt`` is not 'fcc' or if no ``d_`` parameters
                are present.
        """
        _parameters = OrderedDict()
        s = None
        for k, v in parameters.items():
            if k.startswith('d_'):
                _parameters[k[2:]] = v
                # text between the 'd_' prefix and the next underscore; it
                # is used below as the key into the evaluated density
                s = k[2:].split('_')[0]
        if s is None:
            raise ValueError("no density parameters (prefix 'd_') found")

        if latt != 'fcc':
            raise ValueError('unsupported lattice type: {}'.format(latt))

        _potential_type = self.configuration.potential['density_type']
        _symbols = self.configuration.potential['symbols']
        _module_name, _class_name = PotentialObjectMap(
            potential_type=_potential_type)

        _module = importlib.import_module(_module_name)
        _class = getattr(_module, _class_name)
        _dens_potential = _class(symbols=_symbols)

        # NOTE(review): the '1NN' distance, 2/sqrt(2)*a0, is larger than the
        # '2NN' and '3NN' distances and lies outside rcut, so the 12 first
        # neighbors never contribute to the sum -- confirm whether
        # a0/sqrt(2) was intended.
        d = OrderedDict([('1NN', 2 / (2**0.5) * a0), ('2NN', 1.000 * a0),
                         ('3NN', 1.225 * a0)])
        Z = OrderedDict([('1NN', 12), ('2NN', 6), ('3NN', 24)])
        rcut = (d['2NN'] + d['3NN']) / 2.

        rmax = 10.
        r = np.linspace(1, 10, 5000) * rmax / 10
        rho = _dens_potential.evaluate(r, _parameters, rcut)

        rho_e = 0
        for m in Z:
            if d[m] < rcut:
                rho_e += Z[m] * np.interp(d[m], r, rho[s])
        return rho_e

    def print_structure_database(self):
        """Log a table of the structure names and filenames."""
        m = [
            80 * '-', '{:^80}'.format('STRUCTURE DATABASE'), 80 * '-',
            'structure_directory:{}'.format(self.structure_directory), '',
            '{:^20} {:^20}'.format('name', 'filename'),
            '{} {}'.format(20 * '-', 20 * '-')
        ]
        m += [
            '{:20} {:20}'.format(k, v)
            for k, v in self.structures['structures'].items()
        ]
        self.log(m)

    def print_sampling_configuration(self):
        """Print the sample count and sampling type of every iteration."""
        print(80 * '-')
        print('{:^80}'.format('SAMPLING CONFIGURATION'))
        print(80 * '-')
        print('{:^10} {:^10} {:^20}'.format('iteration', 'n_samples',
                                            'sampling_type'))
        print('{} {} {}'.format(10 * '-', 10 * '-', 20 * '-'))

        for i in range(self.n_iterations):
            _sample_type = self.configuration.sampling_type[i]['type']
            if _sample_type == 'kde_w_clusters':
                _n_samples = self.configuration.sampling_type[i][
                    'n_samples_per_cluster']
            else:
                _n_samples = self.configuration.sampling_type[i]['n_samples']
            print('{:^10} {:^10} {:^20}'.format(i, _n_samples, _sample_type))

    def print_initial_parameter_distribution(self):
        """Print the distribution definition of every parameter.

        Raises:
            ValueError: if a free parameter uses a distribution other than
                'uniform' or 'normal'.
        """
        print(80 * '-')
        print('{:80}'.format('INITIAL PARAMETER DISTRIBUTION'))
        print(80 * '-')

        for p in self.parameter_distribution_definition:
            if p not in self.free_parameter_names:
                print('{:^20} {:^10}'.format(p, 'not_free'))
                continue

            str_free = 'free'
            definition = self.parameter_distribution_definition[p]
            _distribution_type = definition[0]
            if _distribution_type == 'uniform':
                print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format(
                    p, str_free, _distribution_type, definition[1]['a'],
                    definition[1]['b']))
            elif _distribution_type == 'normal':
                print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format(
                    p, str_free, _distribution_type, definition[1]['mu'],
                    definition[1]['sigma']))
            else:
                s = "incorrection parameter distribution for parameter {}.  probability distribution function, {}, is not supported"
                s = s.format(p, _distribution_type)
                raise ValueError(s)
class PyposmatSampler(PyposmatEngine):
    """ Base Sampling Engine to build other engines upon

    Args:
        configuration (str): filename of the configuration file
        mpi_rank (int, optional): rank of this process
        mpi_size (int, optional): total number of processes
        base_directory (str, optional): passed through to the base class
        results_fn (str, optional): filename where to output the simulation
            results
        bad_parameters_fn (str, optional): filename where rejected parameter
            sets are recorded
        o_log (str, PyposmatLogFile, optional): see ``_configure_logger``

    Attributes:
        config_fn(str): filename of the configuration file
        data_in_fn(str): filename where to get previous simulation results
        data_out_fn(str): filename where to output the current simulation
            results
        bad_parameters_fn(str): filename of the bad-parameters file
        data_in(:obj:PyposmatDataFile): object for reading in a data file
        data_out(:obj:PyposmatDataFile): object for write out a data file
    """

    def __init__(self,
                 configuration='pyposmat.configuration.yaml',
                 mpi_rank=None,
                 mpi_size=None,
                 base_directory=None,
                 results_fn='pyposmat.results.out',
                 bad_parameters_fn='pyposmat.badparameters.out',
                 o_log=None):
        # NOTE(review): the earlier draft of this constructor referred to
        # config_fn, results_fn, bad_parameters_fn and o_log without
        # receiving them.  They are now optional trailing arguments so the
        # first four positional parameters are unchanged.  The draft also
        # hinted that `configuration` might be a configuration object; only
        # the filename form is wired up because the base class is
        # initialized from a filename -- confirm before widening.
        if not isinstance(configuration, str):
            raise TypeError("configuration must be a str (filename)")
        if not isinstance(results_fn, str):
            raise TypeError("results_fn must be a str")
        if not isinstance(bad_parameters_fn, str):
            raise TypeError("bad_parameters_fn must be a str")
        if not (mpi_rank is None or isinstance(mpi_rank, int)):
            raise TypeError("mpi_rank must be an int or None")
        if not (mpi_size is None or isinstance(mpi_size, int)):
            raise TypeError("mpi_size must be an int or None")
        if not (base_directory is None or isinstance(base_directory, str)):
            raise TypeError("base_directory must be a str or None")

        config_fn = configuration

        super().__init__(
            filename_in=config_fn,
            filename_out=results_fn,
            base_directory=base_directory,
            fullauto=False)

        # falls back to rank 0 of 1 unless a consistent pair is given
        self._configure_mpi_attributes(mpi_rank=mpi_rank, mpi_size=mpi_size)

        # set up necessary filenames
        self.config_fn = config_fn
        self.data_in_fn = None
        self.data_out_fn = results_fn
        self.bad_parameters_fn = bad_parameters_fn

        # data_objects
        self.data_in = None
        self.data_out = None

        # configure log object
        self.obj_log = None
        self._configure_logger(o_log)

        # private attributes
        self._parameter_constraints = None

    def _configure_mpi_attributes(self, mpi_rank, mpi_size):
        """Set ``mpi_rank`` and ``mpi_size``.

        The given values are used only when both are ints and
        ``mpi_rank < mpi_size``; otherwise the defaults 0 and 1 are kept.
        """
        self.mpi_rank = 0
        self.mpi_size = 1

        if type(mpi_rank) is int and type(mpi_size) is int:
            if mpi_rank < mpi_size:
                self.mpi_rank = mpi_rank
                self.mpi_size = mpi_size

    def _configure_logger(self, o_log=None):
        """Configure the logging service.

        A PyposmatLogFile is stored by reference, a string is treated as
        the filename of a new PyposmatLogFile, and None creates a
        PyposmatLogFile with no filename argument.

        Args:
            o_log (str,PyposmatLogFile,optional): default: None

        Raises:
            TypeError: for any other type.
        """
        # explicit raise instead of an assert so the documented TypeError is
        # reachable and is not stripped by -O
        if isinstance(o_log, PyposmatLogFile):
            self.obj_log = o_log
        elif isinstance(o_log, str):
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif o_log is None:
            self.obj_log = PyposmatLogFile()
        else:
            m = "o_log must be str, PyposmatLogFile, or None"
            raise TypeError(m)

    @property
    def n_iterations(self):
        """Number of iterations from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.sampling_type['n_iterations']

    @property
    def parameter_names(self):
        """Parameter names from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.parameter_names

    @property
    def qoi_names(self):
        """QOI names from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.qoi_names

    @property
    def error_names(self):
        """Error names from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.error_names

    @property
    def parameter_distribution_definition(self):
        """Sampling distribution from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.sampling_distribution

    @property
    def free_parameter_names(self):
        """Free parameter names from the configuration, or None."""
        if self.configuration is None:
            return None
        return self.configuration.free_parameter_names

    @property
    def parameter_constraints(self):
        """Inequality constraints, or None without a configuration.

        A value stored in ``_parameter_constraints`` takes precedence over
        the constraints from the configuration.
        """
        if self.configuration is None:
            return None
        if self._parameter_constraints is None:
            return self.configuration.sampling_constraints
        return self._parameter_constraints

    @property
    def constrained_parameter_names(self):
        """Names of the parameters which are not free, or None."""
        if self.configuration is None:
            return None
        return [
            p for p in self.parameter_names
            if p not in self.free_parameter_names
        ]

    def log(self, str_msg):
        """log message to log file

        Args:
            str_msg (str,list): a message, or a list of lines which are
                joined with newlines.

        Raises:
            TypeError: If type(str_msg) not either a :obj:str or a :obj:list
                of :obj:str
        """
        if isinstance(str_msg, str):
            m = str_msg
        elif isinstance(str_msg, list) \
                and all(isinstance(v, str) for v in str_msg):
            m = "\n".join(str_msg)
        else:
            m = "str_msg must be either be a str or a list of str"
            raise TypeError(m)
        self.obj_log.write(m)

    def read_configuration_file(self, filename=None):
        """read the pyposmat configuration file

        This method overrides the inherited method.

        Args:
            filename(str,optional):path of the filename.  If the filename
                is not specified, then the method will run using the class
                attribute, `config_fn`

        Returns:
            Nothing returned

        Raises:
            TypeError
        """
        # In the previous iteration, this set a bunch of public attributes.
        # They are reimplemented as properties because it is much easier
        # for an external developer to understand a property implementation
        # than to search for an attribute which may be mutated.
        # -- EJR, 2/17/2019
        if filename is None:
            _filename = self.config_fn
        elif isinstance(filename, str):
            _filename = filename
        else:
            m = "filename must either be a str or NoneType"
            raise TypeError(m)

        super().read_configuration_file(filename=_filename)

    def configure_pyposmat_datafile_in(self, filename=None):
        """ configures the data_in attribute

        Args:
            filename(str, optional): path of the input file to be used;
                when omitted the current ``data_in_fn`` is used.
        """
        assert type(filename) in [type(None), str]
        if filename is not None:
            self.data_in_fn = filename
        self.data_in = PyposmatDataFile(filename=self.data_in_fn)

    def configure_pyposmat_datafile_out(self, filename=None):
        """ configures the data_out attribute

        Args:
            filename(str, optional): path of the output file to be used;
                when omitted the current ``data_out_fn`` is used.
        """
        assert type(filename) in [type(None), str]
        if filename is not None:
            self.data_out_fn = filename
        self.data_out = PyposmatDataFile(filename=self.data_out_fn)

    def initalize_sampler(self):
        """To be implemented by the inheriting class."""
        raise NotImplementedError

    def generate_free_parameters(self):
        """ stub implementation which needs to be overrided by the
        inheriting class; every free parameter is set to 0."""
        free_parameters = OrderedDict()
        for p in self.free_parameter_names:
            free_parameters[p] = 0.
        return free_parameters

    def enforce_parameter_equality_constraints(self, free_parameters):
        """Compute the parameters fixed by 'equals' definitions.

        Args:
            free_parameters (OrderedDict): free parameter name -> value.

        Returns:
            OrderedDict: constrained parameter name -> value.
        """
        constrained_parameters = OrderedDict()
        for p in self.constrained_parameter_names:
            definition = self.parameter_distribution_definition[p]
            if definition[0] != 'equals':
                continue

            value = definition[1]
            if p.endswith('latticetype'):
                constrained_parameters[p] = value
            elif type(value) is not list:
                # substitute free parameter values into the expression text
                s = str(value)
                for fp in self.free_parameter_names:
                    if fp in s:
                        s = s.replace(fp, str(free_parameters[fp]))
                # NOTE(review): eval() runs text taken from the
                # configuration; only use with trusted configuration files.
                constrained_parameters[p] = eval(s)
        return constrained_parameters

    def enforce_parameter_inequality_constraints(self, parameters):
        """Raise PyposmatBadParameterError on the first failed constraint.

        Args:
            parameters (OrderedDict): parameter name -> value.
        """
        for k, v in self.parameter_constraints.items():
            eval_str = v
            for pn, pv in parameters.items():
                eval_str = eval_str.replace(pn, str(pv))
            # NOTE(review): eval() runs text taken from the configuration;
            # only use with trusted configuration files.
            if not eval(eval_str):
                m = "failed parameter constraint, {}".format(k)
                raise PyposmatBadParameterError(m, parameters=parameters)

    def run_simulations(self, i_iteration, n_samples=None, filename=None):
        """ base method to override

        Validates the arguments and resolves the sample count and sampling
        type for the iteration; it performs no sampling itself.

        Args:
            i_iteration (int): the iteration cycle.
            n_samples (int, optional): overrides the sample count from the
                configuration when given.
            filename (str, optional): data file for the iteration.
        """
        assert type(i_iteration) is int
        assert type(n_samples) in [type(None), int]
        assert type(filename) in [type(None), str]

        i = i_iteration

        # the configuration supplies the count only when none was passed in
        if n_samples is None:
            _n_samples = self.configuration.sampling_type[i]['n_samples']
        else:
            _n_samples = n_samples

        _sampling_type = self.configuration.sampling_type[i]['type']

        # NOTE(review): the draft looked up a filename in the configuration
        # using n_samples as the key, which cannot be right; the key that
        # holds a configured filename is not visible here, so the argument
        # is used as given.
        _filename = filename
class PyposmatIterativeSampler(object):
    """Iterative sampler which wraps multiple simulation algorithms.

    This class wraps multiple simulation algorithms so that they can be run
    in an iterative manner.  Since this class has so many configuration
    options, the attributes of this class are set by a YAML based
    configuration file.  The class PyposmatConfigurationFile aids in the
    creation and reading of these options.  These attributes are public and
    can be set programmatically within a script.

    Notes:
        config_fn = 'data/pyposmat.config.in'
        engine = PyposmatIterativeSampler(configuration_filename=config_fn)
        engine.read_configuration_file()
        engine.run_all()

    Args:
        configuration_filename(str): the filename of the YAML configuration
            file
        is_restart(bool,optional): When set to True, `run_all` resumes after
            the last completed iteration.  By default, is set to False
        is_auto(bool,optional): intended to automatically configure the
            class.  The constructor currently does not read this argument.
        log_fn(str,optional): the path of the log file; by default it is
            `pyposmat.log` inside the data directory
        log_to_stdout(bool,optional): When set to True, all log messages are
            printed to standard out as well as written to the log file

    Attributes:
        mpi_comm(MPI.Intracomm)
        mpi_rank(int)
        mpi_size(int)
        mpi_nprocs(int)
        i_iteration(int)
        n_iterations(int)
        rv_seed(int)
        rv_seeds(np.ndarray)
        configuration_filename(str)
        configuration(PyposmatConfigurationFile)
        mc_sampler(PyposmatMonteCarloSampler)
        root_directory(str)
        data_directory(str)
        is_restart(bool)
        start_iteration(int): 0 until a restart determines otherwise
    """

    parameter_sampling_types = [
        'parametric', 'kde', 'from_file', 'kde_w_clusters'
    ]

    def __init__(self,
                 configuration_filename,
                 is_restart=False,
                 is_auto=False,
                 log_fn=None,
                 log_to_stdout=True):
        # formats should not contain a trailing end line character
        self.SECTION_HEADER_FORMAT = "\n".join([80 * '=', "{:^80}", 80 * "="])
        self.RANK_DIR_FORMAT = 'rank_{}'

        # MPI state is filled in later by setup_mpi_environment()
        self.mpi_comm = None
        self.mpi_rank = None
        self.mpi_size = None
        self.mpi_nprocs = None

        self.i_iteration = None
        self.rv_seed = None
        self.rv_seeds = None

        self.configuration_filename = configuration_filename
        self.configuration = None
        self.mc_sampler = None

        self.root_directory = os.getcwd()
        self.data_directory = 'data'
        self.is_restart = is_restart
        self.start_iteration = 0

        self.log_fn = log_fn
        self.log_to_stdout = log_to_stdout
        self.o_log = None
        self.initialize_logger(log_fn=log_fn, log_to_stdout=log_to_stdout)

        if self.is_restart:
            self.delete_mpi_rank_directories()

    # ------------------------------------------------------------------
    # read-only views onto the configuration
    # ------------------------------------------------------------------
    @property
    def structure_directory(self):
        """(str) absolute structure directory, or None before configuration."""
        if self.configuration is None:
            return None
        d = self.configuration.structures['structure_directory']
        if not os.path.isabs(d):
            d = os.path.join(self.root_directory, d)
        return d

    @property
    def n_iterations(self):
        """(int) number of iterations, or None before configuration."""
        if self.configuration is None:
            return None
        return self.configuration.n_iterations

    @property
    def parameter_names(self):
        """(list) parameter names, or None before configuration.

        `_write_parameter_names` reads this attribute, which the class did
        not previously provide.
        """
        # NOTE(review): assumes the configuration object exposes
        # `parameter_names` like `qoi_names`/`error_names` -- confirm.
        if self.configuration is None:
            return None
        return self.configuration.parameter_names

    @property
    def qoi_names(self):
        """(list) qoi names, or None before configuration."""
        if self.configuration is None:
            return None
        return self.configuration.qoi_names

    @property
    def error_names(self):
        """(list) error names, or None before configuration."""
        if self.configuration is None:
            return None
        return self.configuration.error_names

    # ------------------------------------------------------------------
    # restart support
    # ------------------------------------------------------------------
    def delete_mpi_rank_directories(self):
        """Delete every `rank_*` entry of the root directory on rank 0.

        All ranks synchronize on a barrier afterwards.  When called from the
        constructor `mpi_rank` is still None, so nothing is deleted there.
        """
        if self.mpi_rank == 0:
            self.log('Deleting previous rank directories')
            for d in os.listdir(self.root_directory):
                if d.startswith('rank_'):
                    # errors propagate to the caller unchanged
                    shutil.rmtree(os.path.join(self.root_directory, d))
        MPI.COMM_WORLD.Barrier()

    def determine_last_iteration_completed(self):
        """Find the first iteration lacking its results or next kde file.

        Returns:
            (int) the iteration to start from; also stored in
            `start_iteration`
        """
        for i in range(self.n_iterations):
            results_fn = os.path.join(self.data_directory,
                                      'pyposmat.results.{}.out'.format(i))
            kde_fn = os.path.join(self.data_directory,
                                  'pyposmat.kde.{}.out'.format(i + 1))
            if not (os.path.isfile(results_fn) and os.path.isfile(kde_fn)):
                self.start_iteration = i
                break
            if self.mpi_rank == 0:
                self.log('iteration {}: is complete'.format(i))
            self.start_iteration = i + 1
        MPI.COMM_WORLD.Barrier()
        return self.start_iteration

    # ------------------------------------------------------------------
    # main driver
    # ------------------------------------------------------------------
    def run_all(self):
        """Run all iterations.

        For every iteration: simulate on all ranks, merge the per-rank data
        and bad-parameter files on rank 0, then analyze the results on
        rank 0.  Ranks synchronize on a barrier between the stages.
        """
        self.setup_mpi_environment()
        self.initialize_data_directory()

        self.start_iteration = 0
        if self.is_restart:
            self.determine_last_iteration_completed()
        # NOTE(review): the source layout does not show whether this
        # message belongs to the restart branch; it is emitted always.
        if self.mpi_rank == 0:
            self.log("starting at simulation: {}".format(self.start_iteration))
        MPI.COMM_WORLD.Barrier()

        for i in range(self.start_iteration, self.n_iterations):
            self.i_iteration = i
            self.log_iteration_information(i_iteration=i)

            self.run_simulations(i)
            MPI.COMM_WORLD.Barrier()

            if self.mpi_rank == 0:
                self.log("ALL SIMULATIONS COMPLETE FOR ALL RANKS")
                self.log("MERGING FILES")
                self.merge_data_files(i)
                self.merge_error_files(i)
            MPI.COMM_WORLD.Barrier()

            if self.mpi_rank == 0:
                self.log("ANALYZE RESULTS")
                self.analyze_results(i)
            MPI.COMM_WORLD.Barrier()

        if self.mpi_rank == 0:
            self.log(80 * '-')
            self.log('JOBCOMPLETE')

    # ------------------------------------------------------------------
    # sampler construction
    # ------------------------------------------------------------------
    def initialize_sampler(self,
                           config_fn,
                           results_fn,
                           mpi_rank=None,
                           mpi_size=None,
                           o_log=None):
        """Initialize the `mc_sampler` attribute with a Monte Carlo sampler.

        Args:
            config_fn(str): absolute path to the configuration file
            results_fn(str): absolute path to the results file
            mpi_rank(int,optional): the MPI rank executing this method, by
                default the attribute `mpi_rank`
            mpi_size(int,optional): the size of the MPI execution group, by
                default the attribute `mpi_size`
            o_log(PyposmatLogFile,str,optional): forwarded unchanged to the
                sampler
        """
        assert type(config_fn) is str
        assert type(results_fn) is str
        assert type(mpi_rank) in [type(None), int]
        assert type(mpi_size) in [type(None), int]
        assert type(o_log) in [type(None), PyposmatLogFile, str]
        # the working directory changes per rank, so paths must be absolute
        assert os.path.isabs(config_fn)
        assert os.path.isabs(results_fn)

        if mpi_rank is None:
            mpi_rank = self.mpi_rank
        if mpi_size is None:
            mpi_size = self.mpi_size

        self.mc_sampler = PyposmatMonteCarloSampler(filename_in=config_fn,
                                                    filename_out=results_fn,
                                                    mpi_rank=mpi_rank,
                                                    mpi_size=mpi_size,
                                                    o_log=o_log)
        self.mc_sampler.create_base_directories()
        self.mc_sampler.read_configuration_file()
        # we have to be able to find the structure directory
        self.mc_sampler.configuration.structures[
            'structure_directory'] = self.structure_directory
        self.mc_sampler.configure_qoi_manager()
        self.mc_sampler.configure_task_manager()
        self.mc_sampler.configure_pyposmat_datafile_out()
        self.mc_sampler.configure_pyposmat_badparameters_file()
        self.log_more_iteration_information()

    def initialize_file_sampler(self,
                                config_fn,
                                results_fn,
                                i_iteration=0,
                                mpi_rank=None,
                                mpi_size=None,
                                o_log=None):
        """Initialize the `mc_sampler` attribute with a file sampler.

        Args:
            config_fn(str): absolute path to the configuration file
            results_fn(str): absolute path to the results file
            i_iteration(int,optional): the iteration whose configured
                `file` entry is sampled from, by default zero
            mpi_rank(int,optional): the MPI rank executing this method, by
                default the attribute `mpi_rank`
            mpi_size(int,optional): the size of the MPI execution group, by
                default the attribute `mpi_size`
            o_log(PyposmatLogFile,str,optional): forwarded unchanged to the
                sampler
        """
        assert type(config_fn) is str
        assert type(results_fn) is str
        assert type(mpi_rank) in [type(None), int]
        assert type(mpi_size) in [type(None), int]
        assert type(o_log) in [type(None), PyposmatLogFile, str]
        # the working directory changes per rank, so paths must be absolute
        assert os.path.isabs(config_fn)
        assert os.path.isabs(results_fn)

        if mpi_rank is None:
            mpi_rank = self.mpi_rank
        if mpi_size is None:
            mpi_size = self.mpi_size

        # get the absolute path of the datafile we are sampling from
        data_in_fn = self.configuration.sampling_type[i_iteration]['file']
        if not os.path.isabs(data_in_fn):
            data_in_fn = os.path.join(self.root_directory, data_in_fn)

        self.mc_sampler = PyposmatFileSampler(config_fn=config_fn,
                                              data_in_fn=data_in_fn,
                                              data_out_fn=results_fn,
                                              mpi_rank=mpi_rank,
                                              mpi_size=mpi_size,
                                              o_log=o_log,
                                              fullauto=False)
        self.mc_sampler.create_base_directories()
        self.mc_sampler.read_configuration_file()
        # we have to be able to find the structure directory
        self.mc_sampler.configuration.structures[
            'structure_directory'] = self.structure_directory
        self.mc_sampler.configure_qoi_manager()
        self.mc_sampler.configure_task_manager()
        self.mc_sampler.configure_datafile_out()
        self.mc_sampler.configure_pyposmat_badparameters_file()
        self.log_more_iteration_information()

    def initialize_rank_directory(self):
        """Create a fresh rank directory and store it in `rank_directory`.

        The directory is `<root_directory>/rank_<mpi_rank>`.  If it already
        exists it is deleted with all its contents and then recreated.
        """
        rank_directory = os.path.join(
            self.root_directory, self.RANK_DIR_FORMAT.format(self.mpi_rank))
        if os.path.isdir(rank_directory):
            shutil.rmtree(rank_directory)
        os.mkdir(rank_directory)
        self.rank_directory = rank_directory

    # ------------------------------------------------------------------
    # running one iteration
    # ------------------------------------------------------------------
    def run_simulations(self, i_iteration):
        """Run the simulations of a single iteration on this rank.

        Each rank works inside its own rank directory so that disk IO does
        not conflict.  The working directory is restored to the root
        directory afterwards, also when sampling raises.

        Args:
            i_iteration(int): the iteration to run

        Raises:
            PyposmatSamplingTypeError: the configured sampling type is not
                one of `parameter_sampling_types`
        """
        self.initialize_rank_directory()
        config_filename = self.configuration_filename
        results_filename = os.path.join(self.rank_directory,
                                        'pyposmat.results.out')

        # change execution context for this rank
        os.chdir(self.rank_directory)
        try:
            # set random seed
            self.determine_rv_seeds()
            self.log_random_seeds(i_iteration=i_iteration)

            sampling_type = self.configuration.sampling_type[i_iteration][
                'type']
            if self.mpi_rank == 0:
                self.log("sampling_type={}".format(sampling_type))
            MPI.COMM_WORLD.Barrier()

            sampler_kwargs = dict(config_fn=config_filename,
                                  results_fn=results_filename,
                                  mpi_rank=self.mpi_rank,
                                  mpi_size=self.mpi_size,
                                  o_log=self.o_log)
            if sampling_type == 'parametric':
                self.initialize_sampler(**sampler_kwargs)
                self.run_parametric_sampling(i_iteration=i_iteration)
            elif sampling_type == 'kde':
                self.initialize_sampler(**sampler_kwargs)
                self.run_kde_sampling(i_iteration=i_iteration)
            elif sampling_type == 'from_file':
                self.initialize_file_sampler(**sampler_kwargs)
                self.run_file_sampling(i_iteration=i_iteration)
            elif sampling_type == 'kde_w_clusters':
                self._run_cluster_sampling(i_iteration)
            else:
                error_dict = OrderedDict([('i_iteration', i_iteration),
                                          ('sampling_type', sampling_type)])
                m = "unknown parameter sampling type: {}".format(
                    sampling_type)
                m += "the valid sampling types are: {}".format(",".join(
                    self.parameter_sampling_types))
                raise PyposmatSamplingTypeError(m, error_dict)
        finally:
            # return to root directory
            os.chdir(self.root_directory)

    def _run_cluster_sampling(self, i_iteration):
        """Run the `kde_w_clusters` sampling for one iteration."""
        cluster_fn = "pyposmat.cluster.{}.out".format(i_iteration)
        pyposmat_datafile_in = os.path.join(self.root_directory,
                                            self.data_directory, cluster_fn)
        _config_filename = os.path.join(self.root_directory,
                                        self.configuration_filename)

        # `_mc_config` was referenced but never defined; it is the sampling
        # configuration of this iteration.
        _mc_config = self.configuration.sampling_type[i_iteration]
        # NOTE(review): a per-rank share of this number used to be computed
        # but never used; the total is handed to the sampler as before.
        _mc_n_samples = _mc_config['n_samples_per_cluster']

        # initialize sampling object
        o = PyposmatClusterSampler(o_logger=self.log,
                                   mpi_rank=self.mpi_rank,
                                   mpi_comm=self.mpi_comm,
                                   mpi_size=self.mpi_size)
        o.create_base_directories()
        o.read_configuration_file(filename=_config_filename)

        # rank 0 writes the clustered data file if it does not exist yet
        if self.mpi_rank == 0 and not os.path.isfile(pyposmat_datafile_in):
            kde_fn = os.path.join(self.root_directory, self.data_directory,
                                  "pyposmat.kde.{}.out".format(i_iteration))
            o.write_cluster_file(filename=kde_fn, i_iteration=i_iteration)
        MPI.COMM_WORLD.Barrier()

        o.configure_pyposmat_datafile_in(filename=pyposmat_datafile_in)

        # the rank directory is the working directory, one level below the
        # root, so the structure directory is prefixed with '..'
        _structure_dir = o.configuration.structures['structure_directory']
        o.configuration.structures['structure_directory'] = \
            os.path.join('..', _structure_dir)

        # finish the rest of the initialization
        o.configure_qoi_manager()
        o.configure_task_manager()
        o.configure_pyposmat_datafile_out()
        MPI.COMM_WORLD.Barrier()

        # run simulations
        o.run_simulations(i_iteration=i_iteration,
                          n_samples=_mc_n_samples,
                          filename=pyposmat_datafile_in)
        MPI.COMM_WORLD.Barrier()

    def initialize_data_directory(self, data_directory=None):
        """Determine the absolute path of the data directory and create it.

        The path is taken from `data_directory`, else from the attribute
        `data_directory`, else it is `data`; relative paths are resolved
        against `root_directory`.  Rank 0 creates the directory; a failure
        to create it is logged, not raised.

        Args:
            data_directory(str,optional): the path of the data directory,
                either relative or absolute

        Returns:
            (str) the absolute path of the data directory
        """
        assert type(data_directory) in [type(None), str]
        assert type(self.data_directory) in [type(None), str]

        # an absolute attribute used to be overwritten with None here
        if data_directory is None:
            data_directory = self.data_directory
        if data_directory is None:
            data_directory = 'data'
        if not os.path.isabs(data_directory):
            data_directory = os.path.join(self.root_directory,
                                          data_directory)
        self.data_directory = os.path.abspath(data_directory)

        # create data directory
        if self.mpi_rank == 0:
            try:
                os.mkdir(self.data_directory)
            except FileExistsError:
                self.log(
                    'attempted to create data directory, directory already exists.'
                )
                self.log('\tdata_directory:{}'.format(self.data_directory))
            except OSError:
                self.log(
                    'attempted to create data directory, cannot create directory.'
                )
                self.log('\tdata_directory:{}'.format(self.data_directory))
            else:
                self.log('created the data directory.')
                self.log('\tdata_directory;{}'.format(self.data_directory))
        MPI.COMM_WORLD.Barrier()
        return self.data_directory

    def run_parametric_sampling(self, i_iteration):
        """Run parametric sampling.

        Args:
            i_iteration(int): what iteration of the sampling is happening
        """
        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatMonteCarloSampler

        n_samples_per_rank = self.determine_number_of_samples_per_rank(
            i_iteration=i_iteration)
        self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                        n_samples=n_samples_per_rank)

    def run_kde_sampling(self, i_iteration):
        """Run kde sampling from `pyposmat.kde.<i_iteration>.out`.

        Args:
            i_iteration(int): what iteration of the sampling is happening
        """
        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatMonteCarloSampler

        kde_filename = os.path.join(self.data_directory,
                                    'pyposmat.kde.{}.out'.format(i_iteration))
        n_samples_per_rank = self.determine_number_of_samples_per_rank(
            i_iteration=i_iteration)
        self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                        n_samples=n_samples_per_rank,
                                        filename=kde_filename)

    def run_file_sampling(self, i_iteration):
        """Run file sampling.

        The input file is the configured `file` entry of the iteration,
        else `pyposmat.kde.<i_iteration>.out` in the data directory.

        Args:
            i_iteration(int): the iteration to sample for
        """
        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatFileSampler

        iteration_config = self.configuration.sampling_type[i_iteration]
        if 'file' in iteration_config:
            filename = os.path.join(self.root_directory,
                                    iteration_config['file'])
        else:
            kde_fn = 'pyposmat.kde.{}.out'.format(i_iteration)
            if os.path.isabs(self.data_directory):
                filename = os.path.join(self.data_directory, kde_fn)
            else:
                # was `os.path, join(...)`, which raised NameError
                filename = os.path.join(self.root_directory,
                                        self.data_directory, kde_fn)

        if self.mpi_rank == 0:
            self.log(80 * '-')
            self.log('{:^80}'.format('file sampling'))
            self.log(80 * '-')
            self.log('filename_in:{}'.format(filename))
        MPI.COMM_WORLD.Barrier()

        n_samples_per_rank = self.determine_number_of_samples_per_rank(
            i_iteration=i_iteration)
        self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                        n_samples=n_samples_per_rank,
                                        filename=filename)

    def determine_number_of_samples_per_rank(self, i_iteration,
                                             N_samples=None):
        """Determine the number of samples for this rank.

        The total is divided evenly between the ranks; the ranks with the
        lowest rank numbers each take one of the remaining samples.

        Args:
            i_iteration(int): which iteration we are in the simulation
            N_samples(int,optional): the total number of samples for this
                iteration.  If provided, it overrides the number given in
                the configuration file.

        Returns:
            (int): the number of samples for this rank
        """
        assert type(i_iteration) is int
        assert type(N_samples) in [type(None), int]
        assert type(self.configuration) is PyposmatConfigurationFile

        if N_samples is None:
            N_samples = self.configuration.sampling_type[i_iteration][
                'n_samples']

        N_samples_per_rank = int(N_samples / self.mpi_size)
        if N_samples % self.mpi_size > self.mpi_rank:
            N_samples_per_rank += 1
        return N_samples_per_rank

    # ------------------------------------------------------------------
    # environment
    # ------------------------------------------------------------------
    def initialize_logger(self, log_fn=None, log_to_stdout=None):
        """Initialize the log object stored in `o_log`.

        Args:
            log_fn(str,optional): the path of the log file, by default
                `pyposmat.log` under the root and data directories
            log_to_stdout(bool,optional): stored in `log_to_stdout`
        """
        assert type(log_fn) in [type(None), str]
        assert type(log_to_stdout) in [type(None), bool]

        if log_fn is None:
            self.log_fn = os.path.join(self.root_directory,
                                       self.data_directory, 'pyposmat.log')
        else:
            self.log_fn = log_fn
        self.o_log = PyposmatLogFile(filename=self.log_fn)
        self.log_to_stdout = log_to_stdout

    def setup_mpi_environment(self):
        """Read communicator, rank, size and processor name from MPI."""
        self.mpi_comm = MPI.COMM_WORLD
        self.mpi_rank = self.mpi_comm.Get_rank()
        self.mpi_size = self.mpi_comm.Get_size()
        self.mpi_procname = MPI.Get_processor_name()
        self.log_mpi_environment()

    # random seed management
    def determine_rv_seeds(self, seed=None, i_iteration=None):
        """Set the random variable seeds across ranks and iterations.

        A global seed generates an array of seeds of shape
        `(mpi_size, n_iterations)`, stored in `rv_seeds`; numpy is then
        reseeded with the entry of this rank and iteration.

        Args:
            seed(int,optional): the global seed.  If not given, the
                attribute `rv_seed` is used, or drawn at random when that
                is None.
            i_iteration(int,optional): the iteration to seed for, by
                default the attribute `i_iteration`
        """
        RAND_INT_LOW = 0
        RAND_INT_HIGH = 2147483647

        assert type(seed) in [type(None), int]
        assert type(i_iteration) in [type(None), int]

        if i_iteration is None:
            i_iteration = self.i_iteration

        # set the seed attribute (was `==`, a comparison without effect)
        if seed is not None:
            self.rv_seed = seed
        if self.rv_seed is None:
            self.rv_seed = np.random.randint(low=RAND_INT_LOW,
                                             high=RAND_INT_HIGH)

        # reseeding with the global seed makes the array below identical
        # on every rank that holds the same rv_seed
        np.random.seed(self.rv_seed)
        self.rv_seeds = np.random.randint(low=RAND_INT_LOW,
                                          high=RAND_INT_HIGH,
                                          size=(int(self.mpi_size),
                                                self.n_iterations))

        # now restart the seed for this rank
        np.random.seed(self.rv_seeds[self.mpi_rank, i_iteration])

    # ------------------------------------------------------------------
    # logging methods
    # ------------------------------------------------------------------
    def log(self, s):
        """Print `s` when `log_to_stdout` is set and write it to `o_log`."""
        if self.log_to_stdout:
            print(s)
        if self.o_log is not None:
            self.o_log.write(s)

    def log_iteration_information(self, i_iteration):
        """Log the section header of an iteration on rank 0.

        Args:
            i_iteration(int): the iteration number
        """
        if self.mpi_rank == 0:
            title = 'Begin Iteration {}/{}'.format(i_iteration + 1,
                                                   self.n_iterations)
            self.log(self.SECTION_HEADER_FORMAT.format(title))
        MPI.COMM_WORLD.Barrier()

    def log_more_iteration_information(self):
        """Have rank 0 print the sampler's structure and sampling setup."""
        # TODO: this logging needs to go into a separate logging method. -EJR
        if self.mpi_rank == 0:
            self.mc_sampler.print_structure_database()
            self.mc_sampler.print_sampling_configuration()
            if self.i_iteration == 0:
                self.mc_sampler.print_initial_parameter_distribution()
            self.log(80 * '-')
        MPI.COMM_WORLD.Barrier()

    def log_mpi_environment(self):
        """Log the MPI communication information on rank 0."""
        if self.mpi_rank == 0:
            m = [
                self.SECTION_HEADER_FORMAT.format(
                    'MPI communication information')
            ]
            m += ['mpi_size={}'.format(self.mpi_size)]
            # the message used to be assembled but never logged
            self.log("\n".join(m))
        MPI.COMM_WORLD.Barrier()

    def log_random_seeds(self, i_iteration):
        """Log the global seed on rank 0, then each rank's seed in turn.

        Args:
            i_iteration(int): the iteration whose seeds are logged
        """
        if self.mpi_rank == 0:
            self.log(80 * '-')
            self.log('{:^80}'.format('GENERATED RANDOM SEEDS'))
            self.log(80 * '-')
            self.log('global_seed:{}'.format(str(self.rv_seed)))
            self.log('seeds_for_this_iteration:')
            self.log('{:^8} {:^8}'.format('rank', 'seed'))
            self.log('{} {}'.format(8 * '-', 8 * '-'))
        MPI.COMM_WORLD.Barrier()

        # one barrier per rank so that the lines come out in rank order
        for i_rank in range(self.mpi_size):
            if self.mpi_rank == i_rank:
                self.log('{:^8} {:>10}'.format(
                    i_rank, self.rv_seeds[i_rank, i_iteration]))
            MPI.COMM_WORLD.Barrier()

    def get_results_dict(self):
        """Return an ordered dictionary holding the MPI group size.

        Returns:
            (OrderedDict) `{'mpi': {'size': mpi_size}}`
        """
        rd = OrderedDict()
        rd['mpi'] = OrderedDict()
        rd['mpi']['size'] = self.mpi_size
        # the dictionary used to be built and then dropped
        return rd

    # ------------------------------------------------------------------
    # inspection of existing output
    # ------------------------------------------------------------------
    def analyze_data_directories(self, data_dir=None):
        """Walk the kde/results files of a data directory in order.

        Args:
            data_dir(str,optional): the directory to inspect, by default
                the attribute `data_directory`

        Returns:
            (int, list) the iteration index at which the walk stopped and
            the paths collected on the way
        """
        _d = self.data_directory if data_dir is None else data_dir
        i = 0
        contents = []
        if not os.path.isdir(_d):
            return i, contents

        results_fn = None
        while True:
            kde_fn = os.path.join(_d, "pyposmat.kde.{}.out".format(i))
            if os.path.exists(kde_fn):
                contents.append(kde_fn)
            elif i > 0:
                # NOTE(review): the source layout does not show whether the
                # walk also stops here when i == 0; it is assumed that a
                # missing kde file for iteration 0 is not terminal.
                contents.append(results_fn)
                break

            results_fn = os.path.join(_d,
                                      "pyposmat.results.{}.out".format(i))
            if not os.path.exists(results_fn):
                break
            i = i + 1
        return i, contents

    def analyze_rank_directories(self, root_dir=None):
        """Count consecutive rank directories which hold a results file.

        Args:
            root_dir(str,optional): the directory holding the `rank_<i>`
                directories, by default the attribute `root_directory`

        Returns:
            (int, list) the number of rank directories found and the
            results file paths, relative to the root directory
        """
        # `root_directory` was referenced here but is not a local name
        _d = self.root_directory if root_dir is None else root_dir
        i = 0
        contents = []
        while True:
            rank_name = "rank_{}".format(i)
            if not os.path.isdir(os.path.join(_d, rank_name)):
                break
            rank_fn = os.path.join(rank_name, "pyposmat.results.out")
            if not os.path.isfile(os.path.join(_d, rank_fn)):
                break
            contents.append(rank_fn)
            i = i + 1
        return i, contents

    def find_initial_parameters_file(self):
        """Return the configured initial parameter file if it is a file.

        Returns:
            (str) the path built from the root directory and the `file`
            entry of iteration 0, or None when there is no such entry or
            the path is not an existing file
        """
        if 'file' in self.configuration.sampling_type[0]:
            _init_fn = os.path.join(
                self.root_directory,
                self.configuration.sampling_type[0]['file'])
            if os.path.isfile(_init_fn):
                return _init_fn
        return None

    # ------------------------------------------------------------------
    # merging of per-rank output
    # ------------------------------------------------------------------
    def merge_data_files(self,
                         i_iteration,
                         last_datafile_fn=None,
                         new_datafile_fn=None):
        """Merge the per-rank results files into one data file.

        Unless the iteration samples `from_file`, the merged rows get new
        `sim_id` values and are appended to the candidates of
        `last_datafile_fn`.

        Args:
            i_iteration(int): the current iteration which just finished
            last_datafile_fn(str,optional): the data file of the previous
                candidates, by default `pyposmat.kde.<i_iteration>.out` in
                the data directory
            new_datafile_fn(str,optional): where to write the result, by
                default `pyposmat.results.<i_iteration>.out` in the data
                directory

        Raises:
            FileNotFoundError: no rank results file exists, or
                `last_datafile_fn` is missing for an iteration other
                than 0
        """
        if last_datafile_fn is None:
            last_datafile_fn = os.path.join(
                self.data_directory,
                'pyposmat.kde.{}.out'.format(i_iteration))
        if new_datafile_fn is None:
            new_datafile_fn = os.path.join(
                self.data_directory,
                'pyposmat.results.{}.out'.format(i_iteration))

        # sorted, because os.listdir returns entries in arbitrary order
        # and the row order decides the sim_id of every row
        rank_dirs = sorted(v for v in os.listdir(self.root_directory)
                           if v.startswith('rank_'))
        filenames = [
            os.path.join(self.root_directory, v, 'pyposmat.results.out')
            for v in rank_dirs
        ]

        data = None
        frames = []
        for v in filenames:
            rank_data = PyposmatDataFile()
            rank_data.read(filename=v)
            if data is None:
                # the first file read carries the merged table
                data = rank_data
            frames.append(rank_data.df)
        if data is None:
            raise FileNotFoundError(
                "no rank results files found in {}".format(
                    self.root_directory))
        if len(frames) > 1:
            data.df = pd.concat(frames)

        is_from_file = (self.configuration.sampling_type[i_iteration]['type']
                        == 'from_file')
        if is_from_file:
            # rows sampled from a file keep their sim_id
            data.write(filename=new_datafile_fn)
            return

        sim_id_fmt = '{:0>2}_{:0>6}'
        data.df['sim_id'] = [
            sim_id_fmt.format(i_iteration, i) for i in range(len(data.df))
        ]

        self.log("merging with candidates from previous simulations")
        self.log("\tfilename:{}".format(last_datafile_fn))
        data_old = PyposmatDataFile()
        try:
            data_old.read(filename=last_datafile_fn)
        except FileNotFoundError:
            # the first iteration has no previous candidates
            if i_iteration != 0:
                raise
            data.write(filename=new_datafile_fn)
        else:
            data_old.df = pd.concat([data_old.df, data.df])
            data_old.write(filename=new_datafile_fn)

    def merge_error_files(self, i_iteration):
        """Merge the per-rank bad parameter files into the data directory.

        The merged rows get new `sim_id` values and, unless the iteration
        samples `from_file`, are appended to the existing
        `pyposmat.badparameters.out` of the data directory.

        Args:
            i_iteration(int): the current iteration which just finished

        Raises:
            FileNotFoundError: the existing bad parameters file is missing
                for an iteration other than 0
        """
        badparameters_fn = os.path.join(self.data_directory,
                                        'pyposmat.badparameters.out')

        # sorted, because os.listdir returns entries in arbitrary order
        rank_dirs = sorted(v for v in os.listdir(self.root_directory)
                           if v.startswith('rank_'))
        filenames = [
            os.path.join(self.root_directory, v,
                         'pyposmat.badparameters.out') for v in rank_dirs
        ]

        # consolidate rank directories
        badparameters_new = None
        for v in filenames:
            rank_file = PyposmatBadParametersFile(o_config=self.configuration)
            try:
                rank_file.read(filename=v)
            except FileNotFoundError:
                self.log("no bad parameters file at {}".format(v))
                was_read = False
            else:
                was_read = True

            if badparameters_new is None:
                # the first object carries the merged table, even when its
                # own file could not be read
                badparameters_new = rank_file
            elif was_read:
                badparameters_new.df = pd.concat(
                    [badparameters_new.df, rank_file.df])

        if badparameters_new is None:
            # no rank directory at all; this used to be an AttributeError
            self.log("no bad parameters file at {}".format(
                self.root_directory))
            return
        if badparameters_new.df is None:
            # no previous bad parameters found
            # TODO: need to implement something here to deal with bad
            # parameters
            # NOTE(review): the source layout does not show whether the
            # write below also ran in this case; nothing is written.
            return

        # determine the sim_id for bad parameters
        sim_id_fmt = '{:0>2}_{:0>6}'
        badparameters_new.df['sim_id'] = [
            sim_id_fmt.format(i_iteration, i)
            for i in range(len(badparameters_new.df))
        ]

        if self.configuration.sampling_type[i_iteration][
                'type'] == "from_file":
            badparameters_new.write(filename=badparameters_fn)
            return

        self.log("merging with bad candidates from previous simulations")
        self.log("\tfilename:{}".format(badparameters_fn))
        badparameters = PyposmatBadParametersFile(
            o_config=self.configuration)
        try:
            badparameters.read(filename=badparameters_fn)
        except FileNotFoundError:
            # the first iteration has no previous bad parameters
            if i_iteration != 0:
                raise
            badparameters_new.write(filename=badparameters_fn)
        else:
            badparameters.df = pd.concat(
                [badparameters.df, badparameters_new.df])
            badparameters.write(filename=badparameters_fn)

    # ------------------------------------------------------------------
    # analysis
    # ------------------------------------------------------------------
    def analyze_results(self,
                        i_iteration,
                        data_fn=None,
                        config_fn=None,
                        kde_fn=None,
                        analysis_fn=None):
        """Analyze the results of an iteration and write the next kde file.

        Args:
            i_iteration(int): the iteration to analyze
            data_fn(str,optional): the path of the results file, by default
                `pyposmat.results.<i_iteration>.out` in the data directory
            config_fn(str,optional): the path of the configuration file, by
                default the attribute `configuration_filename`
            kde_fn(str,optional): the path of the kde file to write, by
                default `pyposmat.kde.<i_iteration+1>.out` in the data
                directory
            analysis_fn(str,optional): the path of the analysis file, by
                default `pyposmat.analysis.out` in the data directory
        """
        data_root = os.path.join(self.root_directory, self.data_directory)
        if data_fn is None:
            data_fn = os.path.join(
                data_root, 'pyposmat.results.{}.out'.format(i_iteration))
        if config_fn is None:
            config_fn = os.path.join(self.root_directory,
                                     self.configuration_filename)
        if kde_fn is None:
            kde_fn = os.path.join(
                data_root, 'pyposmat.kde.{}.out'.format(i_iteration + 1))
        if analysis_fn is None:
            analysis_fn = os.path.join(data_root, 'pyposmat.analysis.out')

        data_analyzer = PyposmatDataAnalyzer()
        data_analyzer.initialize_configuration(config_fn=config_fn)
        data_analyzer.analyze_results_data(i_iteration, filename=data_fn)
        assert isinstance(data_analyzer.results_statistics, OrderedDict)

        # an existing analysis file is read before it is updated
        if os.path.isfile(analysis_fn):
            data_analyzer.read_analysis_file(filename=analysis_fn)

        self.log(
            data_analyzer.str__results_descriptive_statistics(
                statistics=data_analyzer.results_statistics))
        self.log(data_analyzer.str__qoi_filtering_summary())

        data_analyzer.write_kde_file(filename=kde_fn)
        data_analyzer.analyze_kde_data(i_iteration, filename=kde_fn)
        assert isinstance(data_analyzer.kde_statistics, OrderedDict)
        self.log(
            data_analyzer.str__kde_descriptive_statistics(
                statistics=data_analyzer.kde_statistics))

        data_analyzer.update_analysis(i_iteration)
        data_analyzer.write_analysis_file(filename=analysis_fn)

    # ------------------------------------------------------------------
    # configuration
    # ------------------------------------------------------------------
    def read_configuration_file(self, filename=None):
        """Read the configuration file into the attribute `configuration`.

        The attribute `configuration_filename` is made absolute.  Rank 0
        logs the parameter, qoi and error names.

        Args:
            filename(str,optional): the path of the configuration file, by
                default the attribute `configuration_filename`
        """
        assert type(filename) in [type(None), str]
        assert type(self.configuration_filename) in [type(None), str]

        if filename is not None:
            self.configuration_filename = filename
        if not os.path.isabs(self.configuration_filename):
            self.configuration_filename = os.path.abspath(
                self.configuration_filename)

        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=self.configuration_filename)

        if self.mpi_rank == 0:
            self._write_parameter_names()
            self._write_qoi_names()
            self._write_error_names()

    def _log_name_section(self, title, names):
        """Log `names`, one per line, below a centered and ruled title."""
        s = [80 * '-', '{:^80}'.format(title), 80 * '-']
        s += list(names)
        self.log("\n".join(s))

    def _write_parameter_names(self, parameter_names=None):
        """Log the parameter names, by default those of the configuration."""
        if parameter_names is None:
            parameter_names = self.parameter_names
        self._log_name_section('PARAMETER_NAMES', parameter_names)

    def _write_qoi_names(self, qoi_names=None):
        """Log the qoi names, by default those of the configuration."""
        if qoi_names is None:
            qoi_names = self.qoi_names
        self._log_name_section('QOI_NAMES', qoi_names)

    def _write_error_names(self, error_names=None):
        """Log the error names, by default those of the configuration."""
        if error_names is None:
            error_names = self.error_names
        self._log_name_section('ERROR_NAMES', error_names)