def test__write_header_string__no_args(): testing_set = get_testing_set() o = PyposmatBadParametersFile(filename=testing_set['badparameters_out_fn'], config_fn=testing_set['config_fn']) o.write_header_section() assert os.path.isfile(testing_set['badparameters_out_fn']) if os.path.isfile(testing_set['badparameters_out_fn']): os.remove(testing_set['badparameters_out_fn'])
def test__write_header_string__w_filename(): testing_set = get_testing_set() badparameters_out_fn = 'pyposmat.badparameters.test' o = PyposmatBadParametersFile(filename=testing_set['badparameters_out_fn'], config_fn=testing_set['config_fn']) o.write_header_section(filename=badparameters_out_fn) assert os.path.isfile(badparameters_out_fn) if os.path.isfile(badparameters_out_fn): os.remove(badparameters_out_fn)
def test__write_header_section(): parameter_names = ['a,b,c'] filename = 'test.out' if os.path.isfile(filename): os.remove(filename) f = PyposmatBadParametersFile() f.write_header_section(parameter_names=parameter_names, filename=filename) assert os.path.isfile(filename) if os.path.isfile(filename): os.remove(filename)
def dev__write_header_section(): print(80 * '-') print('{:^80}'.format('write_header_section')) print(80 * '-') testing_set = get_testing_set() if os.path.isfile(testing_set['filename']): os.remove(testing_set['filename']) f = PyposmatBadParametersFile() f.write_header_section(parameter_names=testing_set['parameter_names'], filename=testing_set['filename']) with open(testing_set['filename'], 'r') as f: print(f.read())
def dev__write_simulation_exception(): import importlib testing_set = get_testing_set() o = PyposmatBadParametersFile() o.write_header_section(parameter_names=testing_set['parameter_names'], filename=testing_set['filename']) module_name = 'pypospack.exceptions' module = importlib.import_module(module_name) class_names = ['LammpsSimulationError'] for class_name in class_names: sim_id = class_name m = "message" exception = getattr(module, class_name)(m, parameters=testing_set['parameters']) o.write_simulation_exception(sim_id=sim_id, exception=exception)
class PyposmatMonteCarloSampler(PyposmatEngine): def __init__(self, filename_in='pyposmat.config.in', filename_out='pyposmat.results.out', o_log=None, mpi_rank=None, mpi_size=None, base_directory=None): """Additional attributes are set by the base class :obj:PyposmatEngine Args: filename_in (str) - path of the configuration file filename_out (str) - path of the output file o_log (PyposmatLogFile) - if type(o_log) is a string, then the string is treated as a path in which to log information to. If type(o_log) is PyposmatLogFile then it is set as an attribute for the refernce. mpi_rank (int) mpi_size (int) base_directory (str,optional): Either the relative or full path which provides a unique drive addressing space for simultaneously running simulations. Attributes: mpi_rank (int) - this is passed in mpi_size (int) - this is passed in pyposmat_data_in_filename (str) - the path of the datafile to read in pyposmat_data_out_filename (str) - the path of the datafile to write simulation results to """ assert isinstance(filename_in, str) assert isinstance(filename_out, str) assert type(base_directory) in [str, type(None)] PyposmatEngine.__init__(self, filename_in=filename_in, filename_out=filename_out, base_directory=base_directory, fullauto=False) if mpi_rank is None: self.mpi_rank = 0 else: self.mpi_rank = mpi_rank if mpi_size is None: self.mpi_size = 1 else: self.mpi_size = mpi_size assert self.mpi_rank < self.mpi_size self.mpi_rank = mpi_rank self.mpi_size = mpi_size self.pyposmat_data_in_filename = None self.pyposmat_data_out_filename = filename_out self.pyposmat_badparameters_filename = 'pyposmat.badparameters.out' try: self.configure_logger(o_log) except TypeError as e: m = "Unable to to configure obj_log based on attribute log:{}".format( str(o_log)) raise TypeError(m) def configure_logger(self, o_log=None): """ Configurtion of the log object has different behavior based upon the type passed into the argument o_log. If o_log is PyposmatLogFile, that object will be accessed by reference. A string is assumed to be a filename location. By default the argument for o_log is None, which means logging will go to standard out by means of the print() function. Args: o_log (str,PyposmatLogFile,None): default: None """ if type(o_log) is PyposmatLogFile: self.obj_log = o_log elif type(o_log) is str: self.obj_log = PyposmatLogFile(filename=o_log) elif o_log is None: self.obj_log = None else: m = "log object must be str, PyposmatLogFile, or None" raise TypeError(m) def log(self, str_msg): if type(str_msg) is str: m = str_msg elif type(str_msg) is list: m = "\n".join(str_msg) if type(self.obj_log) is PyposmatLogFile: self.obj_log.write(m) print(m) def configure_pyposmat_datafile_in(self, filename): self.pyposmat_data_in_filename = filename self.pyposmat_datafile_in = PyposmatDataFile(filename) def configure_pyposmat_datafile_out(self, filename=None): if filename is not None: assert type(filename) is str self.pyposmat_data_out_filename = filename self.pyposmat_datafile_out = PyposmatDataFile(filename) def configure_pyposmat_badparameters_file(self, filename=None): if filename is not None: assert type(filename) is str self.pyposmat_badparameters_filename = filename self.pyposmat_badparameters = PyposmatBadParametersFile( filename=self.pyposmat_badparameters_filename, o_config=self.configuration) def read_configuration_file(self, filename=None): PyposmatEngine.read_configuration_file(self, filename=filename) # self.structure_directory = self.configuration.structures['structure_directory'] self.n_iterations = self.configuration.sampling_type['n_iterations'] self.parameter_names = [ p for p in self.configuration.sampling_distribution ] self.qoi_names = [k for k in self.configuration.qois] self.error_names = ['{}.err'.format(k) for k in self.qoi_names] self.parameter_distribution_definition =\ self.configuration.sampling_distribution try: self.free_parameter_names = [ k for k, v in self.parameter_distribution_definition.items() if v[0] != 'equals' ] except KeyError as e: print(self.parameter_distribution_definition.items()) raise if self.configuration.sampling_constraints is not None: self.parameter_constraints = copy.deepcopy( self.configuration.sampling_constraints) else: self.parameter_constraints = OrderedDict() self.constrained_parameter_names = [] for p in self.parameter_names: if p not in self.free_parameter_names: self.constrained_parameter_names.append(p) def run_simulations(self, i_iteration, n_samples=None, filename=None): """ Args: i_iteration(int): the iteration cycle we are on. n_samples(int,optional): the number of parameters to evaluate filename(str,optional): the filename """ assert type(i_iteration) is int assert type(n_samples) in [type(None), int] assert type(filename) in [type(None), str] i = i_iteration _sampling_type = self.configuration.sampling_type[i]['type'] _n_samples = self.configuration.sampling_type[i]['n_samples'] if self.mpi_rank == 0: m = [ "R{}: Starting iteration N={}".format(self.mpi_rank, i_iteration) ] if _sampling_type is "from_file": m += [ "R{}: Sampling parameters from {}".format( self.mpi_rank, filename) ] else: m += [ "R{}: Attemping n_samples={} with sampling_type={}".format( self.mpi_rank, _n_samples, _sampling_type) ] if filename is not None: m += ["R{}: Using file:{}".format(self.mpi_rank, filename)] self.log(m) if n_samples is not None: _n_samples = n_samples if _sampling_type == 'parametric': self.run_parameteric_sampling(n_samples=_n_samples) elif _sampling_type == 'kde': if filename is None: raise ValueError('cannot do kde sampling with out filename') self.run_kde_sampling(n_samples=_n_samples, filename_in=filename) elif _sampling_type == 'from_file': if filename is None: raise ValueError('cannot do filesampling without file') self.run_file_sampling(filename) else: raise ValueError('unknown sampling type:{}'.format(_sampling_type)) def write_badparameters_header(self): self.pyposmat_badparameters.write_header_section( filename=self.pyposmat_badparameters_filename) def write_data_out_header(self): self.pyposmat_datafile_out.write_header_section( filename=self.pyposmat_data_out_filename, parameter_names=self.parameter_names, qoi_names=self.qoi_names, error_names=self.error_names) def get_sim_id(self, i, s=None): if s is not None: return s elif isinstance(i, int): return str(i) else: m = 'cannot determine sim_id from i:{} and s:{}'.format(i, s) raise TypeError(m) def run_parameteric_sampling(self, n_samples): # create random number generator _rv_generators = OrderedDict() for p in self.free_parameter_names: distribution_type = self.parameter_distribution_definition[p][0] if distribution_type == 'uniform': _a = self.parameter_distribution_definition[p][1]['a'] _b = self.parameter_distribution_definition[p][1]['b'] _loc = _a _scale = _b - _a _rv_generators[p] = scipy.stats.uniform(loc=_loc, scale=_scale) elif distribution_type == 'normal': _mu = self.parameter_distribution_definition[p][1]['mu'] _sigma = self.parameter_distribution_definition[p][1]['sigma'] _loc = _mu _scale = _sigma _rv_generators[p] = scipy.stats.norm(loc=_loc, scale=_scale) else: raise ValueError( 'unknown distribution type: {}'.format(distribution_type)) self.write_data_out_header() self.write_badparameters_header() time_start_iteration = time.time() _n_errors = 0 for i_sample in range(n_samples): # determin sim_id sim_id = self.get_sim_id(i=i_sample) # new OrderedDict to hold in parameter values _parameters = OrderedDict([(p, None) for p in self.parameter_names]) # generate free parameters for ordered dictionary for p in self.free_parameter_names: _parameters[p] = _rv_generators[p].rvs(size=1)[0] # determine parameters determined from equality constraints for p in self.constrained_parameter_names: _constraint_type = self.parameter_distribution_definition[p][0] if _constraint_type == 'equals': # this condition is for fitting EoS for EAM function which # requires a refernce ground state crystal structure if p.endswith('latticetype'): _v = self.parameter_distribution_definition[p][1] _parameters[p] = _v # process evaluation strings elif type(self.parameter_distribution_definition[p] [1]) is not list: _str_eval = str( self.parameter_distribution_definition[p][1]) # replace string values with numerical values for fp in self.free_parameter_names: if fp in _str_eval: _str_eval = _str_eval.replace( fp, str(_parameters[fp])) # evaluate the string into a float _parameters[p] = eval(_str_eval) else: raise ValueError("oops") # additional tasks added here for p in self.constrained_parameter_names: if self.parameter_distribution_definition[p][0] == 'equals': if type(self.parameter_distribution_definition[p] [1]) is list: # required for EAM potentials to calculate dens_max for embedding function if self.parameter_distribution_definition[p][1][ 0] == 'equilibrium_density': a0 = self.parameter_distribution_definition[p][1][ 1] latt = self.parameter_distribution_definition[p][ 1][2] _parameters[ p] = self.calculate_equilibrium_density( a0, latt, _parameters) try: # check constraints for k, v in self.parameter_constraints.items(): _eval_str = v for pn, pv in _parameters.items(): _eval_str = _eval_str.replace(pn, str(pv)) if eval(_eval_str) is False: m = "failed parameter constraint, {}".format(k) raise PyposmatBadParameterError(m, parameters=_parameters) _results = self.evaluate_parameter_set(parameters=_parameters) except PyposmatBadParameterError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except LammpsSimulationError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackTaskManagerError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackBadEamEosError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 else: #if type(sim_id) is float: # _sim_id = int(sim_id) _sim_id = "{}".format(i_sample) self.pyposmat_datafile_out.write_simulation_results( filename=self.pyposmat_data_out_filename, sim_id=_sim_id, results=_results) finally: # print out summaries every 10 solutions if (i_sample + 1) % 10 == 0: n_samples_completed = i_sample + 1 time_end = time.time() time_total = time_end - time_start_iteration avg_time = time_total / n_samples_completed _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format( self.mpi_rank, n_samples_completed, time_total, avg_time, _n_errors) self.log(_str_msg) def get_options_kde_bandwidth(self): """ Returns: OrderedDict """ kde_options = OrderedDict() kde_options['chiu1999'] = OrderedDict() kde_options['chiu1999'][ 'reference'] = 'Chiu, S.T. Ann. Stat. 1991, Vol. 19, No 4. 1883-1905' kde_options['chiu1999']['doi'] = '10.1214/aos/1176348376' kde_options['chiu1999']['description'] = "" kde_options['silverman1984'] = OrderedDict() kde_options['silverman1984'][ 'reference'] = 'Silverman, B.W. (1986). Density Estimation for Statistics and Data Analysis. London: Chapman & Hall/CRC. p. 48' kde_options['silverman1984']['isbn'] = '0-412-24620-1' def determine_kde_bandwidth(self, X, kde_bw_type): """ determine kde bandwidth Args: X(np.ndarray): array of data to determine the KDE bandwidth kde_bw_type(str): the method of estimating the optimal bandwidth """ if self.mpi_rank == 0: self.log('determine kde bandwidth...') if kde_bw_type == 'chiu1999': try: h = Chiu1999_h(X) except ValueError as e: print(X) raise elif kde_bw_type == 'silverman1985': h = Silverman1986 else: m = 'kde_bw_type, {}, is not an implemented bandwidth type' raise PypospackBadKdeBandwidthType(m) if self.mpi_rank == 0: self.log('{}:{}'.format(kde_bw_type, h)) self.kde_bw_type = kde_bw_type self.kde_bw = h return self.kde_bw def run_kde_sampling(self, n_samples, filename_in, cluster_id=None, kde_bw_type='chiu1999'): """ sample from a KDE distribution Args: n_samples(int): the number of samples to draw from the KDE distribution filename_in(str): the path to the datafile from which the parameters will be drawn from cluster_id(int): if we need to use a specific cluster_id, we specify it here. otherwise, it will be drawn from all parameters contained within the set. kde_bw_type(str): the method of estimating the optimal bandwidth """ _datafile_in = PyposmatDataFile() _datafile_in.read(filename_in) if cluster_id is None: _free_parameter_names = [str(v) for v in self.free_parameter_names] _X = _datafile_in.df[_free_parameter_names].values.T else: # subselect the dataframe by the cluster_id of interest _datafile_in.df = _datafile_in.df.loc[_datafile_in.df['cluster_id'] == cluster_id] _X = _datafile_in.df[self.free_parameter_names].loc[ _datafile_in.df['cluster_id'] == cluster_id].values.T # self.log.write("cluster_id {c} _X.shape={x}".format(c=cluster_id, x=_X.shape)) kde_bw = self.determine_kde_bandwidth(X=_X, kde_bw_type=kde_bw_type) _rv_generator = scipy.stats.gaussian_kde(_X, kde_bw) self.write_data_out_header() self.write_badparameters_header() time_start_iteration = time.time() _n_errors = 0 for i_sample in range(n_samples): # determine sim_id sim_id = self.get_sim_id(i=i_sample) # new OrderedDict to hold in parameter values _parameters = OrderedDict([(p, None) for p in self.parameter_names]) # generate free parameters for ordered dictionary _free_parameters = _rv_generator.resample(1) for i, v in enumerate(self.free_parameter_names): _parameters[v] = float(_free_parameters[i, 0]) # determine parameters determined from equality constraints for p in self.constrained_parameter_names: _constraint_type = self.parameter_distribution_definition[p][0] if _constraint_type == 'equals': # this condition is for fitting EoS for EAM function which # requires a refernce ground state crystal structure if p.endswith('latticetype'): _v = self.parameter_distribution_definition[p][1] _parameters[p] = _v # process evaluation strings elif type(self.parameter_distribution_definition[p] [1]) is not list: _str_eval = str( self.parameter_distribution_definition[p][1]) # replace string values with numerical values for fp in self.free_parameter_names: if fp in _str_eval: _str_eval = _str_eval.replace( fp, str(_parameters[fp])) # evaluate the string into a float _parameters[p] = eval(_str_eval) else: raise ValueError("oops") for p in self.constrained_parameter_names: if self.parameter_distribution_definition[p][0] == 'equals': # some EAM potentials have a normalizing equilbirum density # which have to be determined based upon the parameterization of # the electron density function if type(self.parameter_distribution_definition[p] [1]) is list: if self.parameter_distribution_definition[p][1][ 0] == 'equilibrium_density': a0 = self.parameter_distribution_definition[p][1][ 1] latt = self.parameter_distribution_definition[p][ 1][2] _parameters[ p] = self.calculate_equilibrium_density( a0, latt, _parameters) try: # now we check parameter inequality constraints for k, v in self.parameter_constraints.items(): _eval_str = v for pn, pv in _parameters.items(): _eval_str = _eval_str.replace(pn, str(pv)) if eval(_eval_str) is False: s = 'parameter constraint failed, {}'.format(k) raise PyposmatBadParameterError(s, parameters=_parameters) _results = self.evaluate_parameter_set(parameters=_parameters) except PyposmatBadParameterError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except LammpsSimulationError as e: assert isinstance(self.pyposmat_badparameters, PyposmatBadParametersFile) assert isinstance(self.pyposmat_badparameters.parameter_names, list) self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackTaskManagerError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackBadEamEosError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 else: # determine sim_id _sim_id = int(i_sample) self.pyposmat_datafile_out.write_simulation_results( filename=self.pyposmat_data_out_filename, sim_id=i_sample, cluster_id=cluster_id, results=_results) finally: # print out summaries every 10 solutions if (i_sample + 1) % 10 == 0: n_samples_completed = i_sample + 1 time_end = time.time() time_total = time_end - time_start_iteration avg_time = time_total / n_samples_completed _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format( self.mpi_rank, n_samples_completed, time_total, avg_time, _n_errors) self.log(_str_msg) d = OrderedDict() d['kde_bandwidth'] = OrderedDict() d['kde_bandwidth']['type'] = self.kde_bw_type d['kde_bandwidth']['h'] = self.kde_bw def run_file_sampling(self, filename_in): _datafile_in = PyposmatDataFile(filename=filename_in) _datafile_in.read() # configure random number generator self.write_data_out_header() self.write_badparameters_header() time_start_iteration = time.time() _n_errors = 0 i_sample = 0 for row in _datafile_in.df.iterrows(): if self.mpi_rank != i_sample % self.mpi_size: i_sample += 1 continue else: i_sample += 1 _parameters = OrderedDict([(p, row[1][p]) for p in self.parameter_names]) _sim_id = row[1]['sim_id'] # generate wierd things for p in self.constrained_parameter_names: if self.parameter_distribution_definition[p][0] == 'equals': if type(self.parameter_distribution_definition[p] [1]) is list: if self.parameter_distribution_definition[p][1][ 0] == 'equilibrium_density': a0 = self.parameter_distribution_definition[p][1][ 1] latt = self.parameter_distribution_definition[p][ 1][2] _parameters[ p] = self.calculate_equilibrium_density( a0, latt, _parameters) try: # check constraints for k, v in self.parameter_constraints.items(): _eval_str = v for pn, pv in _parameters.items(): _eval_str = _eval_str.replace(pn, str(pv)) if eval(_eval_str) is False: raise PyposmatBadParameterError() _results = self.evaluate_parameter_set(parameters=_parameters) except PyposmatBadParameterError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except LammpsSimulationError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackTaskManagerError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 except PypospackBadEamEosError as e: self.pyposmat_badparameters.write_simulation_exception( sim_id=sim_id, exception=e) _n_errors += 1 else: if type(_sim_id) is float: _sim_id = int(sim_id) self.pyposmat_datafile_out.write_simulation_results( filename=self.pyposmat_data_out_filename, sim_id=_sim_id, results=_results) finally: # print out summaries every 10 solutions i_sample = i_sample + 1 if (i_sample) % 10 == 0: n_samples_completed = i_sample time_end = time.time() time_total = time_end - time_start_iteration avg_time = time_total / n_samples_completed _str_msg = '{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format( n_samples_completed, time_total, avg_time, _n_errors) print('rank{}:'.format(self.mpi_rank) + _str_msg) def calculate_equilibrium_density(self, a0, latt, parameters): _parameters = OrderedDict() for k, v in parameters.items(): if k.startswith('d_'): _parameters[k[2:]] = v s = k[2:].split('_')[0] _potential_type = self.configuration.potential['density_type'] _symbols = self.configuration.potential['symbols'] _module_name, _class_name = PotentialObjectMap( potential_type=_potential_type) try: _module = importlib.import_module(_module_name) _class = getattr(_module, _class_name) _dens_potential = _class(symbols=_symbols) except: raise if latt == 'fcc': d = OrderedDict([('1NN', 2 / (2**0.5) * a0), ('2NN', 1.000 * a0), ('3NN', 1.225 * a0)]) Z = OrderedDict([('1NN', 12), ('2NN', 6), ('3NN', 24)]) rcut = (d['2NN'] + d['3NN']) / 2. rmax = 10. r = np.linspace(1, 10, 5000) * rmax / 10 rho = _dens_potential.evaluate(r, _parameters, rcut) rho_e = 0 for m in Z: if d[m] < rcut: rho_e += Z[m] * np.interp(d[m], r, rho[s]) return rho_e def print_structure_database(self): m = [ 80 * '-', '{:^80}'.format('STRUCTURE DATABASE'), 80 * '-', 'structure_directory:{}'.format(self.structure_directory), '', '{:^20} {:^20}'.format('name', 'filename'), '{} {}'.format(20 * '-', 20 * '-') ] m += [ '{:20} {:20}'.format(k, v) for k, v in self.structures['structures'].items() ] self.log(m) def print_sampling_configuration(self): print(80 * '-') print('{:^80}'.format('SAMPLING CONFIGURATION')) print(80 * '-') print('{:^10} {:^10} {:^20}'.format('iteration', 'n_samples', 'sampling_type')) print('{} {} {}'.format(10 * '-', 10 * '-', 20 * '-')) for i in range(self.n_iterations): _sample_type = self.configuration.sampling_type[i]['type'] if _sample_type == 'kde_w_clusters': _n_samples = self.configuration.sampling_type[i][ 'n_samples_per_cluster'] else: _n_samples = self.configuration.sampling_type[i]['n_samples'] print('{:^10} {:^10} {:^20}'.format(i, _n_samples, _sample_type)) def print_initial_parameter_distribution(self): print(80 * '-') print('{:80}'.format('INITIAL PARAMETER DISTRIBUTION')) print(80 * '-') for p in self.parameter_distribution_definition: if p in self.free_parameter_names: str_free = 'free' if self.parameter_distribution_definition[p][0] == 'uniform': print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format( p, str_free, self.parameter_distribution_definition[p][0], self.parameter_distribution_definition[p][1]['a'], self.parameter_distribution_definition[p][1]['b'])) elif self.parameter_distribution_definition[p][0] == 'normal': print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format( p, str_free, self.parameter_distribution_definition[p][0], self.parameter_distribution_definition[p][1]['mu'], self.parameter_distribution_definition[p][1]['sigma'])) else: _distribution_type = self.parameter_distribution_defintion[ p][0] s = "incorrection parameter distribution for parameter {}. probability distribution function, {}, is not supported" s = s.format(p, _distribution_type) raise ValueError(s) else: str_free = 'not_free' print('{:^20} {:^10}'.format(p, str_free))