def dev__run_simulations():
    """Development driver: run the file sampler on the MgO test case.

    Reads the configuration path, input data path and iteration index from
    the module-level ``test_case_MgO`` mapping, verifies that both files
    exist, builds a ``PyposmatFileSampler`` from them and runs the
    simulations for that iteration.

    Raises:
        AssertionError: if either the configuration file or the input data
            file does not exist.
    """
    test_case = test_case_MgO
    config_fn = test_case['config_fn']
    data_in_fn = test_case['data_in_fn']

    # Check the paths that are actually handed to the sampler instead of
    # relying on module-level names that this function does not bind.
    assert os.path.isfile(config_fn)
    assert os.path.isfile(data_in_fn)

    o = PyposmatFileSampler(config_fn=config_fn, data_in_fn=data_in_fn)
    o.run_simulations(i_iteration=test_case['i_iteration'])
def test__configure_task_manager():
    """Check that ``configure_task_manager`` installs a ``TaskManager``.

    The sampler is built from the module-level ``config_fn``,
    ``datafile_in_fn`` and ``datafile_out_fn`` names, its QOI manager is
    configured with testing QOIs only, and the resulting ``task_manager``
    attribute must be exactly of type ``TaskManager``.
    """
    sampler_kwargs = {
        'config_fn': config_fn,
        'data_in_fn': datafile_in_fn,
        'data_out_fn': datafile_out_fn,
    }
    sampler = PyposmatFileSampler(**sampler_kwargs)

    # preparation steps, in the order the task manager needs them
    sampler.create_base_directories()
    sampler.read_configuration_file()
    sampler.configure_qoi_manager(use_fitting_qois=False,
                                  use_testing_qois=True)
    sampler.configure_task_manager()

    from pypospack.task import TaskManager
    task_manager_type = type(sampler.task_manager)
    assert task_manager_type is TaskManager
def dev__configure_qoi_manager():
    """Development driver: print what ``configure_qoi_manager`` produces.

    Builds a ``PyposmatFileSampler`` for the MgO test case, prints the QOIs
    found in its configuration, configures the QOI manager, and then prints
    the configured QOI names together with the types of the QOI manager and
    of its ``tasks`` attribute.
    """
    sampler = PyposmatFileSampler(
        config_fn=test_case_MgO['config_fn'],
        data_in_fn=test_case_MgO['data_in_fn'])

    # state before the QOI manager is configured
    print('qoi_names:', list(sampler.configuration.qois))

    sampler.configure_qoi_manager()

    # state after the QOI manager is configured
    print('o.configuration.qoi_names:', sampler.configuration.qoi_names)

    manager_type = str(type(sampler.qoi_manager))
    print("type(o.qoi_manager):{}".format(manager_type))

    tasks_type = str(type(sampler.qoi_manager.tasks))
    print('type(o.qoi_manager.tasks:{}'.format(tasks_type))
def test____init__():
    """Check the reference potentials exposed by a freshly built sampler.

    The sampler is constructed from the module-level ``config_fn``,
    ``datafile_in_fn`` and ``datafile_out_fn`` names; its
    ``reference_potentials`` attribute must compare equal to the
    module-level ``reference_potentials``.
    """
    sampler = PyposmatFileSampler(config_fn=config_fn,
                                  data_in_fn=datafile_in_fn,
                                  data_out_fn=datafile_out_fn)
    observed = sampler.reference_potentials
    assert observed == reference_potentials
def initialize_sampler(self,
                       config_fn,
                       results_fn,
                       mpi_rank=None,
                       mpi_size=None,
                       o_log=None):
    """Create and configure the Monte Carlo sampler held in `mc_sampler`.

    A `PyposmatMonteCarloSampler` is constructed, asked to create its base
    directories and read its configuration file, pointed at this object's
    `structure_directory`, and then has its QOI manager, task manager,
    output data file and bad-parameters file configured. Finally
    `log_more_iteration_information()` is called.

    Note:
        This breakout is part of a larger effort within PYPOSPACK to have a
        more object-oriented approach for parametric sampling. The eventual
        goal is to implement an instance of PyposmatBaseSampler so that
        users can extend the library by extending that base class.

    Args:
        config_fn(str): absolute path to the configuration file
        results_fn(str): absolute path to the results file
        mpi_rank(int,optional): the MPI rank executing this method. By
            default, the attribute `mpi_rank` is used.
        mpi_size(int,optional): the size of the MPI execution group. By
            default, the attribute `mpi_size` is used.
        o_log(PyposmatLogFile,str,optional): the log file, given either as
            a log file object or as a path string; it is forwarded to the
            sampler. By default, the attribute `o_log` is used.

    Raises:
        AssertionError: if an argument has an unexpected type, or if
            `config_fn` or `results_fn` is not an absolute path.
    """
    assert type(config_fn) is str
    assert type(results_fn) is str
    assert type(mpi_rank) in [type(None), int]
    assert type(mpi_size) in [type(None), int]
    assert type(o_log) in [type(None), PyposmatLogFile, str]

    # check to see if the paths provided are absolute paths
    assert os.path.isabs(config_fn)
    assert os.path.isabs(results_fn)

    # fall back to this object's own attributes for anything not supplied
    if mpi_rank is None:
        mpi_rank = self.mpi_rank
    if mpi_size is None:
        mpi_size = self.mpi_size
    if o_log is None:
        # the documented default: share this object's log with the sampler
        o_log = self.o_log

    self.mc_sampler = PyposmatMonteCarloSampler(filename_in=config_fn,
                                                filename_out=results_fn,
                                                mpi_rank=mpi_rank,
                                                mpi_size=mpi_size,
                                                o_log=o_log)
    self.mc_sampler.create_base_directories()
    self.mc_sampler.read_configuration_file()

    # we have to be able to find the structure directory, so overwrite the
    # value read from the configuration with this object's resolved path
    self.mc_sampler.configuration.structures[
        'structure_directory'] = self.structure_directory

    self.mc_sampler.configure_qoi_manager()
    self.mc_sampler.configure_task_manager()
    self.mc_sampler.configure_pyposmat_datafile_out()
    self.mc_sampler.configure_pyposmat_badparameters_file()

    self.log_more_iteration_information()
def test____init__():
    """Check the attribute types of a sampler built for the MgO test case.

    Both input files named in ``test_case_MgO`` must exist. After
    construction, ``configuration`` must be a ``PyposmatConfigurationFile``
    and both ``datafile_in`` and ``datafile_out`` must be
    ``PyposmatDataFile`` instances.
    """
    config_fn = test_case_MgO['config_fn']
    data_in_fn = test_case_MgO['data_in_fn']

    # both inputs have to be present before the sampler is built
    for required_path in (config_fn, data_in_fn):
        assert os.path.isfile(required_path)

    sampler = PyposmatFileSampler(config_fn=config_fn,
                                  data_in_fn=data_in_fn)

    assert isinstance(sampler.configuration, PyposmatConfigurationFile)
    assert isinstance(sampler.datafile_in, PyposmatDataFile)
    assert isinstance(sampler.datafile_out, PyposmatDataFile)
def dev____init__():
    """Development driver: print the attribute types of a new sampler.

    Verifies that the MgO test-case input files exist, builds a
    ``PyposmatFileSampler`` from them, and prints one line each for the
    types of its ``configuration``, ``datafile_in`` and ``datafile_out``
    attributes.
    """
    config_fn = test_case_MgO['config_fn']
    data_in_fn = test_case_MgO['data_in_fn']

    for required_path in (config_fn, data_in_fn):
        assert os.path.isfile(required_path)

    sampler = PyposmatFileSampler(config_fn=config_fn,
                                  data_in_fn=data_in_fn)

    # one report line per attribute, emitted with a single print call
    report = "\n".join([
        "type(o.configuration)={}".format(type(sampler.configuration)),
        "type(o.datafile_in)={}".format(type(sampler.datafile_in)),
        "type(o.datafile_out={}".format(type(sampler.datafile_out)),
    ])
    print(report)
def test__configure_qoi_manager():
    """Check that ``configure_qoi_manager`` installs a ``QoiManager``.

    The sampler is created with ``fullauto=False``, so the configuration
    file is read explicitly before the QOI manager is configured.
    Afterwards ``configuration`` must be a ``PyposmatConfigurationFile``
    and ``qoi_manager`` must be a ``QoiManager``.
    """
    config_path = test_case_MgO['config_fn']
    sampler = PyposmatFileSampler(config_fn=config_path,
                                  data_in_fn=test_case_MgO['data_in_fn'],
                                  fullauto=False)

    # manual setup, since automatic configuration was switched off
    sampler.read_configuration_file(filename=config_path)
    sampler.configure_qoi_manager()

    assert isinstance(sampler.configuration, PyposmatConfigurationFile)
    assert isinstance(sampler.qoi_manager, QoiManager)
def setup__configure_qoi_manager():
    """Build a file sampler from fixed relative paths and load its config.

    The configuration is taken from ``./data``, the input data file from
    ``../../data/MgO_pareto_data`` and the output data file is placed in
    the current directory. The sampler then creates its base directories
    and reads its configuration file. Nothing is returned.
    """
    sampler = PyposmatFileSampler(
        config_fn=os.path.join("./data", 'pyposmat.config.in'),
        data_in_fn=os.path.join("../../data/MgO_pareto_data",
                                'culled_005.out'),
        data_out_fn=os.path.join("./", 'qoiplus_005.out'))

    sampler.create_base_directories()
    sampler.read_configuration_file()
class PyposmatIterativeSampler(object): """ Iterative Sampler which wraps multiple simulation algorithms. This class wraps multiple simulation algorithms so that they can be run in an iterative manner. Since this class has so many configuration options, the attributes of this class is set by a YAML based configuration file. The class PyposmatConfigurationFile aids in the creation and reading of these options. These attributes are public and be set programmatically within a script. Notes: config_fn = 'data/pyposmat.config.in' engine = PyposmatIterativeSampler(configuration_filename=config_fn) engine.read_configuration_file() engine.run_all() Args: configuration_filename(str): the filename of the YAML configuration file is_restart(bool,optional): When set to True, this argument controls the restart behavior of this class. By default, is set to False is_auto(bool,optional): When set to True, this agument will automatically configure the class. By default this is set to False, mostly because this software is currently in development, and this necessary to to write integration testing log_fn(str,optional): This the filename path where to set logging, by default it is set as `pyposmat.log` contained in the configurable data directory log_to_stdout(bool,optional): When set to True, all log messages will be directed to standard out as well as the log file Attributes: mpi_comm(MPI.Intracomm) mpi_rank(int) mpi_size(int) mpi_nprocs(int) i_iteration(int) n_iterations(int) rv_seed(int) rv_seeds(np.ndarray) configuration_filename = configuration(filename) configuration(PyposmatConfigurationFile) mc_sampler(PyposmatMonteCarloSampler) root_directory(str) data_directory(str) is_restart(bool) start_iteration=0 """ parameter_sampling_types = [ 'parametric', 'kde', 'from_file', 'kde_w_clusters' ] def __init__(self, configuration_filename, is_restart=False, is_auto=False, log_fn=None, log_to_stdout=True): # formats should not contain a trailing end line chracter 
self.SECTION_HEADER_FORMAT = "\n".join([80 * '=', "{:^80}", 80 * "="]) self.RANK_DIR_FORMAT = 'rank_{}' self.mpi_comm = None self.mpi_rank = None self.mpi_size = None self.mpi_nprocs = None self.i_iteration = None self.rv_seed = None self.rv_seeds = None self.configuration_filename = configuration_filename self.configuration = None self.mc_sampler = None self.root_directory = os.getcwd() self.data_directory = 'data' self.is_restart = is_restart self.start_iteration = 0 self.log_fn = log_fn self.log_to_stdout = log_to_stdout self.o_log = None self.initialize_logger(log_fn=log_fn, log_to_stdout=log_to_stdout) if self.is_restart: self.delete_mpi_rank_directories() @property def structure_directory(self): if self.configuration is None: return None else: d = self.configuration.structures['structure_directory'] if not os.path.isabs(d): d = os.path.join(self.root_directory, d) return d @property def n_iterations(self): if self.configuration is None: return None else: return self.configuration.n_iterations @property def qoi_names(self): if self.configuration is None: return None else: return self.configuration.qoi_names @property def error_names(self): if self.configuration is None: return None else: return self.configuration.error_names def delete_mpi_rank_directories(self): if self.mpi_rank == 0: self.log('Deleting previous rank directories') mpi_rank_directories = [ d for d in os.listdir(self.root_directory) if d.startswith('rank_') ] for d in mpi_rank_directories: try: shutil.rmtree(os.path.join(self.root_directory, d)) except: raise MPI.COMM_WORLD.Barrier() def determine_last_iteration_completed(self): for i in range(self.n_iterations): results_fn = os.path.join(self.data_directory, 'pyposmat.results.{}.out'.format(i)) kde_fn = os.path.join(self.data_directory, 'pyposmat.kde.{}.out'.format(i + 1)) if os.path.isfile(results_fn) and os.path.isfile(kde_fn): if self.mpi_rank == 0: self.log('iteration {}: is complete'.format(i)) self.start_iteration = i + 1 else: 
self.start_iteration = i break MPI.COMM_WORLD.Barrier() return self.start_iteration def run_all(self): """runs all iterations This method runs all iterations """ self.setup_mpi_environment() self.initialize_data_directory() self.start_iteration = 0 if self.is_restart: self.determine_last_iteration_completed() if self.mpi_rank == 0: self.log("starting at simulation: {}".format(self.start_iteration)) MPI.COMM_WORLD.Barrier() for i in range(self.start_iteration, self.n_iterations): self.i_iteration = i # log iteration information self.log_iteration_information(i_iteration=i) self.run_simulations(i) MPI.COMM_WORLD.Barrier() if self.mpi_rank == 0: self.log("ALL SIMULATIONS COMPLETE FOR ALL RANKS") self.log("MERGING FILES") self.merge_data_files(i) self.merge_error_files(i) MPI.COMM_WORLD.Barrier() if self.mpi_rank == 0: self.log("ANALYZE RESULTS") self.analyze_results(i) MPI.COMM_WORLD.Barrier() if self.mpi_rank == 0: self.log(80 * '-') self.log('JOBCOMPLETE') def initialize_sampler(self, config_fn, results_fn, mpi_rank=None, mpi_size=None, o_log=None): """ initialize the sampling object This method initializes the `mc_sampler` attribute with a sampler. Note: This breakout is part of a larger effort within PYPOSPACK, to have more object-oriented approach for parametric sampling. The goal eventually is to implement an instance of PyposmatBaseSampler, and allow users of this software library to be able to extend this software by simply extending the base class. Args: config_fn(str): path to the configuration file results_fn(str): path to the results file mpi_rank(int,optional): the MPI rank of executing this method mpi_size(int,optional): the size of the MPI execution group o_log(PyposmatLogFile,str,optional): the log file. If a string is passed, then the sampling class will initialize a separate log file with the string of path created. If a log file object is passed, then sampling object will use that instance of the object to log information. 
By defaut, it will pass the attribute, `o_log`. """ assert type(config_fn) is str assert type(results_fn) is str assert type(mpi_rank) in [type(None), int] assert type(mpi_size) in [type(None), int] assert type(o_log) in [type(None), PyposmatLogFile, str] # check to see if the paths provided are absolute paths assert os.path.isabs(config_fn) assert os.path.isabs(results_fn) if mpi_rank is None: mpi_rank = self.mpi_rank if mpi_size is None: mpi_size = self.mpi_size self.mc_sampler = PyposmatMonteCarloSampler(filename_in=config_fn, filename_out=results_fn, mpi_rank=mpi_rank, mpi_size=mpi_size, o_log=o_log) self.mc_sampler.create_base_directories() self.mc_sampler.read_configuration_file() # we have to be able to find the structure directory self.mc_sampler.configuration.structures[ 'structure_directory'] = self.structure_directory self.mc_sampler.configure_qoi_manager() self.mc_sampler.configure_task_manager() self.mc_sampler.configure_pyposmat_datafile_out() self.mc_sampler.configure_pyposmat_badparameters_file() self.log_more_iteration_information() def initialize_file_sampler(self, config_fn, results_fn, i_iteration=0, mpi_rank=None, mpi_size=None, o_log=None): """ initialize the sampling object This method initializes the `mc_sampler` attribute with a sampler. Note: This breakout is part of a larger effort within PYPOSPACK, to have more object-oriented approach for parametric sampling. The goal eventually is to implement an instance of PyposmatBaseSampler, and allow users of this software library to be able to extend this software by simply extending the base class. Args: config_fn(str): path to the configuration file results_fn(str): path to the results file i_iteration(int,optional): the iteration to sample the file from, by default this is set to zero. mpi_rank(int,optional): the MPI rank of executing this method mpi_size(int,optional): the size of the MPI execution group o_log(PyposmatLogFile,str,optional): the log file. 
If a string is passed, then the sampling class will initialize a separate log file with the string of path created. If a log file object is passed, then sampling object will use that instance of the object to log information. By defaut, it will pass the attribute, `o_log`. """ assert type(config_fn) is str assert type(results_fn) is str assert type(mpi_rank) in [type(None), int] assert type(mpi_size) in [type(None), int] assert type(o_log) in [type(None), PyposmatLogFile, str] # check to see if the paths provided are absolute paths assert os.path.isabs(config_fn) assert os.path.isabs(results_fn) if mpi_rank is None: mpi_rank = self.mpi_rank if mpi_size is None: mpi_size = self.mpi_size # get the absolute path of the datafile we are sampling from data_in_fn = None if os.path.isabs( self.configuration.sampling_type[i_iteration]['file']): data_in_fn = self.configuration.sampling_type[i_iteration]['file'] else: data_in_fn = os.path.join( self.root_directory, self.configuration.sampling_type[i_iteration]['file']) data_out_fn = results_fn self.mc_sampler = PyposmatFileSampler(config_fn=config_fn, data_in_fn=data_in_fn, data_out_fn=data_out_fn, mpi_rank=mpi_rank, mpi_size=mpi_size, o_log=o_log, fullauto=False) self.mc_sampler.create_base_directories() self.mc_sampler.read_configuration_file() # we have to be able to find the structure directory self.mc_sampler.configuration.structures[ 'structure_directory'] = self.structure_directory self.mc_sampler.configure_qoi_manager() self.mc_sampler.configure_task_manager() self.mc_sampler.configure_datafile_out() self.mc_sampler.configure_pyposmat_badparameters_file() self.log_more_iteration_information() def initialize_rank_directory(self): """ create the rank directory This method defines the rank directory as an absolute path and stores it in the attribute `rank_directory`. If a current directory exists there, then it is deleted with alll it's contents and then recreated. 
""" rank_directory = os.path.join( self.root_directory, self.RANK_DIR_FORMAT.format(self.mpi_rank)) # find the directory, delete it and it's constants and then recreates ot if os.path.isdir(rank_directory): shutil.rmtree(rank_directory) os.mkdir(rank_directory) self.rank_directory = rank_directory def run_simulations(self, i_iteration): """ run simulation for a single iteration Each rank is given a different execution context so that the disk IO don't conflict """ self.initialize_rank_directory() config_filename = self.configuration_filename results_filename = os.path.join(self.rank_directory, 'pyposmat.results.out') bad_parameters_filename = os.path.join(self.rank_directory, 'pyposmat.badparameters.out') # change execution context for this rank os.chdir(self.rank_directory) # set random seed self.determine_rv_seeds() self.log_random_seeds(i_iteration=i_iteration) sampling_type = self.configuration.sampling_type[i_iteration]['type'] if self.mpi_rank == 0: self.log("sampling_type={}".format(sampling_type)) MPI.COMM_WORLD.Barrier() # <----- parameter sampling type --------------------------------------- if sampling_type == 'parametric': self.initialize_sampler(config_fn=config_filename, results_fn=results_filename, mpi_rank=self.mpi_rank, mpi_size=self.mpi_size, o_log=self.o_log) self.run_parametric_sampling(i_iteration=i_iteration) # <----- kde sampling sampling type --------------------------------------- elif sampling_type == 'kde': self.initialize_sampler(config_fn=config_filename, results_fn=results_filename, mpi_rank=self.mpi_rank, mpi_size=self.mpi_size, o_log=self.o_log) self.run_kde_sampling(i_iteration=i_iteration) # <----- sampling from a file type --------------------------------------- # get parameters from file elif sampling_type == 'from_file': self.initialize_file_sampler(config_fn=config_filename, results_fn=results_filename, mpi_rank=self.mpi_rank, mpi_size=self.mpi_size, o_log=self.o_log) self.run_file_sampling(i_iteration=i_iteration) # <----- kde 
with clusters sampling type --------------------------------------- elif sampling_type == 'kde_w_clusters': cluster_fn = "pyposmat.cluster.{}.out".format(i_iteration) pyposmat_datafile_in = os.path.join(self.root_directory, self.data_directory, cluster_fn) _config_filename = os.path.join(self.root_directory, self.configuration_filename) # determine number of sims for this rank _mc_n_samples = _mc_config['n_samples_per_cluster'] _n_samples_per_rank = int(_mc_n_samples / self.mpi_size) if _mc_n_samples % self.mpi_size > self.mpi_rank: _n_samples_per_rank += 1 # initialize sampling object o = PyposmatClusterSampler(o_logger=self.log, mpi_rank=self.mpi_rank, mpi_comm=self.mpi_comm, mpi_size=self.mpi_size) o.create_base_directories() o.read_configuration_file(filename=_config_filename) # check to see if clustered data file exists if self.mpi_rank == 0: if not os.path.isfile(pyposmat_datafile_in): kde_fn = "pyposmat.kde.{}.out".format(i_iteration) kde_fn = os.path.join(self.root_directory, self.data_directory, kde_fn) o.write_cluster_file(filename=kde_fn, i_iteration=i_iteration) MPI.COMM_WORLD.Barrier() o.configure_pyposmat_datafile_in(filename=pyposmat_datafile_in) # fix relative path to structure databae folder _structure_dir = o.configuration.structures['structure_directory'] o.configuration.structures['structure_directory'] = \ os.path.join('..',_structure_dir) # finish the rest of the initialization o.configure_qoi_manager() o.configure_task_manager() o.configure_pyposmat_datafile_out() MPI.COMM_WORLD.Barrier() # run simulations o.run_simulations(i_iteration=i_iteration, n_samples=_mc_n_samples, filename=pyposmat_datafile_in) MPI.COMM_WORLD.Barrier() else: error_dict = OrderedDict([('i_iteration', i_iteration), ('sampling_type', sampling_type)]) m = "unknown parameter sampling type: {}".format(sampling_type) m += "the valid sampling types are: {}".format(",".join( self.parameter_sampling_types)) raise PyposmatSamplingTypeError(m, error_dict) # return to root 
directory os.chdir(self.root_directory) def initialize_data_directory(self, data_directory=None): """ determine the absolute path of the data directory and create it This method sets the `data_directory` attribute of the class and creates the `data directory` if the data directory already exists. Args: data_directory(str):the path of the data directory, the path can be expressed in either a relative path, or an absolute path Returns: (str) the absolute path of the data directory Raises: OSError: if the directory is not able to be created """ assert type(data_directory) in [type(None), str] assert type(self.data_directory) in [type(None), str] # determine the data directory path if data_directory is None: if self.data_directory is None: self.data_directory = os.path.join(self.root_directory, 'data') else: if os.path.isabs(self.data_directory): self.data_directory = data_directory else: self.data_directory = os.path.join(self.root_directory, self.data_directory) elif os.path.isabs(data_directory): # absolute path self.data_directory = data_directory else: # create a absolute path from the relative path self.data_directory = os.path.join(self.root_directory, data_directory) self.data_directory = os.path.abspath(self.data_directory) # create data directory if self.mpi_rank == 0: try: os.mkdir(self.data_directory) self.log('created the data directory.') self.log('\tdata_directory;{}'.format(self.data_directory)) except FileExistsError as e: self.log( 'attempted to create data directory, directory already exists.' ) self.log('\tdata_directory:{}'.format(self.data_directory)) except OSError as e: self.log( 'attempted to create data directory, cannot create directory.' 
) self.log('\tdata_directory:{}'.format(self.data_directory)) MPI.COMM_WORLD.Barrier() def run_parametric_sampling(self, i_iteration): """ run parametric sampling Args: i_iteration(int): what iteration of the sampling is happening """ assert type(i_iteration) is int assert type(self.mc_sampler) is PyposmatMonteCarloSampler self.mc_sampler.run_simulations( i_iteration=i_iteration, n_samples=self.determine_number_of_samples_per_rank( i_iteration=i_iteration)) def run_kde_sampling(self, i_iteration): """ run kde sampling Args: i_iteration(int): what iteration of the sampling is happening """ is_debug = False assert type(i_iteration) is int assert type(self.mc_sampler) is PyposmatMonteCarloSampler kde_filename = os.path.join(self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration)) n_samples_per_rank = self.determine_number_of_samples_per_rank( i_iteration=i_iteration) if is_debug: print('cwd:{}'.format(os.getcwd())) print('mpi_rank:{},kde_filename:{}'.format(self.mpi_rank, kde_filename)) print('n_samples_per_rank:{}'.format(n_samples_per_rank)) self.mc_sampler.run_simulations(i_iteration=i_iteration, n_samples=n_samples_per_rank, filename=kde_filename) def run_file_sampling(self, i_iteration): """ run file sampling Args: i_iteration(int): the iteration which to sampling for """ assert type(i_iteration) is int assert type(self.mc_sampler) is PyposmatFileSampler if 'file' in self.configuration.sampling_type[i_iteration]: filename = os.path.join( self.root_directory, self.configuration.sampling_type[i_iteration]['file']) else: if os.path.isabs(self.data_directory): filename = os.path.join( self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration)) else: filename = os.path, join( self.root_directory, self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration)) if self.mpi_rank == 0: self.log(80 * '-') self.log('{:^80}'.format('file sampling')) self.log(80 * '-') self.log('filename_in:{}'.format(filename)) MPI.COMM_WORLD.Barrier() 
self.mc_sampler.run_simulations( i_iteration=i_iteration, n_samples=self.determine_number_of_samples_per_rank( i_iteration=i_iteration), filename=filename) def determine_number_of_samples_per_rank(self, i_iteration, N_samples=None): """ determine the number of samples per rank The total number of samples needs to be broken up between the ranks, but roughly divided the work evenly. Args: i_iteration(int): which iteration we are in the simulation N_samples(int,optional): the total number of samples we are using for this iteration. If a number is provided, it will override the number of simulations specified in the configuration file. Returns: (int): the number of samples for this rank """ assert type(i_iteration) is int assert type(N_samples) in [type(None), int] assert type(self.configuration) is PyposmatConfigurationFile if N_samples is None: N_samples = self.configuration.sampling_type[i_iteration][ 'n_samples'] N_samples_per_rank = int(N_samples / self.mpi_size) if N_samples % self.mpi_size > self.mpi_rank: N_samples_per_rank += 1 return N_samples_per_rank def initialize_logger(self, log_fn=None, log_to_stdout=None): """initialize log object Args: log_fn(str,optional) """ assert type(log_fn) in [type(None), str] assert type(log_to_stdout) in [type(None), bool] if log_fn is None: self.log_fn = os.path.join(self.root_directory, self.data_directory, 'pyposmat.log') else: self.log_fn = log_fn self.o_log = PyposmatLogFile(filename=self.log_fn) self.log_to_stdout = log_to_stdout def setup_mpi_environment(self): self.mpi_comm = MPI.COMM_WORLD self.mpi_rank = self.mpi_comm.Get_rank() self.mpi_size = self.mpi_comm.Get_size() self.mpi_procname = MPI.Get_processor_name() self.log_mpi_environment() # random seed management def determine_rv_seeds(self, seed=None, i_iteration=None): """ set the random variable seed across simulations Args: seed(int,optional)=a seed to determine the rest of the seeds for different ranks and iterations. 
""" RAND_INT_LOW = 0 RAND_INT_HIGH = 2147483647 assert type(seed) in [type(None), int] assert type(i_iteration) in [type(None), int] if type(i_iteration) is type(None): i_iteration = self.i_iteration # set the seed attribute if type(seed) is int: self.rv_seed == seed # set the seed attribute, if the seed attribute is none if self.rv_seed is None: self.rv_seed = np.random.randint(low=RAND_INT_LOW, high=RAND_INT_HIGH) # if the rv_seed was determined in the script, then all ranks will # have the same rv_seed attribute np.random.seed(self.rv_seed) # each rank, will need it's own seed. So we sample from the freshly # generated random number generator, which is identical across ranks self.rv_seeds = np.random.randint(low=0, high=2147483647, size=(int(self.mpi_size), self.n_iterations)) # now restart the seed for this rank np.random.seed(self.rv_seeds[self.mpi_rank, i_iteration]) # logging methods def log(self, s): if self.log_to_stdout: print(s) if self.o_log is not None: self.o_log.write(s) def log_iteration_information(self, i_iteration): """log iteration information Args: i_iteration_id(int):the iteration number Returns: (str) the log string """ if self.mpi_rank == 0: s = self.SECTION_HEADER_FORMAT.format( 'Begin Iteration {}/{}'.format(i_iteration + 1, self.n_iterations)) self.log(s) MPI.COMM_WORLD.Barrier() #if self.mpi_rank == 0: # return "\n".join(s) def log_more_iteration_information(self): #TODO: this logging needs to go into a separate logging method. 
-EJR if self.mpi_rank == 0: self.mc_sampler.print_structure_database() self.mc_sampler.print_sampling_configuration() if self.mpi_rank == 0 and self.i_iteration == 0: self.mc_sampler.print_initial_parameter_distribution() if self.mpi_rank == 0: self.log(80 * '-') MPI.COMM_WORLD.Barrier() def log_mpi_environment(self): if self.mpi_rank == 0: m = [ self.SECTION_HEADER_FORMAT.format( 'MPI communication information') ] m += ['mpi_size={}'.format(self.mpi_size)] MPI.COMM_WORLD.Barrier() def log_random_seeds(self, i_iteration): if self.mpi_rank == 0: self.log(80 * '-') self.log('{:^80}'.format('GENERATED RANDOM SEEDS')) self.log(80 * '-') self.log('global_seed:{}'.format(str(self.rv_seed))) self.log('seeds_for_this_iteration:') self.log('{:^8} {:^8}'.format('rank', 'seed')) self.log('{} {}'.format(8 * '-', 8 * '-')) MPI.COMM_WORLD.Barrier() for i_rank in range(self.mpi_size): if self.mpi_rank == i_rank: self.log('{:^8} {:>10}'.format( i_rank, self.rv_seeds[i_rank, i_iteration])) MPI.COMM_WORLD.Barrier() def get_results_dict(self): rd = OrderedDict() rd['mpi'] = OrderedDict() rd['mpi']['size'] = self.mpi_size def analyze_data_directories(self, data_dir=None): _d = data_dir i = 0 contents = [] if not os.path.exists(_d): return i, contents if not os.path.isdir(_d): return i, contents while True: kde_fn = os.path.join(_d, "pyposmat.kde.{}.out".format(i)) if os.path.exists(kde_fn): contents.append(kde_fn) else: if i > 0: contents.append(results_fn) break results_fn = os.path.join(_d, "pyposmat.results.{}.out".format(i)) if os.path.exists(results_fn): pass else: break i = i + 1 return i, contents def analyze_rank_directories(self, root_dir=None): i = 0 contents = [] if root_dir is None: _d = self.root_directory else: _d = root_directory while True: rank_dir = os.path.join(_d, "rank_{}".format(i)) if not os.path.exists(rank_dir): break if not os.path.isdir(rank_dir): break rank_fn = os.path.join("rank_{}".format(i), "pyposmat.results.out") if not os.path.exists(os.path.join(_d, 
rank_fn)): break if not os.path.isfile(os.path.join(_d, rank_fn)): break else: contents.append(rank_fn) i = i + 1 return i, contents def find_initial_parameters_file(self): if 'file' in self.configuration.sampling_type[0]: _init_fn = os.path.join( self.root_directory, self.configuration.sampling_type[0]['file']) if os.path.exists(_init_fn): if os.path.isfile(_init_fn): return _init_fn else: return None def merge_data_files(self, i_iteration, last_datafile_fn=None, new_datafile_fn=None): """ merge the pyposmat data files Args: i_iteration(int): the current iteration which just finished last_datafile_fn(str,optional): the filename of the last dataset in the data directory. new_datafile_fn(str,optional): where to output the file results """ if last_datafile_fn is None: last_datafile_fn = os.path.join( self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration)) if new_datafile_fn is None: new_datafile_fn = os.path.join( self.data_directory, 'pyposmat.results.{}.out'.format(i_iteration)) data_dir = self.data_directory rank_dirs = [ v for v in os.listdir(self.root_directory) if v.startswith('rank_') ] filenames = [ os.path.join(self.root_directory, v, 'pyposmat.results.out') for v in rank_dirs ] data = None for i, v in enumerate(filenames): data_new = None if i == 0: data = PyposmatDataFile() data.read(filename=v) else: data_new = PyposmatDataFile() data_new.read(filename=v) data.df = pd.concat([data.df, data_new.df]) nrows = len(data.df) if self.configuration.sampling_type[i_iteration][ 'type'] == 'from_file': pass else: sim_id_fmt = '{:0>2}_{:0>6}' sim_id_str = [ sim_id_fmt.format(i_iteration, i) for i in range(nrows) ] data.df['sim_id'] = [ sim_id_fmt.format(i_iteration, i) for i in range(nrows) ] if self.configuration.sampling_type[i_iteration][ 'type'] == "from_file": data_new = PyposmatDataFile() data_new.read(filename=filenames[0]) data_new.df = data.df data_new.write(filename=new_datafile_fn) else: self.log("merging with candidates from previous simulations") 
self.log("\tfilename:{}".format(last_datafile_fn)) data_old = PyposmatDataFile() try: data_old.read(filename=last_datafile_fn) data_old.df = pd.concat([data_old.df, data.df]) data_old.write(filename=new_datafile_fn) except FileNotFoundError as e: if i_iteration == 0: data.write(filename=new_datafile_fn) else: raise def merge_error_files(self, i_iteration): """ merge the pyposmat data files Args: i_iteration(int): the current iteration which just finished last_datafile_fn(str,optional): the filename of the last dataset in the data directory. new_datafile_fn(str,optional): where to output the file results """ badparameters_fn = os.path.join(self.data_directory, 'pyposmat.badparameters.out') data_dir = self.data_directory rank_dirs = [ v for v in os.listdir(self.root_directory) if v.startswith('rank_') ] filenames = [ os.path.join(self.root_directory, v, 'pyposmat.badparameters.out') for v in rank_dirs ] # consolidate rank directories badparameters_new = None badparameters_next = None for i, v in enumerate(filenames): if badparameters_new is None: try: badparameters_new = PyposmatBadParametersFile( o_config=self.configuration) badparameters_new.read(filename=v) except FileNotFoundError as e: self.log("no bad parameters file at {}".format(v)) else: try: badparameters_next = PyposmatBadParametersFile( o_config=self.configuration) badparameters_next.read(filename=v) badparameters_new.df = pd.concat( [badparameters_new.df, badparameters_next.df]) except FileNotFoundError as e: self.log("no bad parameters file as {}".format(v)) # determine the sim_id for bad parameters of the sim_id if badparameters_new.df is None: # no previous bad paramters found # TODO: need to implement something here to deal with bad parameters pass else: nrows = len(badparameters_new.df) sim_id_fmt = '{:0>2}_{:0>6}' sim_id_str = [ sim_id_fmt.format(i_iteration, i) for i in range(nrows) ] badparameters_new.df['sim_id'] = sim_id_str if self.configuration.sampling_type[i_iteration][ 'type'] == 
"from_file": badparameters_new.write(filename=badparameters_fn) else: self.log( "merging with bad candidates from previous simulations") self.log("\tfilename:{}".format(badparameters_fn)) badparameters = PyposmatBadParametersFile( o_config=self.configuration) try: badparameters.read(filename=badparameters_fn) badparameters.df = pd.concat( [badparameters.df, badparameters_new.df]) badparameters.write(filename=badparameters_fn) except FileNotFoundError as e: if i_iteration == 0: badparameters_new.write(filename=badparameters_fn) else: raise def analyze_results(self, i_iteration, data_fn=None, config_fn=None, kde_fn=None, analysis_fn=None): """ analyze the results of the simulation this method analyzes the results of the simulation, and does post simulation tasks, such as filtering by qoi performance, pareto optimization, etc. Args: data_fn(str): the path of the data file. By default this is set to none where the the file will be determine by i_iteration and internal attributes config_fn(str): the path of the data file. By default this is set to none where the the file will be determine by i_iteration and internal attributes kde_fn(str): the path of the data file. 
By default this is set to none where the the file will be determine by i_iteration and internal attributes """ if data_fn is None: data_fn = os.path.join(\ self.root_directory, self.data_directory, 'pyposmat.results.{}.out'.format(i_iteration)) if config_fn is None: config_fn = os.path.join(\ self.root_directory, self.configuration_filename) if kde_fn is None: kde_fn = os.path.join(\ self.root_directory, self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration+1)) if analysis_fn is None: analysis_fn = os.path.join(self.root_directory, self.data_directory, 'pyposmat.analysis.out') data_analyzer = PyposmatDataAnalyzer() data_analyzer.initialize_configuration(config_fn=config_fn) data_analyzer.analyze_results_data(i_iteration, filename=data_fn) assert isinstance(data_analyzer.results_statistics, OrderedDict) if os.path.isfile(analysis_fn): data_analyzer.read_analysis_file(filename=analysis_fn) self.log( data_analyzer.str__results_descriptive_statistics( statistics=data_analyzer.results_statistics)) self.log(data_analyzer.str__qoi_filtering_summary()) data_analyzer.write_kde_file(filename=kde_fn) data_analyzer.analyze_kde_data(i_iteration, filename=kde_fn) assert isinstance(data_analyzer.kde_statistics, OrderedDict) self.log( data_analyzer.str__kde_descriptive_statistics( statistics=data_analyzer.kde_statistics)) data_analyzer.update_analysis(i_iteration) data_analyzer.write_analysis_file(filename=analysis_fn) def read_configuration_file(self, filename=None): assert type(filename) in [type(None), str] assert type(self.configuration_filename) in [type(None), str] if filename is not None: self.configuration_filename = filename if not os.path.isabs(self.configuration_filename): self.configuration_filename = os.path.abspath( self.configuration_filename) self.configuration = PyposmatConfigurationFile() self.configuration.read(filename=self.configuration_filename) if self.mpi_rank == 0: self._write_parameter_names() self._write_qoi_names() self._write_error_names() def 
_write_parameter_names(self, parameter_names=None): if parameter_names is None: _parameter_names = self.parameter_names else: _parameter_names = parameter_names s = [80 * '-'] s += ['{:^80}'.format('PARAMETER_NAMES')] s += [80 * '-'] s += [p for p in _parameter_names] self.log("\n".join(s)) def _write_qoi_names(self, qoi_names=None): if qoi_names is None: _qoi_names = self.qoi_names else: _qoi_names = qoi_names s = [80 * '-'] s += ['{:^80}'.format('QOI_NAMES')] s += [80 * '-'] s += [p for p in _qoi_names] self.log("\n".join(s)) def _write_error_names(self, error_names=None): if error_names is None: _error_names = self.error_names else: _error_names = error_names s = [80 * '-'] s += ['{:^80}'.format('ERROR_NAMES')] s += [80 * '-'] s += [p for p in _error_names] self.log("\n".join(s))
def test____init__():
    """Check the reference potentials of a freshly constructed sampler."""
    sampler = PyposmatFileSampler(
        config_fn=config_fn,
        data_in_fn=datafile_in_fn,
        data_out_fn=datafile_out_fn,
    )

    assert sampler.reference_potentials == reference_potentials


def reference_potential_names_to_string(o_sampler):
    """Format the reference potentials of a sampler as a text report.

    The report is an 80-column banner followed by one line per entry of
    `o_sampler.reference_potentials`, formatted as `<entry>.<flag>`, where
    the flag is the result of testing the entry for membership in
    `o_sampler.reference_potentials`.

    Args:
        o_sampler: an object with a `reference_potentials` attribute

    Returns:
        str: the report, every line terminated by a newline
    """
    rule = 80 * '-' + "\n"
    pieces = [rule, "{:^80}\n".format('CHECK REFERENCE POTENTIALS'), rule]
    pieces.extend(
        "{}.{}\n".format(name, name in o_sampler.reference_potentials)
        for name in o_sampler.reference_potentials
    )
    return "".join(pieces)


if __name__ == "__main__":
    # report where the inputs and outputs live before building the sampler
    print("config_directory:{}".format(config_directory))
    print("data_directory:{}".format(data_directory))
    print("output_directory:{}".format(output_directory))

    o_sampler = PyposmatFileSampler(
        config_fn=config_fn,
        data_in_fn=datafile_in_fn,
        data_out_fn=datafile_out_fn,
    )
    print(reference_potential_names_to_string(o_sampler=o_sampler))
def test__subselect_by_dmetric():
    """Check that subselecting by d-metric leaves a DataFrame on the sampler.

    The sampler is taken through its setup calls with only the testing qois
    enabled, then `subselect_by_dmetric` is run with `n_smallest`.
    """
    sampler_files = {
        'config_fn': config_fn,
        'data_in_fn': datafile_in_fn,
        'data_out_fn': datafile_out_fn,
    }
    sampler = PyposmatFileSampler(**sampler_files)

    # setup, in the order the sampler is driven here
    sampler.create_base_directories()
    sampler.read_configuration_file()
    sampler.configure_qoi_manager(use_fitting_qois=False,
                                  use_testing_qois=True)
    sampler.configure_task_manager()
    sampler.configure_datafile_out()

    sampler.subselect_by_dmetric(nsmallest=n_smallest)

    import pandas as pd
    # exact type check: subclasses of DataFrame are not accepted
    assert type(sampler.subselect_df) is pd.DataFrame
# --- input and output locations ---------------------------------------------
config_directory = "./data"
config_fn = os.path.join(config_directory, "pyposmat.config.in")

data_directory = "../../data/MgO_pareto_data"
datafile_in_fn = os.path.join(data_directory, "culled_005.out")

output_directory = "./"
datafile_out_fn = os.path.join(output_directory, "qoiplus_005.out")

# --- read the configuration file --------------------------------------------
from pypospack.pyposmat.data import PyposmatConfigurationFile

o_config = PyposmatConfigurationFile()
o_config.read(filename=config_fn)

# --- build and prepare the sampler ------------------------------------------
o_sampler = PyposmatFileSampler(
    config_fn=config_fn,
    data_in_fn=datafile_in_fn,
    data_out_fn=datafile_out_fn,
)
check_reference_potentials(o_sampler=o_sampler)

o_sampler.create_base_directories()
o_sampler.read_configuration_file()

# --- choose which QOIs to calculate -----------------------------------------
# Exactly one of the three calls below should be active.
#
# fitting qois only:
#o_sampler.configure_qoi_manager(use_fitting_qois=True,use_testing_qois=False)
#
# testing qois only (the active choice):
o_sampler.configure_qoi_manager(use_fitting_qois=False, use_testing_qois=True)
#
# all qois:
#o_sampler.configure_qoi_manager(use_fitting_qois=True,use_testing_qois=True)