Ejemplo n.º 1
0
    def _configure_logger(self,o_log=None):
        """configure the logging service

        The log object stored in ``self.obj_log`` depends on the type of the
        argument o_log:

        * PyposmatLogFile: the object is stored as-is (shared by reference).
        * str: treated as a filename; a new PyposmatLogFile is created for it.
        * None (default): a PyposmatLogFile is created with its default
          constructor arguments.

        Args:
            o_log (str,PyposmatLogFile,optional): default: None
        Raises:
            TypeError: if o_log is not a str, a PyposmatLogFile, or None
        """

        # Validation is done by the branches themselves rather than by an
        # ``assert``: an assert would raise AssertionError (and vanish under
        # ``python -O``), making the documented TypeError unreachable.
        if isinstance(o_log, PyposmatLogFile):
            self.obj_log = o_log
        elif isinstance(o_log, str):
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif o_log is None:
            self.obj_log = PyposmatLogFile()
        else:
            m = "o_log must be str, PyposmatLogFile, or None"
            raise TypeError(m)
Ejemplo n.º 2
0
    def configure_logger(self,o_log=None,log_to_stdout=True):
        """Set up ``self.obj_log`` and ``self.log_to_stdout``.

        How the log object is configured depends on the type of o_log:
        a PyposmatLogFile is stored by reference, a string is taken as the
        filename of a new PyposmatLogFile, and None leaves ``self.obj_log``
        set to None.

        Args:
            o_log (str,PyposmatLogFile,None): default: None
            log_to_stdout (bool): stored on the instance; default: True
        Raises:
            TypeError: if o_log or log_to_stdout has an unsupported type
        """

        log_kind = type(o_log)
        if o_log is None:
            self.obj_log = None
        elif log_kind is str:
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif log_kind is PyposmatLogFile:
            self.obj_log = o_log
        else:
            raise TypeError("log object must be str, PyposmatLogFile, or None")

        # the log object is configured before the flag is validated
        if not isinstance(log_to_stdout,bool):
            raise TypeError("log_to_stdout must be boolean")
        self.log_to_stdout = log_to_stdout
Ejemplo n.º 3
0
    def initialize_logger(self, log_fn=None, log_to_stdout=None):
        """Create the log file object for this instance.

        Args:
            log_fn(str,optional): path of the log file.  When omitted, the
                path is 'pyposmat.log' joined onto self.root_directory and
                self.data_directory.
            log_to_stdout(bool,optional): stored unchanged on the instance.
        """

        assert log_fn is None or type(log_fn) is str
        assert log_to_stdout is None or type(log_to_stdout) is bool

        if log_fn is not None:
            self.log_fn = log_fn
        else:
            self.log_fn = os.path.join(
                self.root_directory, self.data_directory, 'pyposmat.log')

        self.o_log = PyposmatLogFile(filename=self.log_fn)
        self.log_to_stdout = log_to_stdout
Ejemplo n.º 4
0
class PyposmatMonteCarloSampler(PyposmatEngine):
    def __init__(self,
                 filename_in='pyposmat.config.in',
                 filename_out='pyposmat.results.out',
                 o_log=None,
                 mpi_rank=None,
                 mpi_size=None,
                 base_directory=None):
        """Additional attributes are set by the base class :obj:PyposmatEngine

        Args:
            filename_in (str) - path of the configuration file
            filename_out (str) - path of the output file
            o_log (str,PyposmatLogFile,None) - a string is treated as the
                path of the file to log to; a PyposmatLogFile is stored by
                reference; None disables file logging.
            mpi_rank (int,optional) - defaults to 0 when None
            mpi_size (int,optional) - defaults to 1 when None
            base_directory (str,optional): Either the relative or full path
                which provides a unique drive addressing space for
                simultaneously running simulations.
        Attributes:
            mpi_rank (int) - rank passed in, or 0
            mpi_size (int) - size passed in, or 1
            pyposmat_data_in_filename (str) - the path of the datafile to read in
            pyposmat_data_out_filename (str) - the path of the datafile to write simulation results to
            pyposmat_badparameters_filename (str) - the path of the file
                recording rejected parameter sets
        Raises:
            TypeError: if o_log cannot be used to configure the logger
        """
        assert isinstance(filename_in, str)
        assert isinstance(filename_out, str)
        assert type(base_directory) in [str, type(None)]

        PyposmatEngine.__init__(self,
                                filename_in=filename_in,
                                filename_out=filename_out,
                                base_directory=base_directory,
                                fullauto=False)

        # Fall back to a single-process layout when no MPI information is
        # supplied.  These defaults must not be overwritten afterwards:
        # later code compares mpi_rank with 0 and takes a modulo by mpi_size.
        self.mpi_rank = 0 if mpi_rank is None else mpi_rank
        self.mpi_size = 1 if mpi_size is None else mpi_size

        assert self.mpi_rank < self.mpi_size

        self.pyposmat_data_in_filename = None
        self.pyposmat_data_out_filename = filename_out
        self.pyposmat_badparameters_filename = 'pyposmat.badparameters.out'

        try:
            self.configure_logger(o_log)
        except TypeError as e:
            m = "Unable to to configure obj_log based on attribute log:{}".format(
                str(o_log))
            # keep the original error attached as the cause
            raise TypeError(m) from e

    def configure_logger(self, o_log=None):
        """Set ``self.obj_log`` from the argument o_log.

        A PyposmatLogFile is stored by reference, a string is taken as the
        filename of a new PyposmatLogFile, and None (the default) leaves
        ``self.obj_log`` set to None.

        Args:
            o_log (str,PyposmatLogFile,None): default: None
        Raises:
            TypeError: if o_log is of any other type
        """

        log_kind = type(o_log)
        if o_log is None:
            self.obj_log = None
        elif log_kind is str:
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif log_kind is PyposmatLogFile:
            self.obj_log = o_log
        else:
            raise TypeError("log object must be str, PyposmatLogFile, or None")

    def log(self, str_msg):
        if type(str_msg) is str:
            m = str_msg
        elif type(str_msg) is list:
            m = "\n".join(str_msg)

        if type(self.obj_log) is PyposmatLogFile:
            self.obj_log.write(m)
        print(m)

    def configure_pyposmat_datafile_in(self, filename):
        """Record the input data filename and create its data file object.

        Args:
            filename: stored in ``pyposmat_data_in_filename`` and passed to
                the PyposmatDataFile constructor.
        """
        self.pyposmat_data_in_filename = filename
        self.pyposmat_datafile_in = PyposmatDataFile(filename)

    def configure_pyposmat_datafile_out(self, filename=None):
        """Create the output data file object.

        Args:
            filename (str,optional): when given, replaces the stored
                ``pyposmat_data_out_filename``.  When omitted, the filename
                already stored on the instance is used.
        """
        if filename is not None:
            assert type(filename) is str
            self.pyposmat_data_out_filename = filename
        # Build the object from the stored filename, as the bad-parameters
        # counterpart does, so that calling without an argument does not
        # create a data file object with a filename of None.
        self.pyposmat_datafile_out = PyposmatDataFile(
            self.pyposmat_data_out_filename)

    def configure_pyposmat_badparameters_file(self, filename=None):
        """Create the object used to record rejected parameter sets.

        Args:
            filename (str,optional): when given, replaces the stored
                ``pyposmat_badparameters_filename`` before the object is
                created.
        """
        if filename is not None:
            assert type(filename) is str
            self.pyposmat_badparameters_filename = filename

        target_fn = self.pyposmat_badparameters_filename
        config = self.configuration
        self.pyposmat_badparameters = PyposmatBadParametersFile(
            filename=target_fn, o_config=config)

    def read_configuration_file(self, filename=None):
        """Read the configuration and cache sampling information.

        After delegating to PyposmatEngine.read_configuration_file, the
        following attributes are derived from ``self.configuration``:
        n_iterations, parameter_names, qoi_names, error_names,
        parameter_distribution_definition, free_parameter_names,
        parameter_constraints and constrained_parameter_names.

        Args:
            filename (str,optional): forwarded to the base class
        """
        PyposmatEngine.read_configuration_file(self, filename=filename)

        config = self.configuration
        self.n_iterations = config.sampling_type['n_iterations']
        self.parameter_names = list(config.sampling_distribution)
        self.qoi_names = list(config.qois)
        self.error_names = ['{}.err'.format(qn) for qn in self.qoi_names]
        self.parameter_distribution_definition = config.sampling_distribution

        # a parameter is free unless its distribution type is 'equals'
        try:
            free_names = []
            for name, definition in \
                    self.parameter_distribution_definition.items():
                if definition[0] != 'equals':
                    free_names.append(name)
            self.free_parameter_names = free_names
        except KeyError:
            print(self.parameter_distribution_definition.items())
            raise

        if self.configuration.sampling_constraints is None:
            self.parameter_constraints = OrderedDict()
        else:
            self.parameter_constraints = copy.deepcopy(
                self.configuration.sampling_constraints)

        # everything that is not free is treated as constrained
        self.constrained_parameter_names = [
            name for name in self.parameter_names
            if name not in self.free_parameter_names
        ]

    def run_simulations(self, i_iteration, n_samples=None, filename=None):
        """

        Args:
            i_iteration(int): the iteration cycle we are on.
            n_samples(int,optional): the number of parameters to evaluate
            filename(str,optional): the filename
        """

        assert type(i_iteration) is int
        assert type(n_samples) in [type(None), int]
        assert type(filename) in [type(None), str]

        i = i_iteration
        _sampling_type = self.configuration.sampling_type[i]['type']
        _n_samples = self.configuration.sampling_type[i]['n_samples']

        if self.mpi_rank == 0:
            m = [
                "R{}: Starting iteration N={}".format(self.mpi_rank,
                                                      i_iteration)
            ]
            if _sampling_type is "from_file":
                m += [
                    "R{}: Sampling parameters from {}".format(
                        self.mpi_rank, filename)
                ]
            else:
                m += [
                    "R{}: Attemping n_samples={} with sampling_type={}".format(
                        self.mpi_rank, _n_samples, _sampling_type)
                ]
            if filename is not None:
                m += ["R{}: Using file:{}".format(self.mpi_rank, filename)]
            self.log(m)

        if n_samples is not None:
            _n_samples = n_samples

        if _sampling_type == 'parametric':
            self.run_parameteric_sampling(n_samples=_n_samples)
        elif _sampling_type == 'kde':
            if filename is None:
                raise ValueError('cannot do kde sampling with out filename')
            self.run_kde_sampling(n_samples=_n_samples, filename_in=filename)
        elif _sampling_type == 'from_file':
            if filename is None:
                raise ValueError('cannot do filesampling without file')
            self.run_file_sampling(filename)
        else:
            raise ValueError('unknown sampling type:{}'.format(_sampling_type))

    def write_badparameters_header(self):
        """Ask the bad-parameters file object to write its header section.

        The target is the filename stored in
        ``self.pyposmat_badparameters_filename``.
        """
        self.pyposmat_badparameters.write_header_section(
            filename=self.pyposmat_badparameters_filename)

    def write_data_out_header(self):
        """Ask the output data file object to write its header section.

        The stored output filename and the parameter, qoi and error names
        cached on the instance are passed along.
        """
        self.pyposmat_datafile_out.write_header_section(
            filename=self.pyposmat_data_out_filename,
            parameter_names=self.parameter_names,
            qoi_names=self.qoi_names,
            error_names=self.error_names)

    def get_sim_id(self, i, s=None):
        """Return the simulation id.

        Args:
            i: sample index; used only when s is None, in which case it
                must be an int and is returned as a string.
            s: explicit id; returned unchanged whenever it is not None.
        Raises:
            TypeError: if s is None and i is not an int.
        """
        if s is None:
            if isinstance(i, int):
                return str(i)
            raise TypeError(
                'cannot determine sim_id from i:{} and s:{}'.format(i, s))
        return s

    def run_parameteric_sampling(self, n_samples):
        """Draw parameter sets from the configured distributions and evaluate them.

        For each of the n_samples iterations a value is drawn for every free
        parameter, the 'equals' parameters are derived, the parameter
        constraints are checked and the parameter set is evaluated.
        Successful results are written to the output data file; the
        exception types handled below are written to the bad-parameters
        file and counted as errors.

        Args:
            n_samples(int): number of parameter sets to attempt
        Raises:
            ValueError: for a distribution type other than 'uniform' or
                'normal', or an 'equals' definition that is a list on a
                parameter whose name does not end with 'latticetype'.
        """

        # create one frozen scipy.stats distribution per free parameter
        _rv_generators = OrderedDict()
        for p in self.free_parameter_names:
            distribution_type = self.parameter_distribution_definition[p][0]
            if distribution_type == 'uniform':
                # scipy parameterizes uniform on [loc, loc + scale]
                _a = self.parameter_distribution_definition[p][1]['a']
                _b = self.parameter_distribution_definition[p][1]['b']
                _loc = _a
                _scale = _b - _a
                _rv_generators[p] = scipy.stats.uniform(loc=_loc, scale=_scale)
            elif distribution_type == 'normal':
                _mu = self.parameter_distribution_definition[p][1]['mu']
                _sigma = self.parameter_distribution_definition[p][1]['sigma']
                _loc = _mu
                _scale = _sigma
                _rv_generators[p] = scipy.stats.norm(loc=_loc, scale=_scale)
            else:
                raise ValueError(
                    'unknown distribution type: {}'.format(distribution_type))

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            # determine sim_id
            sim_id = self.get_sim_id(i=i_sample)

            # new OrderedDict to hold in parameter values
            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])

            # generate free parameters for ordered dictionary
            for p in self.free_parameter_names:
                _parameters[p] = _rv_generators[p].rvs(size=1)[0]

            # determine parameters determined from equality constraints
            for p in self.constrained_parameter_names:
                _constraint_type = self.parameter_distribution_definition[p][0]
                if _constraint_type == 'equals':

                    # this condition is for fitting EoS for EAM function which
                    # requires a reference ground state crystal structure
                    if p.endswith('latticetype'):
                        _v = self.parameter_distribution_definition[p][1]
                        _parameters[p] = _v

                    # process evaluation strings
                    elif type(self.parameter_distribution_definition[p]
                              [1]) is not list:
                        _str_eval = str(
                            self.parameter_distribution_definition[p][1])

                        # replace string values with numerical values
                        # NOTE(review): plain substring replacement; a free
                        # parameter name contained in another name would be
                        # substituted inside it as well
                        for fp in self.free_parameter_names:
                            if fp in _str_eval:
                                _str_eval = _str_eval.replace(
                                    fp, str(_parameters[fp]))

                        # evaluate the string into a float
                        # NOTE(review): eval() runs text taken from the
                        # configuration; only safe for trusted configurations
                        _parameters[p] = eval(_str_eval)
                    else:
                        raise ValueError("oops")

            # additional tasks added here
            # NOTE(review): the loop above raises for any list-valued
            # 'equals' definition not ending in 'latticetype', so this
            # branch looks reachable only for such names -- confirm
            for p in self.constrained_parameter_names:
                if self.parameter_distribution_definition[p][0] == 'equals':
                    if type(self.parameter_distribution_definition[p]
                            [1]) is list:
                        # required for EAM potentials to calculate dens_max for embedding function
                        if self.parameter_distribution_definition[p][1][
                                0] == 'equilibrium_density':
                            a0 = self.parameter_distribution_definition[p][1][
                                1]
                            latt = self.parameter_distribution_definition[p][
                                1][2]
                            _parameters[
                                p] = self.calculate_equilibrium_density(
                                    a0, latt, _parameters)

            try:
                # check constraints: every parameter name in the constraint
                # string is replaced by its value, then the string is
                # evaluated
                for k, v in self.parameter_constraints.items():
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))

                    # only the literal False rejects the parameter set
                    if eval(_eval_str) is False:
                        m = "failed parameter constraint, {}".format(k)
                        raise PyposmatBadParameterError(m,
                                                        parameters=_parameters)

                _results = self.evaluate_parameter_set(parameters=_parameters)
            except PyposmatBadParameterError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except LammpsSimulationError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackTaskManagerError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackBadEamEosError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:

                #if type(sim_id) is float:
                #    _sim_id = int(sim_id)

                # the sample index, as a string, is the id written out
                _sim_id = "{}".format(i_sample)

                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=_sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    n_samples_completed = i_sample + 1
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        self.mpi_rank, n_samples_completed, time_total,
                        avg_time, _n_errors)
                    self.log(_str_msg)

    def get_options_kde_bandwidth(self):
        """Describe the KDE bandwidth estimators known to this class.

        Returns:
            OrderedDict: maps an option name to an OrderedDict of
            bibliographic information ('reference' plus 'doi',
            'description' or 'isbn' where available).
        """

        kde_options = OrderedDict()
        kde_options['chiu1999'] = OrderedDict()
        kde_options['chiu1999'][
            'reference'] = 'Chiu, S.T. Ann. Stat. 1991, Vol. 19, No 4. 1883-1905'
        kde_options['chiu1999']['doi'] = '10.1214/aos/1176348376'
        kde_options['chiu1999']['description'] = ""
        kde_options['silverman1984'] = OrderedDict()
        kde_options['silverman1984'][
            'reference'] = 'Silverman, B.W. (1986). Density Estimation for Statistics and Data Analysis. London: Chapman & Hall/CRC. p. 48'
        kde_options['silverman1984']['isbn'] = '0-412-24620-1'

        # the dictionary used to be built and then dropped
        return kde_options

    def determine_kde_bandwidth(self, X, kde_bw_type):
        """ determine kde bandwidth

        The result is stored in ``self.kde_bw`` (and the method name in
        ``self.kde_bw_type``) before being returned.

        Args:
            X(np.ndarray): array of data to determine the KDE bandwidth
            kde_bw_type(str): the method of estimating the optimal bandwidth
        Returns:
            the bandwidth stored in ``self.kde_bw``
        Raises:
            PypospackBadKdeBandwidthType: if kde_bw_type is not implemented
            ValueError: re-raised from Chiu1999_h after printing X
        """

        if self.mpi_rank == 0:
            self.log('determine kde bandwidth...')

        if kde_bw_type == 'chiu1999':
            try:
                h = Chiu1999_h(X)
            except ValueError:
                # show the data which could not be processed, then propagate
                print(X)
                raise

        elif kde_bw_type == 'silverman1985':
            # NOTE(review): the object is stored without being called with
            # X, unlike the chiu1999 branch.  The key also differs from the
            # 'silverman1984' name used by get_options_kde_bandwidth().
            # Left unchanged -- confirm the intended behavior.
            h = Silverman1986
        else:
            # include the offending value; the placeholder used to be left
            # unformatted in the message
            m = 'kde_bw_type, {}, is not an implemented bandwidth type'
            raise PypospackBadKdeBandwidthType(m.format(kde_bw_type))

        if self.mpi_rank == 0:
            self.log('{}:{}'.format(kde_bw_type, h))
        self.kde_bw_type = kde_bw_type
        self.kde_bw = h

        return self.kde_bw

    def run_kde_sampling(self,
                         n_samples,
                         filename_in,
                         cluster_id=None,
                         kde_bw_type='chiu1999'):
        """ sample from a KDE distribution

        A scipy.stats.gaussian_kde is built from the free-parameter columns
        of the data file.  For each sample the free parameters are drawn
        from it, the 'equals' parameters are derived, the parameter
        constraints are checked and the parameter set is evaluated.
        Successful results are written to the output data file; the
        exception types handled below are written to the bad-parameters
        file and counted as errors.

        Args:
            n_samples(int): the number of samples to draw from the KDE distribution
            filename_in(str): the path to the datafile from which the parameters will be drawn from
            cluster_id(int): if we need to use a specific cluster_id, we specify it here.  
                otherwise, it will be drawn from all parameters contained within the set.
            kde_bw_type(str): the method of estimating the optimal bandwidth
        """
        _datafile_in = PyposmatDataFile()
        _datafile_in.read(filename_in)

        if cluster_id is None:
            _free_parameter_names = [str(v) for v in self.free_parameter_names]
            # transposed so that each row holds one parameter
            # (presumably the layout gaussian_kde expects -- confirm)
            _X = _datafile_in.df[_free_parameter_names].values.T
        else:
            # subselect the dataframe by the cluster_id of interest
            _datafile_in.df = _datafile_in.df.loc[_datafile_in.df['cluster_id']
                                                  == cluster_id]
            # NOTE(review): the same cluster_id filter is applied a second
            # time here on the already filtered dataframe
            _X = _datafile_in.df[self.free_parameter_names].loc[
                _datafile_in.df['cluster_id'] == cluster_id].values.T
            # self.log.write("cluster_id {c} _X.shape={x}".format(c=cluster_id, x=_X.shape))

        kde_bw = self.determine_kde_bandwidth(X=_X, kde_bw_type=kde_bw_type)

        _rv_generator = scipy.stats.gaussian_kde(_X, kde_bw)

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            # determine sim_id
            sim_id = self.get_sim_id(i=i_sample)

            # new OrderedDict to hold in parameter values
            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])

            # generate free parameters for ordered dictionary;
            # one draw, indexed by the position of the free parameter
            _free_parameters = _rv_generator.resample(1)
            for i, v in enumerate(self.free_parameter_names):
                _parameters[v] = float(_free_parameters[i, 0])

            # determine parameters determined from equality constraints
            for p in self.constrained_parameter_names:
                _constraint_type = self.parameter_distribution_definition[p][0]
                if _constraint_type == 'equals':

                    # this condition is for fitting EoS for EAM function which
                    # requires a reference ground state crystal structure
                    if p.endswith('latticetype'):
                        _v = self.parameter_distribution_definition[p][1]
                        _parameters[p] = _v

                    # process evaluation strings
                    elif type(self.parameter_distribution_definition[p]
                              [1]) is not list:
                        _str_eval = str(
                            self.parameter_distribution_definition[p][1])

                        # replace string values with numerical values
                        # NOTE(review): plain substring replacement; a free
                        # parameter name contained in another name would be
                        # substituted inside it as well
                        for fp in self.free_parameter_names:
                            if fp in _str_eval:
                                _str_eval = _str_eval.replace(
                                    fp, str(_parameters[fp]))

                        # evaluate the string into a float
                        # NOTE(review): eval() runs text taken from the
                        # configuration; only safe for trusted configurations
                        _parameters[p] = eval(_str_eval)
                    else:
                        raise ValueError("oops")

            for p in self.constrained_parameter_names:
                if self.parameter_distribution_definition[p][0] == 'equals':
                    # some EAM potentials have a normalizing equilibrium density
                    # which have to be determined based upon the parameterization of
                    # the electron density function
                    if type(self.parameter_distribution_definition[p]
                            [1]) is list:
                        if self.parameter_distribution_definition[p][1][
                                0] == 'equilibrium_density':
                            a0 = self.parameter_distribution_definition[p][1][
                                1]
                            latt = self.parameter_distribution_definition[p][
                                1][2]
                            _parameters[
                                p] = self.calculate_equilibrium_density(
                                    a0, latt, _parameters)

            try:
                # now we check parameter inequality constraints: every
                # parameter name in the constraint string is replaced by
                # its value, then the string is evaluated
                for k, v in self.parameter_constraints.items():
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))

                    # only the literal False rejects the parameter set
                    if eval(_eval_str) is False:
                        s = 'parameter constraint failed, {}'.format(k)
                        raise PyposmatBadParameterError(s,
                                                        parameters=_parameters)
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except PyposmatBadParameterError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except LammpsSimulationError as e:
                assert isinstance(self.pyposmat_badparameters,
                                  PyposmatBadParametersFile)
                assert isinstance(self.pyposmat_badparameters.parameter_names,
                                  list)
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackTaskManagerError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackBadEamEosError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:

                # determine sim_id
                # NOTE(review): _sim_id is assigned but not used; the
                # integer i_sample is what gets written below
                _sim_id = int(i_sample)

                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=i_sample,
                    cluster_id=cluster_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    n_samples_completed = i_sample + 1
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        self.mpi_rank, n_samples_completed, time_total,
                        avg_time, _n_errors)
                    self.log(_str_msg)

        # NOTE(review): this summary of the bandwidth is built but neither
        # returned nor stored
        d = OrderedDict()
        d['kde_bandwidth'] = OrderedDict()
        d['kde_bandwidth']['type'] = self.kde_bw_type
        d['kde_bandwidth']['h'] = self.kde_bw

    def run_file_sampling(self, filename_in):
        """Evaluate the parameter sets stored in an existing data file.

        Rows are shared round-robin between MPI ranks: this rank handles
        the row at position ``k`` when ``k % mpi_size == mpi_rank``.  For
        each handled row the 'equilibrium_density' parameters are
        recomputed, the parameter constraints are checked and the
        parameter set is evaluated.  Successful results are written to the
        output data file under the row's ``sim_id``; the exception types
        handled below are written to the bad-parameters file and counted
        as errors.

        Args:
            filename_in(str): path of the data file holding the parameters
        """

        _datafile_in = PyposmatDataFile(filename=filename_in)
        _datafile_in.read()

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()

        # failures which reject a parameter set without stopping the run
        _handled_errors = (PyposmatBadParameterError,
                           LammpsSimulationError,
                           PypospackTaskManagerError,
                           PypospackBadEamEosError)

        _n_errors = 0
        n_samples_completed = 0
        # The row position drives the rank assignment and is kept separate
        # from the count of completed samples.  Sharing one counter (which
        # was also incremented twice per handled row) made ranks pick up
        # rows belonging to other ranks.
        for i_row, (_, row) in enumerate(_datafile_in.df.iterrows()):
            if self.mpi_rank != i_row % self.mpi_size:
                continue

            _parameters = OrderedDict([(p, row[p])
                                       for p in self.parameter_names])

            # ids read back from a file may come back as floats
            sim_id = row['sim_id']
            if isinstance(sim_id, float) and sim_id.is_integer():
                sim_id = int(sim_id)

            # recompute parameters defined as an equilibrium density
            for p in self.constrained_parameter_names:
                _definition = self.parameter_distribution_definition[p]
                if _definition[0] != 'equals':
                    continue
                if type(_definition[1]) is not list:
                    continue
                if _definition[1][0] == 'equilibrium_density':
                    a0 = _definition[1][1]
                    latt = _definition[1][2]
                    _parameters[p] = self.calculate_equilibrium_density(
                        a0, latt, _parameters)

            try:
                # check constraints: every parameter name in the constraint
                # string is replaced by its value, then the string is
                # evaluated
                # NOTE(review): eval() runs text taken from the
                # configuration; only safe for trusted configurations
                for k, v in self.parameter_constraints.items():
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))
                    if eval(_eval_str) is False:
                        # same form as the other samplers in this class
                        m = "failed parameter constraint, {}".format(k)
                        raise PyposmatBadParameterError(m,
                                                        parameters=_parameters)

                _results = self.evaluate_parameter_set(parameters=_parameters)
            except _handled_errors as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                n_samples_completed += 1
                if n_samples_completed % 10 == 0:
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = '{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        n_samples_completed, time_total, avg_time, _n_errors)
                    print('rank{}:'.format(self.mpi_rank) + _str_msg)

    def calculate_equilibrium_density(self, a0, latt, parameters):
        """Sum the electron density contributed by the neighbor shells.

        The density function named by
        ``self.configuration.potential['density_type']`` is evaluated with
        the 'd_'-prefixed parameters (prefix removed) and interpolated at
        the neighbor distances of the lattice.  Each shell closer than the
        cutoff, which lies midway between the second and third shell,
        contributes its density times its number of neighbors.

        Args:
            a0: lattice parameter
            latt(str): lattice type; only 'fcc' is implemented
            parameters(dict): parameter name -> value
        Returns:
            the equilibrium density for 'fcc'; None for any other lattice
        Raises:
            ValueError: if parameters holds no 'd_'-prefixed entry
        """
        _parameters = OrderedDict()
        s = None
        for k, v in parameters.items():
            if k.startswith('d_'):
                _parameters[k[2:]] = v
                # first token after the prefix, used to select the density
                # curve below; taken from density parameters only instead
                # of from whichever key happens to come last
                s = k[2:].split('_')[0]
        if s is None:
            raise ValueError(
                "no density parameters (prefix 'd_') were provided")

        _potential_type = self.configuration.potential['density_type']
        _symbols = self.configuration.potential['symbols']
        _module_name, _class_name = PotentialObjectMap(
            potential_type=_potential_type)
        _module = importlib.import_module(_module_name)
        _class = getattr(_module, _class_name)
        _dens_potential = _class(symbols=_symbols)

        if latt == 'fcc':
            # fcc shells: a0/sqrt(2) (12 atoms), a0 (6), a0*sqrt(3/2) (24).
            # The first distance was previously 2/sqrt(2)*a0 = 1.414*a0,
            # which lies beyond the cutoff and dropped the 12 nearest
            # neighbors from the sum.
            d = OrderedDict([('1NN', a0 / (2**0.5)), ('2NN', 1.000 * a0),
                             ('3NN', 1.225 * a0)])
            Z = OrderedDict([('1NN', 12), ('2NN', 6), ('3NN', 24)])
            rcut = (d['2NN'] + d['3NN']) / 2.

            rmax = 10.
            r = np.linspace(1, 10, 5000) * rmax / 10
            rho = _dens_potential.evaluate(r, _parameters, rcut)

            rho_e = 0
            for m in Z:
                if d[m] < rcut:
                    rho_e += Z[m] * np.interp(d[m], r, rho[s])

            return rho_e

        # other lattice types are not implemented
        return None

    def print_structure_database(self):
        m = [
            80 * '-', '{:^80}'.format('STRUCTURE DATABASE'), 80 * '-',
            'structure_directory:{}'.format(self.structure_directory), '',
            '{:^20} {:^20}'.format('name', 'filename'),
            '{} {}'.format(20 * '-', 20 * '-')
        ]
        m += [
            '{:20} {:20}'.format(k, v)
            for k, v in self.structures['structures'].items()
        ]
        self.log(m)

    def print_sampling_configuration(self):
        print(80 * '-')
        print('{:^80}'.format('SAMPLING CONFIGURATION'))
        print(80 * '-')

        print('{:^10} {:^10} {:^20}'.format('iteration', 'n_samples',
                                            'sampling_type'))
        print('{} {} {}'.format(10 * '-', 10 * '-', 20 * '-'))

        for i in range(self.n_iterations):
            _sample_type = self.configuration.sampling_type[i]['type']
            if _sample_type == 'kde_w_clusters':
                _n_samples = self.configuration.sampling_type[i][
                    'n_samples_per_cluster']
            else:
                _n_samples = self.configuration.sampling_type[i]['n_samples']
            print('{:^10} {:^10} {:^20}'.format(i, _n_samples, _sample_type))

    def print_initial_parameter_distribution(self):
        print(80 * '-')
        print('{:80}'.format('INITIAL PARAMETER DISTRIBUTION'))
        print(80 * '-')
        for p in self.parameter_distribution_definition:
            if p in self.free_parameter_names:
                str_free = 'free'
                if self.parameter_distribution_definition[p][0] == 'uniform':
                    print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format(
                        p, str_free,
                        self.parameter_distribution_definition[p][0],
                        self.parameter_distribution_definition[p][1]['a'],
                        self.parameter_distribution_definition[p][1]['b']))
                elif self.parameter_distribution_definition[p][0] == 'normal':
                    print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format(
                        p, str_free,
                        self.parameter_distribution_definition[p][0],
                        self.parameter_distribution_definition[p][1]['mu'],
                        self.parameter_distribution_definition[p][1]['sigma']))
                else:
                    _distribution_type = self.parameter_distribution_defintion[
                        p][0]
                    s = "incorrection parameter distribution for parameter {}.  probability distribution function, {}, is not supported"
                    s = s.format(p, _distribution_type)
                    raise ValueError(s)

            else:
                str_free = 'not_free'
                print('{:^20} {:^10}'.format(p, str_free))
Ejemplo n.º 5
0
class PyposmatSampler(PyposmatEngine):
    """Base sampling engine to build other sampling engines upon.

    Args:
        configuration (str): filename of the configuration file
        mpi_rank (int, optional): rank of this process, see
            `_configure_mpi_attributes`
        mpi_size (int, optional): number of ranks, see
            `_configure_mpi_attributes`
        base_directory (str, optional): forwarded unchanged to the parent
            class constructor
        results (str, optional): filename where to output the simulation
            results
        bad_parameters_fn (str, optional): filename stored in the
            `bad_parameters_fn` attribute
        o_log (str, PyposmatLogFile, optional): see `_configure_logger`
    Attributes:
        config_fn(str): filename of the configuration file
        data_in_fn(str): filename where to get previous simulation results
        data_out_fn(str): filename where to output the current simulation results
        bad_parameters_fn(str): filename given by the `bad_parameters_fn` argument
        data_in(:obj:PyposmatDataFile): object for reading in a data file
        data_out(:obj:PyposmatDataFile): object for writing out a data file
        obj_log(:obj:PyposmatLogFile): the log object
        mpi_rank(int): defaults to 0
        mpi_size(int): defaults to 1
    """

    def __init__(self,
                 configuration='pyposmat.configuration.yaml',
                 mpi_rank=None,
                 mpi_size=None,
                 base_directory=None,
                 *,
                 results='pyposmat.results.out',
                 bad_parameters_fn='pyposmat.badparameters.out',
                 o_log=None):
        # NOTE(review): the previous body was an unfinished draft: it had a
        # bare `assert` (a SyntaxError) and read names that were never
        # defined (config_fn, results_fn, data_in_fn, o_config, o_log,
        # bad_parameters_fn).  The keyword-only arguments above supply those
        # values with defaults, so the original positional signature is
        # unchanged.  The draft also asserted that a configuration object
        # could be passed, but nothing consumed one, so only a filename is
        # accepted here -- confirm the intended API.

        # check types for the attributes
        assert type(configuration) is str
        assert type(results) is str
        assert type(bad_parameters_fn) is str
        assert type(o_log) in [type(None), str, PyposmatLogFile]
        assert type(mpi_rank) in [type(None), int]
        assert type(mpi_size) in [type(None), int]
        assert type(base_directory) in [type(None), str]

        super().__init__(
            filename_in=configuration,
            filename_out=results,
            base_directory=base_directory,
            fullauto=False)

        # default values for mpi_attributes
        self.mpi_rank = 0
        self.mpi_size = 1
        self._configure_mpi_attributes(mpi_rank=mpi_rank, mpi_size=mpi_size)

        # set up necessary filenames
        self.config_fn = configuration
        self.data_in_fn = None
        self.data_out_fn = results
        self.bad_parameters_fn = bad_parameters_fn

        # data_objects
        self.data_in = None
        self.data_out = None

        # configure log object
        self.obj_log = None
        self._configure_logger(o_log)

        # private attributes
        self._parameter_constraints = None

    def _configure_mpi_attributes(self, mpi_rank, mpi_size):
        """set the mpi_rank and mpi_size attributes

        The attributes fall back to a single-rank setup (rank 0 of 1) unless
        both arguments are integers and mpi_rank is less than mpi_size.

        Args:
            mpi_rank (int, None): the rank of this process
            mpi_size (int, None): the total number of ranks
        """
        # default values, these are set in __init__() but declared here for
        # clarity
        self.mpi_rank = 0
        self.mpi_size = 1

        # we enforce the condition that mpi_rank and mpi_size are both integer types
        # and that the mpi rank_id is less than the total number of mpi_ranks
        if type(mpi_rank) is int and type(mpi_size) is int:
            if mpi_rank < mpi_size:
                self.mpi_rank = mpi_rank
                self.mpi_size = mpi_size

    def _configure_logger(self, o_log=None):
        """configure the logging service

        Configuration of the log object has different behavior based upon the type passed
        into the argument o_log.  If o_log is PyposmatLogFile, that object will be accessed
        by reference.  A string is assumed to be a filename location.  If o_log is None,
        a PyposmatLogFile is constructed with its default arguments.

        Args:
            o_log (str,PyposmatLogFile,optional): default: None
        Raises:
            AssertionError: if o_log has an unsupported type
            TypeError: same condition, when assertions are disabled
        """

        assert type(o_log) in [type(None), str, PyposmatLogFile]

        if type(o_log) is PyposmatLogFile:
            self.obj_log = o_log
        elif type(o_log) is str:
            self.obj_log = PyposmatLogFile(filename=o_log)
        elif o_log is None:
            self.obj_log = PyposmatLogFile()
        else:
            m = "o_log must be str, PyposmatLogFile, or None"
            raise TypeError(m)

    @property
    def n_iterations(self):
        """the 'n_iterations' entry of the sampling configuration, or None"""
        if self.configuration is None:
            return None
        return self.configuration.sampling_type['n_iterations']

    @property
    def parameter_names(self):
        """parameter names from the configuration, or None"""
        if self.configuration is None:
            return None
        return self.configuration.parameter_names

    @property
    def qoi_names(self):
        """qoi names from the configuration, or None"""
        if self.configuration is None:
            return None
        return self.configuration.qoi_names

    @property
    def error_names(self):
        """error names from the configuration, or None"""
        if self.configuration is None:
            return None
        return self.configuration.error_names

    @property
    def parameter_distribution_definition(self):
        """the sampling distribution from the configuration, or None"""
        # the attribute name was misspelled (`configuation`) in the previous
        # version, so this property always raised AttributeError
        if self.configuration is None:
            return None
        return self.configuration.sampling_distribution

    @property
    def free_parameter_names(self):
        """free parameter names from the configuration, or None"""
        if self.configuration is None:
            return None
        return self.configuration.free_parameter_names

    @property
    def parameter_constraints(self):
        """the inequality constraints used when sampling, or None

        The sampling constraints of the configuration are returned unless
        `_parameter_constraints` has been set, in which case that value is
        returned.
        """
        if self.configuration is None:
            return None
        if self._parameter_constraints is None:
            return self.configuration.sampling_constraints
        # NOTE(review): the previous version returned None in this branch,
        # discarding the explicitly set value -- confirm the override
        # semantics are what is intended
        return self._parameter_constraints

    @property
    def constrained_parameter_names(self):
        """names of the parameters which are not free parameters, or None"""
        if self.configuration is None:
            return None
        free_names = set(self.free_parameter_names)
        return [p for p in self.parameter_names if p not in free_names]

    def log(self, str_msg):
        """log message to log file

        Args:
            str_msg (str,list): a message, or a list of lines which are
                joined with newlines into one message

        Raises:
            AssertionError: if str_msg is not a :obj:str or a :obj:list of :obj:str
            TypeError: same condition, when assertions are disabled
        """

        assert type(str_msg) in [str, list]
        if type(str_msg) is list:
            assert all([type(v) is str for v in str_msg])

        # build the message first; the previous version wrote `m` before it
        # was assigned, which raised UnboundLocalError on every call
        if type(str_msg) is str:
            m = str_msg
        elif type(str_msg) is list:
            m = "\n".join(str_msg)
        else:
            m = "str_msg must be either be a str or a list of str"
            raise TypeError(m)
        self.obj_log.write(m)

    def read_configuration_file(self, filename=None):
        """read the pyposmat configuration file

        This method overrides the inherited method.

        Args:
            filename(str,optional):path of the filename.  If the filename is
                not specified, then the method will run using the class
                attribute, `config_fn`

        Returns:
            Nothing returned

        Raises:
            AssertionError: if filename is neither a str nor None
            TypeError: same condition, when assertions are disabled
        """

        # In the previous iteration, this set a bunch of public attributes.  I
        # have reimplemented them as properties because it is much easier for an
        # external developer to understand property implementation rather than search
        # for a property which may be mutated.
        # -- EJR, 2/17/2019

        assert type(filename) in [type(None), str]

        if filename is None:
            _filename = self.config_fn
        elif type(filename) is str:
            _filename = filename
        else:
            m = "filename must either be a str or NoneType"
            raise TypeError(m)

        super().read_configuration_file(filename=_filename)

    def configure_pyposmat_datafile_in(self, filename=None):
        """ configures the data_in attribute

        Args:
            filename(str,optional): path of the input file to be used.  If
                not specified, the current value of `data_in_fn` is used.
        """
        assert type(filename) in [type(None), str]

        if type(filename) is str:
            self.data_in_fn = filename
        self.data_in = PyposmatDataFile(filename=self.data_in_fn)

    def configure_pyposmat_datafile_out(self, filename=None):
        """ configures the data_out attribute

        Args:
            filename(str,optional): path of the output file to be used.  If
                not specified, the current value of `data_out_fn` is used.
        """
        # filename now defaults to None, matching the assertion below and
        # the signature of configure_pyposmat_datafile_in()
        assert type(filename) in [type(None), str]

        if type(filename) is str:
            self.data_out_fn = filename
        self.data_out = PyposmatDataFile(filename=self.data_out_fn)

    def initalize_sampler(self):
        """ stub implementation which needs to be overridden by the inheriting class"""
        raise NotImplementedError

    def generate_free_parameters(self):
        """ stub implementation which needs to be overridden by the inheriting class

        Returns:
            OrderedDict: every free parameter name mapped to 0.
        """
        free_parameters = OrderedDict()
        for p in self.free_parameter_names:
            free_parameters[p] = 0.
        return free_parameters

    def enforce_parameter_equality_constraints(self, free_parameters):
        """evaluate the parameters defined by an 'equals' constraint

        Args:
            free_parameters (dict): free parameter name -> value

        Returns:
            OrderedDict: constrained parameter name -> value.  Parameters
            whose definition is a list are not evaluated here.
        """
        # substitute longer names first, so that a parameter name which is a
        # prefix of another name does not corrupt the longer name
        names_longest_first = sorted(self.free_parameter_names,
                                     key=len,
                                     reverse=True)

        constrained_parameters = OrderedDict()
        for p in self.constrained_parameter_names:
            definition = self.parameter_distribution_definition[p]
            if definition[0] != 'equals':
                continue

            if p.endswith('latticetype'):
                # taken verbatim, not evaluated
                constrained_parameters[p] = definition[1]

            # evaluate the strings
            elif type(definition[1]) is not list:

                # get the string to evaluate
                s = str(definition[1])

                # replace string values with numerical values
                for fp in names_longest_first:
                    if fp in s:
                        s = s.replace(fp, str(free_parameters[fp]))

                # the string can now be evaluated as a float
                # NOTE(review): eval() executes text taken from the
                # configuration; only use with trusted configuration files
                constrained_parameters[p] = eval(s)

        return constrained_parameters

    def enforce_parameter_inequality_constraints(self, parameters):
        """check the parameters against the inequality constraints

        Args:
            parameters (dict): parameter name -> value

        Raises:
            PyposmatBadParameterError: if any constraint evaluates as false
        """
        # substitute longer names first, see
        # enforce_parameter_equality_constraints()
        names_longest_first = sorted(parameters, key=len, reverse=True)

        # evaluation string
        for k, v in self.parameter_constraints.items():
            eval_str = v
            for pn in names_longest_first:
                eval_str = eval_str.replace(pn, str(parameters[pn]))

            # NOTE(review): eval() executes text taken from the
            # configuration; only use with trusted configuration files
            if not eval(eval_str):
                raise PyposmatBadParameterError()

    def run_simulations(self, i_iteration, n_samples=None, filename=None):
        """ base method to override

        This implementation only resolves its arguments against the
        configuration; it runs no simulations and returns nothing.

        Args:
            i_iteration (int): the iteration to run
            n_samples (int, optional): number of samples.  If not specified,
                the 'n_samples' entry configured for this iteration is used.
            filename (str, optional): path of a data file
        """

        assert type(i_iteration) is int
        assert type(n_samples) in [type(None), int]
        assert type(filename) in [type(None), str]

        # define some convenience local variables for readability
        i = i_iteration

        # an explicit argument takes precedence over the configuration; the
        # previous version had this condition inverted
        if n_samples is None:
            _n_samples = self.configuration.sampling_type[i]['n_samples']
        else:
            _n_samples = n_samples

        _sampling_type = self.configuration.sampling_type[i]['type']

        # the previous version indexed the configuration with n_samples here
        _filename = filename
Ejemplo n.º 6
0
class PyposmatIterativeSampler(object):
    """  Iterative Sampler which wraps multiple simulation algorithms.

    This class wraps multiple simulation algorithms so that they can be run in an iterative manner.
    Since this class has so many configuration options, the attributes of this class is set
    by a YAML based configuration file.  The class PyposmatConfigurationFile aids in the creation
    and reading of these options.  These attributes are public and be set programmatically within
    a script.

    Notes:
        config_fn = 'data/pyposmat.config.in'

        engine = PyposmatIterativeSampler(configuration_filename=config_fn)
        engine.read_configuration_file()
        engine.run_all()

    Args:
        configuration_filename(str): the filename of the YAML configuration file
        is_restart(bool,optional): When set to True, this argument controls the restart behavior
            of this class.  By default, is set to False
        is_auto(bool,optional): When set to True, this agument will automatically configure the
            class.  By default this is set to False, mostly because this software is currently in
            development, and this necessary to to write integration testing
        log_fn(str,optional): This the filename path where to set logging, by default it is set
           as `pyposmat.log` contained in the configurable data directory
        log_to_stdout(bool,optional): When set to True, all log messages will be directed to
           standard out as well as the log file

    Attributes:
        mpi_comm(MPI.Intracomm)
        mpi_rank(int)
        mpi_size(int)
        mpi_nprocs(int)
        i_iteration(int)
        n_iterations(int)
        rv_seed(int)
        rv_seeds(np.ndarray)
        configuration_filename = configuration(filename)
        configuration(PyposmatConfigurationFile)
        mc_sampler(PyposmatMonteCarloSampler)
        root_directory(str)
        data_directory(str)
        is_restart(bool)
        start_iteration=0
        
    """

    parameter_sampling_types = [
        'parametric', 'kde', 'from_file', 'kde_w_clusters'
    ]

    def __init__(self,
                 configuration_filename,
                 is_restart=False,
                 is_auto=False,
                 log_fn=None,
                 log_to_stdout=True):
        """Set every attribute to its starting value and set up logging.

        Args:
            configuration_filename(str): stored in `configuration_filename`
            is_restart(bool,optional): stored in `is_restart`; when true,
                `delete_mpi_rank_directories()` is called at the end
            is_auto(bool,optional): accepted but not used by this method
            log_fn(str,optional): forwarded to `initialize_logger()`
            log_to_stdout(bool,optional): forwarded to `initialize_logger()`
        """

        # formats should not contain a trailing end line character
        rule = 80 * '='
        self.SECTION_HEADER_FORMAT = "\n".join((rule, "{:^80}", rule))
        self.RANK_DIR_FORMAT = 'rank_{}'

        # MPI state, iteration counter and random seeds start unset
        self.mpi_comm = self.mpi_rank = self.mpi_size = self.mpi_nprocs = None
        self.i_iteration = None
        self.rv_seed = self.rv_seeds = None

        # configuration and sampler objects are created later
        self.configuration_filename = configuration_filename
        self.configuration = None
        self.mc_sampler = None

        # the root directory is the working directory at construction time
        self.root_directory = os.getcwd()
        self.data_directory = 'data'
        self.is_restart = is_restart
        self.start_iteration = 0

        # logging
        self.log_fn = log_fn
        self.log_to_stdout = log_to_stdout
        self.o_log = None
        self.initialize_logger(log_fn=log_fn, log_to_stdout=log_to_stdout)

        if self.is_restart:
            self.delete_mpi_rank_directories()

    @property
    def structure_directory(self):
        if self.configuration is None:
            return None
        else:
            d = self.configuration.structures['structure_directory']

            if not os.path.isabs(d):
                d = os.path.join(self.root_directory, d)

            return d

    @property
    def n_iterations(self):
        if self.configuration is None:
            return None
        else:
            return self.configuration.n_iterations

    @property
    def qoi_names(self):
        if self.configuration is None:
            return None
        else:
            return self.configuration.qoi_names

    @property
    def error_names(self):
        if self.configuration is None:
            return None
        else:
            return self.configuration.error_names

    def delete_mpi_rank_directories(self):
        """Delete the `rank_*` directories found in the root directory.

        Only rank 0 deletes; every rank then calls the barrier.  Entries
        whose name starts with 'rank_' but which are not directories are
        left alone, since `shutil.rmtree` would fail on them.

        Raises:
            OSError: if a directory cannot be removed
        """
        if self.mpi_rank == 0:
            self.log('Deleting previous rank directories')
            for name in os.listdir(self.root_directory):
                if not name.startswith('rank_'):
                    continue
                path = os.path.join(self.root_directory, name)
                if os.path.isdir(path):
                    # errors propagate to the caller; the previous
                    # `except: raise` wrapper did nothing
                    shutil.rmtree(path)
        MPI.COMM_WORLD.Barrier()

    def determine_last_iteration_completed(self):
        """Find the first iteration which has not been completed.

        Iteration i counts as complete when both `pyposmat.results.{i}.out`
        and `pyposmat.kde.{i+1}.out` exist in the data directory.  The scan
        stops at the first incomplete iteration and stores its index in
        `start_iteration`; if every iteration is complete, `start_iteration`
        ends up equal to `n_iterations`.

        Returns:
            int: the value of `start_iteration`
        """
        for iteration in range(self.n_iterations):
            results_path = os.path.join(
                self.data_directory,
                'pyposmat.results.{}.out'.format(iteration))
            kde_path = os.path.join(
                self.data_directory,
                'pyposmat.kde.{}.out'.format(iteration + 1))

            is_complete = os.path.isfile(results_path) \
                and os.path.isfile(kde_path)
            if not is_complete:
                # this is the iteration to resume from
                self.start_iteration = iteration
                break

            if self.mpi_rank == 0:
                self.log('iteration {}: is complete'.format(iteration))
            self.start_iteration = iteration + 1

        MPI.COMM_WORLD.Barrier()
        return self.start_iteration

    def run_all(self):
        """Run every iteration from the starting iteration onwards.

        The MPI environment and the data directory are set up first.  When
        `is_restart` is set, the starting iteration is determined from the
        files already present; otherwise it is 0.  Each iteration then runs
        the simulations on all ranks, after which rank 0 merges the data and
        error files and analyzes the results.  Every phase ends with a call
        to `MPI.COMM_WORLD.Barrier()`.

        `n_iterations` is read from `self.configuration`, which this method
        does not set itself.
        """
        self.setup_mpi_environment()

        self.initialize_data_directory()

        self.start_iteration = 0

        # on restart, resume from the first iteration which is not complete
        if self.is_restart:
            self.determine_last_iteration_completed()

        if self.mpi_rank == 0:
            self.log("starting at simulation: {}".format(self.start_iteration))
        MPI.COMM_WORLD.Barrier()

        for i in range(self.start_iteration, self.n_iterations):
            self.i_iteration = i

            # log iteration information
            self.log_iteration_information(i_iteration=i)

            # phase 1: every rank runs its simulations
            self.run_simulations(i)
            MPI.COMM_WORLD.Barrier()

            # phase 2: rank 0 merges the files, the other ranks go straight
            # to the barrier call
            if self.mpi_rank == 0:
                self.log("ALL SIMULATIONS COMPLETE FOR ALL RANKS")
                self.log("MERGING FILES")
                self.merge_data_files(i)
                self.merge_error_files(i)
            MPI.COMM_WORLD.Barrier()

            # phase 3: rank 0 analyzes the merged results
            if self.mpi_rank == 0:
                self.log("ANALYZE RESULTS")
                self.analyze_results(i)
            MPI.COMM_WORLD.Barrier()

        if self.mpi_rank == 0:
            self.log(80 * '-')
            self.log('JOBCOMPLETE')

    def initialize_sampler(self,
                           config_fn,
                           results_fn,
                           mpi_rank=None,
                           mpi_size=None,
                           o_log=None):
        """ initialize the sampling object

        This method initializes the `mc_sampler` attribute with a
        PyposmatMonteCarloSampler and configures it.

        Note:
            This breakout is part of a larger effort within PYPOSPACK, to have
            more object-oriented approach for parametric sampling.  The goal
            eventually is to implement an instance of PyposmatBaseSampler, and
            allow users of this software library to be able to extend this
            software by simply extending the base class.
        Args:
            config_fn(str): absolute path to the configuration file
            results_fn(str): absolute path to the results file
            mpi_rank(int,optional): the MPI rank of executing this method.
                By default, the attribute `mpi_rank`.
            mpi_size(int,optional): the size of the MPI execution group.
                By default, the attribute `mpi_size`.
            o_log(PyposmatLogFile,str,optional): the log file.  If a string is
                passed, then the sampling class will initialize a separate log
                file with the string of path created.  If a log file object is
                passed, then sampling object will use that instance of the
                object to log information.  By default, it will pass the
                attribute, `o_log`.
        Raises:
            AssertionError: if an argument has the wrong type or a path is
                not absolute
        """

        assert type(config_fn) is str
        assert type(results_fn) is str
        assert type(mpi_rank) in [type(None), int]
        assert type(mpi_size) in [type(None), int]
        assert type(o_log) in [type(None), PyposmatLogFile, str]

        # check to see if the paths provided are absolute paths
        assert os.path.isabs(config_fn)
        assert os.path.isabs(results_fn)

        if mpi_rank is None:
            mpi_rank = self.mpi_rank
        if mpi_size is None:
            mpi_size = self.mpi_size
        # the documented default is the instance's own log object; the
        # previous version forwarded None instead
        if o_log is None:
            o_log = self.o_log

        self.mc_sampler = PyposmatMonteCarloSampler(filename_in=config_fn,
                                                    filename_out=results_fn,
                                                    mpi_rank=mpi_rank,
                                                    mpi_size=mpi_size,
                                                    o_log=o_log)
        self.mc_sampler.create_base_directories()
        self.mc_sampler.read_configuration_file()
        # we have to be able to find the structure directory
        self.mc_sampler.configuration.structures[
            'structure_directory'] = self.structure_directory
        self.mc_sampler.configure_qoi_manager()
        self.mc_sampler.configure_task_manager()
        self.mc_sampler.configure_pyposmat_datafile_out()
        self.mc_sampler.configure_pyposmat_badparameters_file()

        self.log_more_iteration_information()

    def initialize_file_sampler(self,
                                config_fn,
                                results_fn,
                                i_iteration=0,
                                mpi_rank=None,
                                mpi_size=None,
                                o_log=None):
        """ initialize the sampling object for sampling from a file

        This method initializes the `mc_sampler` attribute with a
        PyposmatFileSampler and configures it.  The input data file is the
        'file' entry configured for the iteration; a relative path is joined
        onto `root_directory`.

        Note:
            This breakout is part of a larger effort within PYPOSPACK, to have
            more object-oriented approach for parametric sampling.  The goal
            eventually is to implement an instance of PyposmatBaseSampler, and
            allow users of this software library to be able to extend this
            software by simply extending the base class.
        Args:
            config_fn(str): absolute path to the configuration file
            results_fn(str): absolute path to the results file
            i_iteration(int,optional): the iteration to sample the file from,
                by default this is set to zero.
            mpi_rank(int,optional): the MPI rank of executing this method.
                By default, the attribute `mpi_rank`.
            mpi_size(int,optional): the size of the MPI execution group.
                By default, the attribute `mpi_size`.
            o_log(PyposmatLogFile,str,optional): the log file.  If a string is
                passed, then the sampling class will initialize a separate log
                file with the string of path created.  If a log file object is
                passed, then sampling object will use that instance of the
                object to log information.  By default, it will pass the
                attribute, `o_log`.
        Raises:
            AssertionError: if an argument has the wrong type or a path is
                not absolute
        """

        assert type(config_fn) is str
        assert type(results_fn) is str
        assert type(mpi_rank) in [type(None), int]
        assert type(mpi_size) in [type(None), int]
        assert type(o_log) in [type(None), PyposmatLogFile, str]

        # check to see if the paths provided are absolute paths
        assert os.path.isabs(config_fn)
        assert os.path.isabs(results_fn)

        if mpi_rank is None:
            mpi_rank = self.mpi_rank
        if mpi_size is None:
            mpi_size = self.mpi_size
        # the documented default is the instance's own log object; the
        # previous version forwarded None instead
        if o_log is None:
            o_log = self.o_log

        # get the absolute path of the datafile we are sampling from
        sample_file = self.configuration.sampling_type[i_iteration]['file']
        if os.path.isabs(sample_file):
            data_in_fn = sample_file
        else:
            data_in_fn = os.path.join(self.root_directory, sample_file)

        self.mc_sampler = PyposmatFileSampler(config_fn=config_fn,
                                              data_in_fn=data_in_fn,
                                              data_out_fn=results_fn,
                                              mpi_rank=mpi_rank,
                                              mpi_size=mpi_size,
                                              o_log=o_log,
                                              fullauto=False)

        self.mc_sampler.create_base_directories()
        self.mc_sampler.read_configuration_file()

        # we have to be able to find the structure directory
        self.mc_sampler.configuration.structures[
            'structure_directory'] = self.structure_directory
        self.mc_sampler.configure_qoi_manager()
        self.mc_sampler.configure_task_manager()
        self.mc_sampler.configure_datafile_out()
        self.mc_sampler.configure_pyposmat_badparameters_file()

        self.log_more_iteration_information()

    def initialize_rank_directory(self):
        """ create the rank directory

        This method defines the rank directory as an absolute path and stores it in
        the attribute `rank_directory`.  If a current directory exists there, then
        it is deleted with alll it's contents and then recreated.

        """
        rank_directory = os.path.join(
            self.root_directory, self.RANK_DIR_FORMAT.format(self.mpi_rank))

        # find the directory, delete it and it's constants and then recreates ot
        if os.path.isdir(rank_directory):
            shutil.rmtree(rank_directory)
        os.mkdir(rank_directory)

        self.rank_directory = rank_directory

    def run_simulations(self, i_iteration):
        """ run simulation for a single iteration

        Each rank is given a different execution context so that the disk IO
        don't conflict: the rank directory is recreated and becomes the
        working directory while the simulations run.  The working directory
        is set back to `root_directory` afterwards, also when an exception
        is raised.

        Args:
            i_iteration(int): the iteration to run
        Raises:
            PyposmatSamplingTypeError: if the sampling type configured for
                the iteration is not supported
        """
        self.initialize_rank_directory()
        config_filename = self.configuration_filename
        results_filename = os.path.join(self.rank_directory,
                                        'pyposmat.results.out')

        # change execution context for this rank
        os.chdir(self.rank_directory)
        try:
            # set random seed
            self.determine_rv_seeds()
            self.log_random_seeds(i_iteration=i_iteration)

            sampling_type = \
                self.configuration.sampling_type[i_iteration]['type']
            if self.mpi_rank == 0:
                self.log("sampling_type={}".format(sampling_type))
            MPI.COMM_WORLD.Barrier()

            # <----- parameter sampling type ------------------------------
            if sampling_type == 'parametric':
                self.initialize_sampler(config_fn=config_filename,
                                        results_fn=results_filename,
                                        mpi_rank=self.mpi_rank,
                                        mpi_size=self.mpi_size,
                                        o_log=self.o_log)

                self.run_parametric_sampling(i_iteration=i_iteration)

            # <----- kde sampling sampling type ---------------------------
            elif sampling_type == 'kde':
                self.initialize_sampler(config_fn=config_filename,
                                        results_fn=results_filename,
                                        mpi_rank=self.mpi_rank,
                                        mpi_size=self.mpi_size,
                                        o_log=self.o_log)

                self.run_kde_sampling(i_iteration=i_iteration)

            # <----- sampling from a file type ----------------------------
            # get parameters from file
            elif sampling_type == 'from_file':
                # i_iteration is passed on so that the file configured for
                # this iteration is used; the previous version left it at
                # its default of 0
                self.initialize_file_sampler(config_fn=config_filename,
                                             results_fn=results_filename,
                                             i_iteration=i_iteration,
                                             mpi_rank=self.mpi_rank,
                                             mpi_size=self.mpi_size,
                                             o_log=self.o_log)

                self.run_file_sampling(i_iteration=i_iteration)

            # <----- kde with clusters sampling type ----------------------
            elif sampling_type == 'kde_w_clusters':
                self._run_cluster_sampling(i_iteration=i_iteration)

            else:
                error_dict = OrderedDict([('i_iteration', i_iteration),
                                          ('sampling_type', sampling_type)])
                m = "unknown parameter sampling type: {}".format(sampling_type)
                m += "the valid sampling types are: {}".format(",".join(
                    self.parameter_sampling_types))
                raise PyposmatSamplingTypeError(m, error_dict)
        finally:
            # return to root directory
            os.chdir(self.root_directory)

    def _run_cluster_sampling(self, i_iteration):
        """ run the 'kde_w_clusters' sampling for a single iteration

        Private helper of `run_simulations()`.
        """
        cluster_fn = "pyposmat.cluster.{}.out".format(i_iteration)
        pyposmat_datafile_in = os.path.join(self.root_directory,
                                            self.data_directory,
                                            cluster_fn)

        _config_filename = os.path.join(self.root_directory,
                                        self.configuration_filename)

        # the previous version read `_mc_config` without ever defining it
        _mc_config = self.configuration.sampling_type[i_iteration]

        # determine number of sims for this rank
        _mc_n_samples = _mc_config['n_samples_per_cluster']
        _n_samples_per_rank = int(_mc_n_samples / self.mpi_size)
        if _mc_n_samples % self.mpi_size > self.mpi_rank:
            _n_samples_per_rank += 1
        # NOTE(review): _n_samples_per_rank is computed but the total,
        # _mc_n_samples, is what gets passed to run_simulations() below --
        # confirm which one the cluster sampler expects

        # initialize sampling object
        o = PyposmatClusterSampler(o_logger=self.log,
                                   mpi_rank=self.mpi_rank,
                                   mpi_comm=self.mpi_comm,
                                   mpi_size=self.mpi_size)
        o.create_base_directories()
        o.read_configuration_file(filename=_config_filename)
        # check to see if clustered data file exists
        if self.mpi_rank == 0:
            if not os.path.isfile(pyposmat_datafile_in):
                kde_fn = "pyposmat.kde.{}.out".format(i_iteration)
                kde_fn = os.path.join(self.root_directory,
                                      self.data_directory, kde_fn)
                o.write_cluster_file(filename=kde_fn,
                                     i_iteration=i_iteration)
        MPI.COMM_WORLD.Barrier()

        o.configure_pyposmat_datafile_in(filename=pyposmat_datafile_in)
        # fix relative path to structure database folder
        _structure_dir = o.configuration.structures['structure_directory']
        o.configuration.structures['structure_directory'] = \
                os.path.join('..', _structure_dir)
        # finish the rest of the initialization
        o.configure_qoi_manager()
        o.configure_task_manager()
        o.configure_pyposmat_datafile_out()
        MPI.COMM_WORLD.Barrier()

        # run simulations
        o.run_simulations(i_iteration=i_iteration,
                          n_samples=_mc_n_samples,
                          filename=pyposmat_datafile_in)
        MPI.COMM_WORLD.Barrier()

    def initialize_data_directory(self, data_directory=None):
        """ determine the absolute path of the data directory and create it

        This method sets the `data_directory` attribute of the class.  Rank 0
        then attempts to create the directory; if it already exists or
        cannot be created, that is logged and no exception is raised.

        Args:
            data_directory(str,optional): the path of the data directory,
                the path can be expressed in either a relative path, or an
                absolute path.  Relative paths are taken relative to
                `root_directory`.  If not specified, the current value of
                the `data_directory` attribute is used, falling back to
                'data'.
        Returns:
            (str) the path stored in the `data_directory` attribute
        """

        assert type(data_directory) in [type(None), str]
        assert type(self.data_directory) in [type(None), str]

        # determine the data directory path
        if data_directory is None:
            if self.data_directory is None:
                self.data_directory = os.path.join(self.root_directory, 'data')
            elif not os.path.isabs(self.data_directory):
                self.data_directory = os.path.join(self.root_directory,
                                                   self.data_directory)
            # an attribute which is already absolute is kept as it is; the
            # previous version overwrote it with None in that case
        elif os.path.isabs(data_directory):
            # absolute path
            self.data_directory = data_directory
        else:
            # create a absolute path from the relative path
            self.data_directory = os.path.abspath(
                os.path.join(self.root_directory, data_directory))

        # create data directory
        if self.mpi_rank == 0:
            try:
                os.mkdir(self.data_directory)
                self.log('created the data directory.')
                self.log('\tdata_directory;{}'.format(self.data_directory))
            except FileExistsError:
                self.log(
                    'attempted to create data directory, directory already exists.'
                )
                self.log('\tdata_directory:{}'.format(self.data_directory))
            except OSError:
                self.log(
                    'attempted to create data directory, cannot create directory.'
                )
                self.log('\tdata_directory:{}'.format(self.data_directory))
        MPI.COMM_WORLD.Barrier()

        return self.data_directory

    def run_parametric_sampling(self, i_iteration):
        """Run one iteration of parametric sampling.

        The number of samples assigned to this rank is obtained from
        `determine_number_of_samples_per_rank` and handed, together with the
        iteration index, to `self.mc_sampler.run_simulations`.

        Args:
            i_iteration(int): index of the sampling iteration to run
        """

        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatMonteCarloSampler

        n_samples_this_rank = self.determine_number_of_samples_per_rank(
            i_iteration=i_iteration)

        self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                        n_samples=n_samples_this_rank)

    def run_kde_sampling(self, i_iteration):
        """Run one iteration of KDE sampling.

        The sampler is pointed at `pyposmat.kde.<i_iteration>.out` inside
        `self.data_directory` and asked to run this rank's share of the
        samples.

        Args:
            i_iteration(int): index of the sampling iteration to run
        """
        # flip to True to print the working directory, rank and inputs
        is_debug = False

        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatMonteCarloSampler

        kde_basename = 'pyposmat.kde.{}.out'.format(i_iteration)
        kde_filename = os.path.join(self.data_directory, kde_basename)
        n_samples_per_rank = self.determine_number_of_samples_per_rank(
            i_iteration=i_iteration)

        if is_debug:
            debug_lines = [
                'cwd:{}'.format(os.getcwd()),
                'mpi_rank:{},kde_filename:{}'.format(self.mpi_rank,
                                                     kde_filename),
                'n_samples_per_rank:{}'.format(n_samples_per_rank),
            ]
            for line in debug_lines:
                print(line)

        self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                        n_samples=n_samples_per_rank,
                                        filename=kde_filename)

    def run_file_sampling(self, i_iteration):
        """Run one iteration of sampling from a parameter file.

        The input file is chosen as follows:

        * if the configuration entry for this iteration has a ``'file'`` key,
          that path is joined onto `self.root_directory`;
        * otherwise `pyposmat.kde.<i_iteration>.out` in the data directory is
          used, where a relative `self.data_directory` is resolved against
          `self.root_directory`.

        Rank 0 logs the chosen filename, all ranks synchronize on a barrier,
        and then every rank runs its share of the samples.

        Args:
            i_iteration(int): index of the sampling iteration to run
        """
        assert type(i_iteration) is int
        assert type(self.mc_sampler) is PyposmatFileSampler

        sampling_config = self.configuration.sampling_type[i_iteration]
        kde_basename = 'pyposmat.kde.{}.out'.format(i_iteration)

        if 'file' in sampling_config:
            filename = os.path.join(self.root_directory,
                                    sampling_config['file'])
        elif os.path.isabs(self.data_directory):
            filename = os.path.join(self.data_directory, kde_basename)
        else:
            # the original read `os.path, join(...)`, which raised NameError
            # instead of building the path
            filename = os.path.join(self.root_directory,
                                    self.data_directory,
                                    kde_basename)

        if self.mpi_rank == 0:
            self.log(80 * '-')
            self.log('{:^80}'.format('file sampling'))
            self.log(80 * '-')
            self.log('filename_in:{}'.format(filename))
        MPI.COMM_WORLD.Barrier()

        self.mc_sampler.run_simulations(
            i_iteration=i_iteration,
            n_samples=self.determine_number_of_samples_per_rank(
                i_iteration=i_iteration),
            filename=filename)

    def determine_number_of_samples_per_rank(self,
                                             i_iteration,
                                             N_samples=None):
        """Return how many samples this MPI rank should run.

        The total is split across `self.mpi_size` ranks.  Every rank gets the
        truncated quotient, and the ranks whose index is below the remainder
        get one extra sample, so the per-rank counts add up to the total.

        Args:
            i_iteration(int): index of the sampling iteration
            N_samples(int,optional): total number of samples for this
                iteration.  When omitted, the ``'n_samples'`` value of the
                configuration entry for `i_iteration` is used.
        Returns:
            (int): the number of samples for this rank
        """

        assert type(i_iteration) is int
        assert type(N_samples) in [type(None), int]
        assert type(self.configuration) is PyposmatConfigurationFile

        if N_samples is None:
            iteration_config = self.configuration.sampling_type[i_iteration]
            N_samples = iteration_config['n_samples']

        base_share = int(N_samples / self.mpi_size)
        n_leftover = N_samples % self.mpi_size

        # the first `n_leftover` ranks absorb the remainder
        if self.mpi_rank < n_leftover:
            return base_share + 1
        return base_share

    def initialize_logger(self, log_fn=None, log_to_stdout=None):
        """Create the log file object and store the logging settings.

        Sets `self.log_fn`, `self.o_log` and `self.log_to_stdout`.

        Args:
            log_fn(str,optional): path of the log file.  When omitted,
                `pyposmat.log` joined onto `self.root_directory` and
                `self.data_directory` is used.
            log_to_stdout(bool,optional): stored as-is in
                `self.log_to_stdout`.
        """

        assert type(log_fn) in [type(None), str]
        assert type(log_to_stdout) in [type(None), bool]

        if log_fn is not None:
            self.log_fn = log_fn
        else:
            self.log_fn = os.path.join(self.root_directory,
                                       self.data_directory,
                                       'pyposmat.log')

        self.o_log = PyposmatLogFile(filename=self.log_fn)
        self.log_to_stdout = log_to_stdout

    def setup_mpi_environment(self):
        """Store the MPI communicator, rank, size and processor name.

        Uses `MPI.COMM_WORLD` as the communicator and finishes by calling
        `log_mpi_environment`.
        """
        world = MPI.COMM_WORLD

        self.mpi_comm = world
        self.mpi_rank = world.Get_rank()
        self.mpi_size = world.Get_size()
        self.mpi_procname = MPI.Get_processor_name()

        self.log_mpi_environment()

    # random seed management
    def determine_rv_seeds(self, seed=None, i_iteration=None):
        """Set the random number seeds used across ranks and iterations.

        The global seed `self.rv_seed` is used to seed NumPy's global random
        number generator, from which a table of seeds with shape
        `(mpi_size, n_iterations)` is drawn and stored in `self.rv_seeds`.
        The generator is then reseeded with the entry for this rank and
        iteration.

        Args:
            seed(int,optional): global seed from which the per-rank,
                per-iteration seeds are derived.  When given, it replaces
                `self.rv_seed`.  When omitted and `self.rv_seed` is None, a
                random global seed is drawn.
            i_iteration(int,optional): the iteration whose seed is activated.
                Defaults to `self.i_iteration`.
        """
        RAND_INT_LOW = 0
        RAND_INT_HIGH = 2147483647

        assert type(seed) in [type(None), int]
        assert type(i_iteration) in [type(None), int]

        if i_iteration is None:
            i_iteration = self.i_iteration

        # store an explicitly requested seed; the original used `==` here,
        # which compared instead of assigning and dropped the argument
        if seed is not None:
            self.rv_seed = seed

        # fall back to a randomly drawn global seed
        if self.rv_seed is None:
            self.rv_seed = np.random.randint(low=RAND_INT_LOW,
                                             high=RAND_INT_HIGH)

        # when every rank holds the same rv_seed, every rank generates the
        # same seed table below
        np.random.seed(self.rv_seed)

        # one seed per (rank, iteration)
        self.rv_seeds = np.random.randint(low=RAND_INT_LOW,
                                          high=RAND_INT_HIGH,
                                          size=(int(self.mpi_size),
                                                self.n_iterations))

        # restart the generator with the seed for this rank and iteration
        np.random.seed(self.rv_seeds[self.mpi_rank, i_iteration])

    # logging methods
    def log(self, s):
        """Send a message to stdout and/or the log file.

        The message is printed when `self.log_to_stdout` is truthy and is
        written to `self.o_log` when that attribute is not None.

        Args:
            s: the message, passed unchanged to `print` and `o_log.write`
        """
        if self.log_to_stdout:
            print(s)

        log_file = self.o_log
        if log_file is None:
            return
        log_file.write(s)

    def log_iteration_information(self, i_iteration):
        """Log a section header announcing the start of an iteration.

        Only rank 0 writes the header; all ranks then wait on a barrier.
        Nothing is returned.

        Args:
            i_iteration(int): zero-based iteration index; it is shown
                one-based in the header, next to `self.n_iterations`
        """
        if self.mpi_rank == 0:
            title = 'Begin Iteration {}/{}'.format(i_iteration + 1,
                                                   self.n_iterations)
            self.log(self.SECTION_HEADER_FORMAT.format(title))
        MPI.COMM_WORLD.Barrier()

    def log_more_iteration_information(self):
        """Print sampler details on rank 0, then synchronize all ranks.

        Rank 0 asks the sampler to print the structure database and the
        sampling configuration, additionally the initial parameter
        distribution when `self.i_iteration` is 0, and then logs a rule.
        """
        #TODO: this logging needs to go into a separate logging method. -EJR
        is_root_rank = self.mpi_rank == 0

        if is_root_rank:
            sampler = self.mc_sampler
            sampler.print_structure_database()
            sampler.print_sampling_configuration()

            if self.i_iteration == 0:
                sampler.print_initial_parameter_distribution()

            self.log(80 * '-')

        MPI.COMM_WORLD.Barrier()

    def log_mpi_environment(self):
        """Log the MPI communication information on rank 0.

        Rank 0 writes a section header followed by the communicator size;
        all ranks then wait on a barrier.
        """
        if self.mpi_rank == 0:
            m = [
                self.SECTION_HEADER_FORMAT.format(
                    'MPI communication information'),
                'mpi_size={}'.format(self.mpi_size),
            ]
            # the original built this message and then discarded it
            self.log("\n".join(m))

        MPI.COMM_WORLD.Barrier()

    def log_random_seeds(self, i_iteration):
        """Log the global seed and the per-rank seed for an iteration.

        Rank 0 writes the header and the global seed.  After a barrier,
        each rank logs its own row, taken from `self.rv_seeds` at
        `[rank, i_iteration]`, and the ranks synchronize again.

        Args:
            i_iteration(int): column of `self.rv_seeds` to report
        """
        if self.mpi_rank == 0:
            rule = 80 * '-'
            header_lines = (
                rule,
                '{:^80}'.format('GENERATED RANDOM SEEDS'),
                rule,
                'global_seed:{}'.format(str(self.rv_seed)),
                'seeds_for_this_iteration:',
                '{:^8} {:^8}'.format('rank', 'seed'),
                '{} {}'.format(8 * '-', 8 * '-'),
            )
            for line in header_lines:
                self.log(line)
        MPI.COMM_WORLD.Barrier()

        row_fmt = '{:^8} {:>10}'
        for i_rank in range(self.mpi_size):
            if i_rank != self.mpi_rank:
                continue
            self.log(row_fmt.format(i_rank,
                                    self.rv_seeds[i_rank, i_iteration]))
        MPI.COMM_WORLD.Barrier()

    def get_results_dict(self):
        """Build a dictionary summarizing the run.

        Returns:
            (OrderedDict): currently holds a single ``'mpi'`` entry whose
                ``'size'`` is `self.mpi_size`
        """
        rd = OrderedDict()
        rd['mpi'] = OrderedDict()
        rd['mpi']['size'] = self.mpi_size

        # the original built the dictionary but never returned it
        return rd

    def analyze_data_directories(self, data_dir=None):
        """Scan a data directory for consecutive kde and results files.

        Starting at iteration 0, the scan looks for
        `pyposmat.kde.<i>.out` and `pyposmat.results.<i>.out`.  Each kde
        file found is recorded.  The scan advances while the results file
        for the current iteration exists.  When the kde file of a later
        iteration is missing, the results file of the previous iteration is
        recorded and the scan stops.

        Args:
            data_dir(str): the directory to scan
        Returns:
            (int, list): the iteration index at which the scan stopped and
                the list of recorded file paths.  `(0, [])` is returned when
                `data_dir` does not exist or is not a directory.
        """
        n_found = 0
        found_files = []

        if not (os.path.exists(data_dir) and os.path.isdir(data_dir)):
            return n_found, found_files

        last_results_path = None
        while True:
            kde_path = os.path.join(
                data_dir, "pyposmat.kde.{}.out".format(n_found))
            if os.path.exists(kde_path):
                found_files.append(kde_path)
            elif n_found > 0:
                # no kde file for this iteration: the previous results file
                # is the last piece of data available
                found_files.append(last_results_path)
                break

            results_path = os.path.join(
                data_dir, "pyposmat.results.{}.out".format(n_found))
            if not os.path.exists(results_path):
                break

            last_results_path = results_path
            n_found += 1

        return n_found, found_files

    def analyze_rank_directories(self, root_dir=None):
        """Count consecutive rank directories that contain a results file.

        Starting at rank 0, the scan looks for
        `rank_<i>/pyposmat.results.out` below the root directory and stops
        at the first rank for which that file is missing.

        Args:
            root_dir(str,optional): directory containing the `rank_<i>`
                directories.  Defaults to `self.root_directory`.
        Returns:
            (int, list): the number of consecutive ranks found and the
                results file paths, expressed relative to the root directory
        """
        # the original assigned the undefined name `root_directory` here,
        # raising NameError whenever root_dir was supplied
        _d = self.root_directory if root_dir is None else root_dir

        i = 0
        contents = []
        while True:
            rank_fn = os.path.join("rank_{}".format(i), "pyposmat.results.out")
            # isfile() is only true when the rank directory exists, is a
            # directory, and holds a regular results file
            if not os.path.isfile(os.path.join(_d, rank_fn)):
                break
            contents.append(rank_fn)
            i += 1

        return i, contents

    def find_initial_parameters_file(self):
        """Locate the parameter file configured for the first iteration.

        Returns:
            (str) the ``'file'`` entry of the first iteration's sampling
                configuration joined onto `self.root_directory`, when that
                path exists and is a regular file; otherwise None
        """
        first_iteration = self.configuration.sampling_type[0]
        if 'file' not in first_iteration:
            return None

        candidate = os.path.join(self.root_directory, first_iteration['file'])
        if os.path.exists(candidate) and os.path.isfile(candidate):
            return candidate
        return None

    def merge_data_files(self,
                         i_iteration,
                         last_datafile_fn=None,
                         new_datafile_fn=None):
        """Merge the per-rank results files into one results file.

        The `pyposmat.results.out` file of every `rank_*` directory under
        `self.root_directory` is read and the data frames are concatenated.
        Unless the iteration's sampling type is ``'from_file'``, the rows are
        given new `sim_id` values of the form `<iteration>_<row>` and the
        merged data is appended to the contents of `last_datafile_fn` before
        being written.

        Args:
            i_iteration(int): the current iteration which just finished
            last_datafile_fn(str,optional): the file holding the candidates
                carried over from previous simulations.  Defaults to
                `pyposmat.kde.<i_iteration>.out` in the data directory.
            new_datafile_fn(str,optional): where to write the merged results.
                Defaults to `pyposmat.results.<i_iteration>.out` in the data
                directory.
        Raises:
            FileNotFoundError: if `last_datafile_fn` cannot be read and
                `i_iteration` is not 0
        """

        if last_datafile_fn is None:
            last_datafile_fn = os.path.join(
                self.data_directory, 'pyposmat.kde.{}.out'.format(i_iteration))

        if new_datafile_fn is None:
            new_datafile_fn = os.path.join(
                self.data_directory,
                'pyposmat.results.{}.out'.format(i_iteration))

        rank_dirs = [
            v for v in os.listdir(self.root_directory) if v.startswith('rank_')
        ]
        filenames = [
            os.path.join(self.root_directory, v, 'pyposmat.results.out')
            for v in rank_dirs
        ]

        # the first file provides the data object, the rest are appended
        data = None
        for i, v in enumerate(filenames):
            if i == 0:
                data = PyposmatDataFile()
                data.read(filename=v)
            else:
                data_new = PyposmatDataFile()
                data_new.read(filename=v)
                data.df = pd.concat([data.df, data_new.df])

        nrows = len(data.df)
        sampling_type = self.configuration.sampling_type[i_iteration]['type']

        if sampling_type == 'from_file':
            # sim_ids are left untouched for from_file sampling
            data_new = PyposmatDataFile()
            data_new.read(filename=filenames[0])
            data_new.df = data.df
            data_new.write(filename=new_datafile_fn)
            return

        sim_id_fmt = '{:0>2}_{:0>6}'
        data.df['sim_id'] = [
            sim_id_fmt.format(i_iteration, i) for i in range(nrows)
        ]

        self.log("merging with candidates from previous simulations")
        self.log("\tfilename:{}".format(last_datafile_fn))
        data_old = PyposmatDataFile()
        try:
            data_old.read(filename=last_datafile_fn)
            data_old.df = pd.concat([data_old.df, data.df])
            data_old.write(filename=new_datafile_fn)
        except FileNotFoundError:
            # a missing previous file is only acceptable on iteration 0
            if i_iteration == 0:
                data.write(filename=new_datafile_fn)
            else:
                raise

    def merge_error_files(self, i_iteration):
        """Merge the per-rank bad-parameter files into one file.

        The `pyposmat.badparameters.out` file of every `rank_*` directory
        under `self.root_directory` is read and the data frames are
        concatenated; missing files are logged and skipped.  The rows are
        given `sim_id` values of the form `<iteration>_<row>`.  Unless the
        iteration's sampling type is ``'from_file'``, the result is appended
        to the existing `pyposmat.badparameters.out` in the data directory.
        Nothing is written when no bad parameters were found.

        Args:
            i_iteration(int): the current iteration which just finished
        Raises:
            FileNotFoundError: if the merged bad-parameters file in the data
                directory cannot be read and `i_iteration` is not 0
        """

        badparameters_fn = os.path.join(self.data_directory,
                                        'pyposmat.badparameters.out')

        rank_dirs = [
            v for v in os.listdir(self.root_directory) if v.startswith('rank_')
        ]
        filenames = [
            os.path.join(self.root_directory, v, 'pyposmat.badparameters.out')
            for v in rank_dirs
        ]

        # consolidate rank directories
        badparameters_new = None
        for v in filenames:
            if badparameters_new is None:
                try:
                    badparameters_new = PyposmatBadParametersFile(
                        o_config=self.configuration)
                    badparameters_new.read(filename=v)
                except FileNotFoundError:
                    self.log("no bad parameters file at {}".format(v))

            else:
                try:
                    badparameters_next = PyposmatBadParametersFile(
                        o_config=self.configuration)
                    badparameters_next.read(filename=v)
                    badparameters_new.df = pd.concat(
                        [badparameters_new.df, badparameters_next.df])
                except FileNotFoundError:
                    self.log("no bad parameters file as {}".format(v))

        # nothing to merge: either there were no rank directories at all
        # (the original raised AttributeError on None here) or none of them
        # produced bad parameters
        # TODO: need to implement something here to deal with bad parameters
        if badparameters_new is None or badparameters_new.df is None:
            return

        # determine the sim_id for the bad parameters
        nrows = len(badparameters_new.df)
        sim_id_fmt = '{:0>2}_{:0>6}'
        badparameters_new.df['sim_id'] = [
            sim_id_fmt.format(i_iteration, i) for i in range(nrows)
        ]

        if self.configuration.sampling_type[i_iteration][
                'type'] == "from_file":
            badparameters_new.write(filename=badparameters_fn)
            return

        self.log("merging with bad candidates from previous simulations")
        self.log("\tfilename:{}".format(badparameters_fn))
        badparameters = PyposmatBadParametersFile(o_config=self.configuration)

        try:
            badparameters.read(filename=badparameters_fn)
            badparameters.df = pd.concat(
                [badparameters.df, badparameters_new.df])
            badparameters.write(filename=badparameters_fn)
        except FileNotFoundError:
            # a missing merged file is only acceptable on iteration 0
            if i_iteration == 0:
                badparameters_new.write(filename=badparameters_fn)
            else:
                raise

    def analyze_results(self,
                        i_iteration,
                        data_fn=None,
                        config_fn=None,
                        kde_fn=None,
                        analysis_fn=None):
        """Analyze the results of an iteration and write the next kde file.

        A `PyposmatDataAnalyzer` is configured from `config_fn`, analyzes the
        results in `data_fn`, writes the kde file to `kde_fn`, analyzes it,
        and updates the analysis file at `analysis_fn`.  Summaries of the
        results, the qoi filtering and the kde data are logged on the way.

        Args:
            i_iteration(int): the iteration which just finished
            data_fn(str,optional): path of the results file.  Defaults to
                `pyposmat.results.<i_iteration>.out` in the data directory.
            config_fn(str,optional): path of the configuration file.
                Defaults to `self.configuration_filename` joined onto
                `self.root_directory`.
            kde_fn(str,optional): path of the kde file to write.  Defaults to
                `pyposmat.kde.<i_iteration+1>.out` in the data directory.
            analysis_fn(str,optional): path of the analysis file.  Defaults
                to `pyposmat.analysis.out` in the data directory.  It is read
                first when it already exists.
        """

        def _in_data_directory(basename):
            # default files live in root_directory/data_directory
            return os.path.join(self.root_directory,
                                self.data_directory,
                                basename)

        if data_fn is None:
            data_fn = _in_data_directory(
                'pyposmat.results.{}.out'.format(i_iteration))
        if config_fn is None:
            config_fn = os.path.join(self.root_directory,
                                     self.configuration_filename)
        if kde_fn is None:
            kde_fn = _in_data_directory(
                'pyposmat.kde.{}.out'.format(i_iteration+1))
        if analysis_fn is None:
            analysis_fn = _in_data_directory('pyposmat.analysis.out')

        analyzer = PyposmatDataAnalyzer()
        analyzer.initialize_configuration(config_fn=config_fn)

        # results of the iteration which just finished
        analyzer.analyze_results_data(i_iteration, filename=data_fn)
        assert isinstance(analyzer.results_statistics, OrderedDict)

        if os.path.isfile(analysis_fn):
            analyzer.read_analysis_file(filename=analysis_fn)

        results_summary = analyzer.str__results_descriptive_statistics(
            statistics=analyzer.results_statistics)
        self.log(results_summary)
        self.log(analyzer.str__qoi_filtering_summary())

        # kde file for the next iteration
        analyzer.write_kde_file(filename=kde_fn)
        analyzer.analyze_kde_data(i_iteration, filename=kde_fn)
        assert isinstance(analyzer.kde_statistics, OrderedDict)

        kde_summary = analyzer.str__kde_descriptive_statistics(
            statistics=analyzer.kde_statistics)
        self.log(kde_summary)

        analyzer.update_analysis(i_iteration)
        analyzer.write_analysis_file(filename=analysis_fn)

    def read_configuration_file(self, filename=None):
        """Read the configuration file into `self.configuration`.

        The path stored in `self.configuration_filename` is made absolute
        before reading.  On rank 0 the parameter, qoi and error names are
        logged afterwards.

        Args:
            filename(str,optional): path of the configuration file.  When
                given, it replaces `self.configuration_filename`.
        """

        assert type(filename) in [type(None), str]
        assert type(self.configuration_filename) in [type(None), str]

        if filename is not None:
            self.configuration_filename = filename

        config_path = self.configuration_filename
        if not os.path.isabs(config_path):
            config_path = os.path.abspath(config_path)
            self.configuration_filename = config_path

        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=config_path)

        if self.mpi_rank == 0:
            name_writers = (self._write_parameter_names,
                            self._write_qoi_names,
                            self._write_error_names)
            for write_names in name_writers:
                write_names()

    def _write_parameter_names(self, parameter_names=None):
        """Log the parameter names under a centered banner.

        Args:
            parameter_names(optional): the names to log, one per line.
                Defaults to `self.parameter_names`.
        """
        if parameter_names is None:
            names = self.parameter_names
        else:
            names = parameter_names

        rule = 80 * '-'
        lines = [rule, '{:^80}'.format('PARAMETER_NAMES'), rule]
        lines.extend(names)

        self.log("\n".join(lines))

    def _write_qoi_names(self, qoi_names=None):
        """Log the qoi names under a centered banner.

        Args:
            qoi_names(optional): the names to log, one per line.  Defaults
                to `self.qoi_names`.
        """
        if qoi_names is None:
            names = self.qoi_names
        else:
            names = qoi_names

        rule = 80 * '-'
        lines = [rule, '{:^80}'.format('QOI_NAMES'), rule]
        lines.extend(names)

        self.log("\n".join(lines))

    def _write_error_names(self, error_names=None):
        """Log the error names under a centered banner.

        Args:
            error_names(optional): the names to log, one per line.  Defaults
                to `self.error_names`.
        """
        if error_names is None:
            names = self.error_names
        else:
            names = error_names

        rule = 80 * '-'
        lines = [rule, '{:^80}'.format('ERROR_NAMES'), rule]
        lines.extend(names)

        self.log("\n".join(lines))