def dev__get_header_string():
    """Print the header string built for the testing-set configuration.

    Development helper: prints whether the configuration file exists
    (followed by its path), then prints the value returned by
    ``PyposmatBadParametersFile.get_header_string()``.
    """
    settings = get_testing_set()
    config_path = settings['config_fn']

    # existence flag first, then the path itself, on a single line
    print(os.path.isfile(config_path), config_path)

    badparameters_file = PyposmatBadParametersFile(
        filename=settings['badparameters_out_fn'],
        config_fn=config_path)
    header = badparameters_file.get_header_string()
    print(header)
Example #2
0
    def configure_pyposmat_badparameters_file(self, filename=None):
        """Build the bad-parameters file object for this instance.

        Args:
            filename (str, optional): when given, it replaces the stored
                ``pyposmat_badparameters_filename`` before the file object
                is created.  When omitted, the stored name is used as is.
        """
        name_supplied = filename is not None
        if name_supplied:
            assert type(filename) is str
            self.pyposmat_badparameters_filename = filename

        # the file object is always built from the stored filename together
        # with the configuration held on the instance
        self.pyposmat_badparameters = PyposmatBadParametersFile(
            filename=self.pyposmat_badparameters_filename,
            o_config=self.configuration,
        )
def test__write_header_string__no_args():
    """write_header_section() without arguments creates the configured file.

    The output path comes from the testing set.  Any file left over from a
    previous run is removed first so the existence check cannot pass
    vacuously, and the file is removed again afterwards even if the write
    or the assertion fails.
    """
    testing_set = get_testing_set()
    out_fn = testing_set['badparameters_out_fn']

    # a stale file would satisfy the assertion without anything being written
    if os.path.isfile(out_fn):
        os.remove(out_fn)

    o = PyposmatBadParametersFile(filename=out_fn,
                                  config_fn=testing_set['config_fn'])
    try:
        o.write_header_section()
        assert os.path.isfile(out_fn)
    finally:
        if os.path.isfile(out_fn):
            os.remove(out_fn)
def test__write_header_string__w_filename():
    """write_header_section(filename=...) creates the explicitly named file.

    The object is constructed with the testing-set output path, but the
    header is written to a different, explicitly supplied filename; only
    that file is checked.  A stale copy is removed up front so the check is
    meaningful, and cleanup runs even when the write or assertion fails.
    """
    testing_set = get_testing_set()
    badparameters_out_fn = 'pyposmat.badparameters.test'

    # a stale file would satisfy the assertion without anything being written
    if os.path.isfile(badparameters_out_fn):
        os.remove(badparameters_out_fn)

    o = PyposmatBadParametersFile(filename=testing_set['badparameters_out_fn'],
                                  config_fn=testing_set['config_fn'])
    try:
        o.write_header_section(filename=badparameters_out_fn)
        assert os.path.isfile(badparameters_out_fn)
    finally:
        if os.path.isfile(badparameters_out_fn):
            os.remove(badparameters_out_fn)
def test__write_header_section():
    """write_header_section() creates the requested file.

    Uses three separate parameter names (the previous single string
    ``'a,b,c'`` was one name, not three).  The output file is removed before
    the write and again afterwards, including when the write or the
    assertion fails.
    """
    parameter_names = ['a', 'b', 'c']
    filename = 'test.out'

    if os.path.isfile(filename):
        os.remove(filename)

    f = PyposmatBadParametersFile()
    try:
        f.write_header_section(parameter_names=parameter_names,
                               filename=filename)
        assert os.path.isfile(filename)
    finally:
        if os.path.isfile(filename):
            os.remove(filename)
def dev__get_header_string():
    """Print the expected and actual header strings and whether they match.

    Development helper: builds the expected two-line header
    (``sim_id,<names>,reason`` followed by ``sim_id,param,...,reason``) by
    hand and prints it next to the string returned by
    ``get_header_string(parameter_names=...)``.
    """
    parameter_names = ['a', 'b', 'c']

    f = PyposmatBadParametersFile()
    s = f.get_header_string(parameter_names=parameter_names)

    header_line_1 = ['sim_id'] + parameter_names + ['reason']
    header_line_2 = ['sim_id'] + len(parameter_names) * ['param'] + ['reason']
    s_test = "{}\n".format(",".join(header_line_1))
    s_test += "{}\n".format(",".join(header_line_2))
    print(s_test)
    print(s)
    print(s_test == s)
def dev__write_header_section():
    """Write a header section to the testing-set file and print the result.

    Development helper: prints a banner, removes any existing output file,
    writes the header through ``PyposmatBadParametersFile`` and finally
    prints the contents of the written file.
    """
    rule = 80 * '-'
    print(rule)
    print('{:^80}'.format('write_header_section'))
    print(rule)

    testing_set = get_testing_set()
    out_fn = testing_set['filename']

    if os.path.isfile(out_fn):
        os.remove(out_fn)

    badparameters_file = PyposmatBadParametersFile()
    badparameters_file.write_header_section(
        parameter_names=testing_set['parameter_names'],
        filename=out_fn)

    with open(out_fn, 'r') as fh:
        contents = fh.read()
    print(contents)
def test__get_header_string():
    """get_header_string() returns the two-line CSV header for given names.

    The expected string is ``sim_id,<names>,reason`` followed by
    ``sim_id,param,...,reason`` (one ``param`` per name), each line
    terminated with a newline.
    """
    parameter_names = ['a', 'b', 'c']

    f = PyposmatBadParametersFile()
    s = f.get_header_string(parameter_names=parameter_names)

    assert isinstance(s, str)

    header_line_1 = ['sim_id'] + parameter_names + ['reason']
    header_line_2 = ['sim_id'] + len(parameter_names) * ['param'] + ['reason']
    s_test = "{}\n".format(",".join(header_line_1))
    s_test += "{}\n".format(",".join(header_line_2))

    assert s_test == s
Example #9
0
def test____init____w_filename():
    """Constructing with only a filename stores it and creates no file.

    Also checks that ``configuration`` is None when no configuration is
    supplied.
    """
    expected_fn = 'test_filename.out'

    # start clean so the "no file was created" check means something
    if os.path.isfile(expected_fn):
        os.remove(expected_fn)

    badparameters_file = PyposmatBadParametersFile(filename=expected_fn)

    assert badparameters_file.filename == expected_fn
    assert not os.path.isfile(expected_fn)
    assert badparameters_file.configuration is None
Example #10
0
def test__read():
    """read() loads a bad-parameters file into a pandas DataFrame.

    The input and configuration files named by the testing set must exist;
    any existing output file is removed before the object is constructed.
    """
    paths = get_testing_set()

    in_fn = paths['badparameters_in_fn']
    assert os.path.isfile(in_fn)
    config_fn = paths['config_fn']
    assert os.path.isfile(config_fn)

    out_fn = paths['badparameters_out_fn']
    if os.path.isfile(out_fn):
        print("removing the badparameter_out_file:{}".format(out_fn))
        os.remove(out_fn)

    badparameters_file = PyposmatBadParametersFile(filename=out_fn,
                                                   config_fn=config_fn)
    badparameters_file.read(filename=in_fn)

    assert isinstance(badparameters_file.df, pd.DataFrame)
def test__get_header_string():
    """get_header_string() uses the parameter names of the configuration.

    The expected header is rebuilt from a separately read
    ``PyposmatConfigurationFile`` and compared with the string returned by a
    ``PyposmatBadParametersFile`` constructed from the same configuration
    path.
    """
    testing_set = get_testing_set()

    # verify the precondition before the file is read, so a missing file is
    # reported as a failed assertion rather than as a read error
    assert os.path.isfile(testing_set['config_fn'])

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=testing_set['config_fn'])

    f = PyposmatBadParametersFile(filename=testing_set['badparameters_out_fn'],
                                  config_fn=testing_set['config_fn'])
    s = f.get_header_string()

    assert isinstance(s, str)

    header_line_1 = ['sim_id'] + o_config.parameter_names + ['reason']
    header_line_2 = (['sim_id']
                     + len(o_config.parameter_names) * ['param']
                     + ['reason'])
    s_test = "{}\n".format(",".join(header_line_1))
    s_test += "{}\n".format(",".join(header_line_2))

    assert s_test == s
Example #12
0
def test____init____w_filename_config_fn():
    """Constructing with a filename and a config path loads the config.

    Checks that the filename is stored, that ``configuration`` is a
    ``PyposmatConfigurationFile`` and that ``parameter_names`` is a list
    holding the same names as a separately read configuration file.
    """
    paths = get_testing_set()

    in_fn = paths['badparameters_in_fn']
    assert os.path.isfile(in_fn)
    config_fn = paths['config_fn']
    assert os.path.isfile(config_fn)

    out_fn = paths['badparameters_out_fn']
    if os.path.isfile(out_fn):
        print("removing the badparameter_out_file:{}".format(out_fn))
        os.remove(out_fn)

    badparameters_file = PyposmatBadParametersFile(filename=out_fn,
                                                   config_fn=config_fn)

    assert badparameters_file.filename == out_fn

    from pypospack.pyposmat.data import PyposmatConfigurationFile
    reference_config = PyposmatConfigurationFile()
    reference_config.read(filename=config_fn)

    assert isinstance(badparameters_file.configuration,
                      PyposmatConfigurationFile)
    assert isinstance(badparameters_file.parameter_names, list)
    expected_names = set(reference_config.parameter_names)
    assert set(badparameters_file.parameter_names) == expected_names
def dev__write_simulation_exception():
    """Write a header and one record per listed exception class.

    Development helper: each class named in the list is looked up in
    ``pypospack.exceptions``, instantiated with a fixed message and the
    testing-set parameters, and passed to ``write_simulation_exception``
    with the class name used as the simulation id.
    """
    import importlib

    testing_set = get_testing_set()

    badparameters_file = PyposmatBadParametersFile()
    badparameters_file.write_header_section(
        parameter_names=testing_set['parameter_names'],
        filename=testing_set['filename'])

    exceptions_module = importlib.import_module('pypospack.exceptions')

    for exception_name in ['LammpsSimulationError']:
        exception_cls = getattr(exceptions_module, exception_name)
        error = exception_cls("message",
                              parameters=testing_set['parameters'])
        badparameters_file.write_simulation_exception(sim_id=exception_name,
                                                      exception=error)
Example #14
0
class PyposmatMonteCarloSampler(PyposmatEngine):
    def __init__(self,
                 filename_in='pyposmat.config.in',
                 filename_out='pyposmat.results.out',
                 o_log=None,
                 mpi_rank=None,
                 mpi_size=None,
                 base_directory=None):
        """Additional attributes are set by the base class :obj:PyposmatEngine

        Args:
            filename_in (str) - path of the configuration file
            filename_out (str) - path of the output file
            o_log (str, PyposmatLogFile, None) - a string is treated as the
                path of a file to log to; a PyposmatLogFile is stored by
                reference; None disables file logging.
            mpi_rank (int, optional) - rank of this process, 0 when omitted
            mpi_size (int, optional) - number of processes, 1 when omitted
            base_directory (str, optional): Either the relative or full path
                which provides a unique drive addressing space for
                simultaneously running simulations.

        Attributes:
            mpi_rank (int) - the value passed in, or 0
            mpi_size (int) - the value passed in, or 1
            pyposmat_data_in_filename (str) - the path of the datafile to read in
            pyposmat_data_out_filename (str) - the path of the datafile to write simulation results to
            pyposmat_badparameters_filename (str) - defaults to
                'pyposmat.badparameters.out'

        Raises:
            TypeError: if o_log is not a str, a PyposmatLogFile or None
        """
        assert isinstance(filename_in, str)
        assert isinstance(filename_out, str)
        assert type(base_directory) in [str, type(None)]

        PyposmatEngine.__init__(self,
                                filename_in=filename_in,
                                filename_out=filename_out,
                                base_directory=base_directory,
                                fullauto=False)

        # Fall back to a single-process layout when no MPI information is
        # supplied.  These defaults must not be overwritten afterwards: the
        # sampling methods compare and take a modulo with these values, which
        # fails for None.
        self.mpi_rank = 0 if mpi_rank is None else mpi_rank
        self.mpi_size = 1 if mpi_size is None else mpi_size

        assert self.mpi_rank < self.mpi_size

        self.pyposmat_data_in_filename = None
        self.pyposmat_data_out_filename = filename_out
        self.pyposmat_badparameters_filename = 'pyposmat.badparameters.out'

        try:
            self.configure_logger(o_log)
        except TypeError as e:
            m = "Unable to to configure obj_log based on attribute log:{}".format(
                str(o_log))
            # keep the original error attached as the cause
            raise TypeError(m) from e

    def configure_logger(self, o_log=None):
        """Set ``self.obj_log`` from the supplied log specification.

        A PyposmatLogFile is stored by reference.  A string is used as the
        filename of a newly created PyposmatLogFile.  None (the default)
        stores None, so messages passed to ``log`` are only printed.

        Args:
            o_log (str,PyposmatLogFile,None): default: None

        Raises:
            TypeError: if o_log has any other type
        """
        if type(o_log) is PyposmatLogFile:
            self.obj_log = o_log
            return

        if type(o_log) is str:
            self.obj_log = PyposmatLogFile(filename=o_log)
            return

        if o_log is None:
            self.obj_log = None
            return

        raise TypeError("log object must be str, PyposmatLogFile, or None")

    def log(self, str_msg):
        """Write a message to the log file, if one is set, and print it.

        Args:
            str_msg (str, list, tuple or other): a string is used as is; the
                items of a list or tuple are converted with ``str`` and
                joined with newlines; any other object is converted with
                ``str`` instead of leaving the message undefined.
        """
        if isinstance(str_msg, str):
            m = str_msg
        elif isinstance(str_msg, (list, tuple)):
            m = "\n".join(str(line) for line in str_msg)
        else:
            m = str(str_msg)

        if isinstance(self.obj_log, PyposmatLogFile):
            self.obj_log.write(m)
        print(m)

    def configure_pyposmat_datafile_in(self, filename):
        """Store the input datafile path and build its PyposmatDataFile.

        Args:
            filename: stored as ``pyposmat_data_in_filename`` and passed to
                ``PyposmatDataFile``; the resulting object is stored as
                ``pyposmat_datafile_in``.
        """
        self.pyposmat_data_in_filename = filename
        datafile_in = PyposmatDataFile(filename)
        self.pyposmat_datafile_in = datafile_in

    def configure_pyposmat_datafile_out(self, filename=None):
        """Build the output datafile object for this instance.

        Args:
            filename (str, optional): when given, it replaces the stored
                ``pyposmat_data_out_filename``.  When omitted, the stored
                name is used.
        """
        if filename is not None:
            assert type(filename) is str
            self.pyposmat_data_out_filename = filename

        # Build from the stored name, as configure_pyposmat_badparameters_file
        # does, so that a call without arguments does not hand None to
        # PyposmatDataFile.
        self.pyposmat_datafile_out = PyposmatDataFile(
            self.pyposmat_data_out_filename)

    def configure_pyposmat_badparameters_file(self, filename=None):
        """Build the bad-parameters file object for this sampler.

        Args:
            filename (str, optional): when given, it replaces the stored
                ``pyposmat_badparameters_filename`` before the file object
                is created.  When omitted, the stored name is used as is.
        """
        name_supplied = filename is not None
        if name_supplied:
            assert type(filename) is str
            self.pyposmat_badparameters_filename = filename

        # the file object is always built from the stored filename together
        # with the configuration held on the instance
        self.pyposmat_badparameters = PyposmatBadParametersFile(
            filename=self.pyposmat_badparameters_filename,
            o_config=self.configuration,
        )

    def read_configuration_file(self, filename=None):
        """Read the configuration and derive the sampler's name lists.

        After delegating to ``PyposmatEngine.read_configuration_file`` this
        sets ``n_iterations``, ``parameter_names``, ``qoi_names``,
        ``error_names``, ``parameter_distribution_definition``,
        ``free_parameter_names``, ``parameter_constraints`` and
        ``constrained_parameter_names`` from ``self.configuration``.

        Args:
            filename (optional): passed through to the base class.
        """
        PyposmatEngine.read_configuration_file(self, filename=filename)

        self.n_iterations = self.configuration.sampling_type['n_iterations']
        self.parameter_names = list(self.configuration.sampling_distribution)
        self.qoi_names = list(self.configuration.qois)
        self.error_names = ['{}.err'.format(qoi) for qoi in self.qoi_names]
        self.parameter_distribution_definition = \
            self.configuration.sampling_distribution

        # a parameter is free unless its distribution type is 'equals'
        try:
            free_names = []
            for name, definition in \
                    self.parameter_distribution_definition.items():
                if definition[0] != 'equals':
                    free_names.append(name)
            self.free_parameter_names = free_names
        except KeyError:
            print(self.parameter_distribution_definition.items())
            raise

        # work on a private copy of the constraints when there are any
        if self.configuration.sampling_constraints is None:
            self.parameter_constraints = OrderedDict()
        else:
            self.parameter_constraints = copy.deepcopy(
                self.configuration.sampling_constraints)

        # everything that is not free is treated as constrained
        self.constrained_parameter_names = [
            name for name in self.parameter_names
            if name not in self.free_parameter_names
        ]

    def run_simulations(self, i_iteration, n_samples=None, filename=None):
        """Run one sampling iteration with the configured sampling type.

        The sampling type and the default number of samples are read from
        ``self.configuration.sampling_type[i_iteration]``.  Rank 0 logs a
        short summary before the sampling starts.

        Args:
            i_iteration(int): the iteration cycle we are on.
            n_samples(int,optional): the number of parameters to evaluate;
                overrides the configured value when given.
            filename(str,optional): the filename; required for the 'kde'
                and 'from_file' sampling types.

        Raises:
            ValueError: if the sampling type is unknown, or if 'kde' or
                'from_file' sampling is requested without a filename.
        """

        assert type(i_iteration) is int
        assert type(n_samples) in [type(None), int]
        assert type(filename) in [type(None), str]

        i = i_iteration
        _sampling_type = self.configuration.sampling_type[i]['type']
        _n_samples = self.configuration.sampling_type[i]['n_samples']

        # resolve the override before logging so the message reports the
        # number of samples that will actually be attempted
        if n_samples is not None:
            _n_samples = n_samples

        if self.mpi_rank == 0:
            m = [
                "R{}: Starting iteration N={}".format(self.mpi_rank,
                                                      i_iteration)
            ]
            # compare by value; identity comparison against a string literal
            # is not reliable
            if _sampling_type == "from_file":
                m += [
                    "R{}: Sampling parameters from {}".format(
                        self.mpi_rank, filename)
                ]
            else:
                m += [
                    "R{}: Attemping n_samples={} with sampling_type={}".format(
                        self.mpi_rank, _n_samples, _sampling_type)
                ]
            if filename is not None:
                m += ["R{}: Using file:{}".format(self.mpi_rank, filename)]
            self.log(m)

        if _sampling_type == 'parametric':
            self.run_parameteric_sampling(n_samples=_n_samples)
        elif _sampling_type == 'kde':
            if filename is None:
                raise ValueError('cannot do kde sampling with out filename')
            self.run_kde_sampling(n_samples=_n_samples, filename_in=filename)
        elif _sampling_type == 'from_file':
            if filename is None:
                raise ValueError('cannot do filesampling without file')
            self.run_file_sampling(filename)
        else:
            raise ValueError('unknown sampling type:{}'.format(_sampling_type))

    def write_badparameters_header(self):
        """Write the header section of the bad-parameters file.

        The header is written to the stored
        ``pyposmat_badparameters_filename``.
        """
        write_header = self.pyposmat_badparameters.write_header_section
        write_header(filename=self.pyposmat_badparameters_filename)

    def write_data_out_header(self):
        """Write the header section of the results datafile.

        The header is written to the stored ``pyposmat_data_out_filename``
        using the parameter, qoi and error names held on the instance.
        """
        write_header = self.pyposmat_datafile_out.write_header_section
        header_kwargs = dict(
            filename=self.pyposmat_data_out_filename,
            parameter_names=self.parameter_names,
            qoi_names=self.qoi_names,
            error_names=self.error_names,
        )
        write_header(**header_kwargs)

    def get_sim_id(self, i, s=None):
        """Return the simulation id to use for a sample.

        Args:
            i: sample index; only consulted when ``s`` is None.
            s (optional): explicit id, returned unchanged when not None.

        Returns:
            ``s`` when it is given, otherwise ``str(i)``.

        Raises:
            TypeError: if ``s`` is None and ``i`` is not an int.
        """
        # an explicit id always wins over the index
        if s is not None:
            return s

        if not isinstance(i, int):
            m = 'cannot determine sim_id from i:{} and s:{}'.format(i, s)
            raise TypeError(m)

        return str(i)

    def run_parameteric_sampling(self, n_samples):
        """Sample parameters from the configured distributions and evaluate.

        Each free parameter is drawn from its 'uniform' or 'normal'
        distribution; 'equals' parameters are then derived from the free
        ones.  Every parameter set is checked against the parameter
        constraints and passed to ``evaluate_parameter_set``.  Results are
        written to the output datafile; parameter sets that raise one of the
        handled simulation errors are written to the bad-parameters file.
        A progress summary is logged every 10 samples.

        Args:
            n_samples (int): number of parameter sets to draw.

        Raises:
            ValueError: for an unknown distribution type, or for a
                list-valued 'equals' definition on a parameter whose name
                does not end in 'latticetype'.
        """
        definitions = self.parameter_distribution_definition

        # one frozen scipy distribution per free parameter
        samplers = OrderedDict()
        for name in self.free_parameter_names:
            kind = definitions[name][0]
            if kind == 'uniform':
                lower = definitions[name][1]['a']
                upper = definitions[name][1]['b']
                samplers[name] = scipy.stats.uniform(loc=lower,
                                                     scale=upper - lower)
            elif kind == 'normal':
                mean = definitions[name][1]['mu']
                stdev = definitions[name][1]['sigma']
                samplers[name] = scipy.stats.norm(loc=mean, scale=stdev)
            else:
                raise ValueError(
                    'unknown distribution type: {}'.format(kind))

        self.write_data_out_header()
        self.write_badparameters_header()

        started_at = time.time()
        error_count = 0

        for i_sample in range(n_samples):
            sim_id = self.get_sim_id(i=i_sample)

            # every parameter starts out unset, in configuration order
            params = OrderedDict((name, None) for name in self.parameter_names)

            # draw the free parameters
            for name in self.free_parameter_names:
                params[name] = samplers[name].rvs(size=1)[0]

            # derive the parameters fixed by equality constraints
            for name in self.constrained_parameter_names:
                if definitions[name][0] != 'equals':
                    continue

                if name.endswith('latticetype'):
                    # fitting an EoS for an EAM function needs a reference
                    # ground-state crystal structure; the value is copied
                    params[name] = definitions[name][1]
                elif type(definitions[name][1]) is not list:
                    # substitute free-parameter values into the expression
                    expression = str(definitions[name][1])
                    for free_name in self.free_parameter_names:
                        if free_name in expression:
                            expression = expression.replace(
                                free_name, str(params[free_name]))
                    # NOTE(review): eval of a configuration-supplied string;
                    # only safe while configuration files are trusted
                    params[name] = eval(expression)
                else:
                    # NOTE(review): list-valued definitions stop here, so the
                    # 'equilibrium_density' handling below is only reachable
                    # for names ending in 'latticetype' -- confirm intended
                    raise ValueError("oops")

            # EAM potentials: dens_max for the embedding function
            for name in self.constrained_parameter_names:
                definition = definitions[name]
                if definition[0] != 'equals':
                    continue
                if type(definition[1]) is not list:
                    continue
                if definition[1][0] == 'equilibrium_density':
                    a0 = definition[1][1]
                    latt = definition[1][2]
                    params[name] = self.calculate_equilibrium_density(
                        a0, latt, params)

            try:
                # inequality constraints, evaluated with values substituted
                for constraint_name, constraint in \
                        self.parameter_constraints.items():
                    check = constraint
                    for pname, pvalue in params.items():
                        check = check.replace(pname, str(pvalue))

                    if eval(check) is False:
                        m = "failed parameter constraint, {}".format(
                            constraint_name)
                        raise PyposmatBadParameterError(m, parameters=params)

                results = self.evaluate_parameter_set(parameters=params)
            except (PyposmatBadParameterError,
                    LammpsSimulationError,
                    PypospackTaskManagerError,
                    PypospackBadEamEosError) as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                error_count += 1
            else:
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=str(i_sample),
                    results=results)
            finally:
                # progress summary after every tenth sample
                completed = i_sample + 1
                if completed % 10 == 0:
                    elapsed = time.time() - started_at
                    self.log(
                        'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                            self.mpi_rank, completed, elapsed,
                            elapsed / completed, error_count))

    def get_options_kde_bandwidth(self):
        """Describe the KDE bandwidth options known to this method.

        Returns:
            OrderedDict: one entry per option key ('chiu1999' and
            'silverman1984'), each an OrderedDict of bibliographic fields
            such as 'reference', 'doi', 'description' or 'isbn'.
        """

        kde_options = OrderedDict()
        kde_options['chiu1999'] = OrderedDict()
        kde_options['chiu1999'][
            'reference'] = 'Chiu, S.T. Ann. Stat. 1991, Vol. 19, No 4. 1883-1905'
        kde_options['chiu1999']['doi'] = '10.1214/aos/1176348376'
        kde_options['chiu1999']['description'] = ""
        kde_options['silverman1984'] = OrderedDict()
        kde_options['silverman1984'][
            'reference'] = 'Silverman, B.W. (1986). Density Estimation for Statistics and Data Analysis. London: Chapman & Hall/CRC. p. 48'
        kde_options['silverman1984']['isbn'] = '0-412-24620-1'

        # the options were previously built but never handed back
        return kde_options

    def determine_kde_bandwidth(self, X, kde_bw_type):
        """ determine kde bandwidth

        The bandwidth and its type are also stored on the instance as
        ``kde_bw`` and ``kde_bw_type``.  Rank 0 logs the start of the
        calculation and its result.

        Args:
            X(np.ndarray): array of data to determine the KDE bandwidth
            kde_bw_type(str): the method of estimating the optimal bandwidth;
                'chiu1999' and 'silverman1985' are recognized.

        Returns:
            the bandwidth stored in ``self.kde_bw``

        Raises:
            PypospackBadKdeBandwidthType: if kde_bw_type is not recognized
            ValueError: re-raised from ``Chiu1999_h`` after printing X
        """

        if self.mpi_rank == 0:
            self.log('determine kde bandwidth...')

        if kde_bw_type == 'chiu1999':
            try:
                h = Chiu1999_h(X)
            except ValueError:
                # show the offending data before propagating the error
                print(X)
                raise

        elif kde_bw_type == 'silverman1985':
            # NOTE(review): this stores the object named Silverman1986 itself
            # rather than a value computed from X -- confirm this is intended
            h = Silverman1986
        else:
            # include the rejected type in the message; the placeholder was
            # previously left unformatted
            m = 'kde_bw_type, {}, is not an implemented bandwidth type'.format(
                kde_bw_type)
            raise PypospackBadKdeBandwidthType(m)

        if self.mpi_rank == 0:
            self.log('{}:{}'.format(kde_bw_type, h))
        self.kde_bw_type = kde_bw_type
        self.kde_bw = h

        return self.kde_bw

    def run_kde_sampling(self,
                         n_samples,
                         filename_in,
                         cluster_id=None,
                         kde_bw_type='chiu1999'):
        """ sample from a KDE distribution

        A gaussian KDE is built over the free parameters read from
        ``filename_in``.  Each drawn parameter set is completed with the
        'equals' parameters, checked against the parameter constraints and
        passed to ``evaluate_parameter_set``.  Results are written to the
        output datafile; parameter sets that raise one of the handled
        simulation errors are written to the bad-parameters file.  A
        progress summary is logged every 10 samples.

        Args:
            n_samples(int): the number of samples to draw from the KDE distribution
            filename_in(str): the path to the datafile from which the parameters will be drawn from
            cluster_id(int): if we need to use a specific cluster_id, we specify it here.
                otherwise, it will be drawn from all parameters contained within the set.
            kde_bw_type(str): the method of estimating the optimal bandwidth

        Raises:
            ValueError: for a list-valued 'equals' definition on a parameter
                whose name does not end in 'latticetype'.
        """
        _datafile_in = PyposmatDataFile()
        _datafile_in.read(filename_in)

        if cluster_id is None:
            _free_parameter_names = [str(v) for v in self.free_parameter_names]
            _X = _datafile_in.df[_free_parameter_names].values.T
        else:
            # subselect the dataframe by the cluster_id of interest; the
            # frame is filtered once, so no second selection is needed
            _datafile_in.df = _datafile_in.df.loc[_datafile_in.df['cluster_id']
                                                  == cluster_id]
            _X = _datafile_in.df[self.free_parameter_names].values.T

        kde_bw = self.determine_kde_bandwidth(X=_X, kde_bw_type=kde_bw_type)

        _rv_generator = scipy.stats.gaussian_kde(_X, kde_bw)

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            # determine sim_id
            sim_id = self.get_sim_id(i=i_sample)

            # new OrderedDict to hold in parameter values
            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])

            # generate free parameters for ordered dictionary
            _free_parameters = _rv_generator.resample(1)
            for i, v in enumerate(self.free_parameter_names):
                _parameters[v] = float(_free_parameters[i, 0])

            # determine parameters determined from equality constraints
            for p in self.constrained_parameter_names:
                _definition = self.parameter_distribution_definition[p]
                if _definition[0] != 'equals':
                    continue

                # this condition is for fitting EoS for EAM function which
                # requires a reference ground state crystal structure
                if p.endswith('latticetype'):
                    _parameters[p] = _definition[1]

                # process evaluation strings
                elif type(_definition[1]) is not list:
                    _str_eval = str(_definition[1])

                    # replace string values with numerical values
                    for fp in self.free_parameter_names:
                        if fp in _str_eval:
                            _str_eval = _str_eval.replace(
                                fp, str(_parameters[fp]))

                    # evaluate the string into a float
                    # NOTE(review): eval of a configuration-supplied string;
                    # only safe while configuration files are trusted
                    _parameters[p] = eval(_str_eval)
                else:
                    # NOTE(review): list-valued definitions stop here, so the
                    # 'equilibrium_density' handling below is only reachable
                    # for names ending in 'latticetype' -- confirm intended
                    raise ValueError("oops")

            for p in self.constrained_parameter_names:
                _definition = self.parameter_distribution_definition[p]
                if _definition[0] != 'equals':
                    continue
                # some EAM potentials have a normalizing equilibrium density
                # which has to be determined based upon the parameterization
                # of the electron density function
                if type(_definition[1]) is list:
                    if _definition[1][0] == 'equilibrium_density':
                        a0 = _definition[1][1]
                        latt = _definition[1][2]
                        _parameters[p] = self.calculate_equilibrium_density(
                            a0, latt, _parameters)

            try:
                # now we check parameter inequality constraints
                for k, v in self.parameter_constraints.items():
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))

                    if eval(_eval_str) is False:
                        s = 'parameter constraint failed, {}'.format(k)
                        raise PyposmatBadParameterError(s,
                                                        parameters=_parameters)
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except PyposmatBadParameterError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except LammpsSimulationError as e:
                # sanity checks kept from the original handler
                assert isinstance(self.pyposmat_badparameters,
                                  PyposmatBadParametersFile)
                assert isinstance(self.pyposmat_badparameters.parameter_names,
                                  list)
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except (PypospackTaskManagerError, PypospackBadEamEosError) as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=i_sample,
                    cluster_id=cluster_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    n_samples_completed = i_sample + 1
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        self.mpi_rank, n_samples_completed, time_total,
                        avg_time, _n_errors)
                    self.log(_str_msg)

    def run_file_sampling(self, filename_in):
        """Evaluate the parameter sets listed in an existing data file.

        The rows of ``filename_in`` are distributed round-robin over the MPI
        ranks: row ``i`` is handled by the rank equal to
        ``i % self.mpi_size``.  For every row handled by this rank the
        constrained parameters are filled in, the parameter constraints are
        checked and the parameter set is evaluated.  Successful results are
        written to the data-out file; parameter sets which fail with one of
        the known simulation errors are recorded in the bad-parameters file.
        A progress summary is printed after every 10 rows processed by this
        rank.

        Args:
            filename_in(str): path of the data file providing the parameter
                sets.  Each row is read for a ``sim_id`` entry and for one
                entry per name in ``self.parameter_names``.
        """

        _datafile_in = PyposmatDataFile(filename=filename_in)
        _datafile_in.read()

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()

        # errors which mark a parameter set as bad instead of aborting the run
        _bad_parameter_errors = (PyposmatBadParameterError,
                                 LammpsSimulationError,
                                 PypospackTaskManagerError,
                                 PypospackBadEamEosError)

        _n_errors = 0
        # number of rows actually processed by this rank; kept separate from
        # the row index so that the rank dispatch below is not disturbed
        n_samples_completed = 0
        for i_row, (_, row) in enumerate(_datafile_in.df.iterrows()):
            # round-robin distribution of the rows over the MPI ranks
            if self.mpi_rank != i_row % self.mpi_size:
                continue

            _parameters = OrderedDict([(p, row[p])
                                       for p in self.parameter_names])
            _sim_id = row['sim_id']

            # fill in constrained parameters defined as
            # ['equals', ['equilibrium_density', a0, latt]]
            for p in self.constrained_parameter_names:
                _definition = self.parameter_distribution_definition[p]
                if _definition[0] != 'equals':
                    continue
                if not isinstance(_definition[1], list):
                    continue
                if _definition[1][0] == 'equilibrium_density':
                    a0 = _definition[1][1]
                    latt = _definition[1][2]
                    _parameters[p] = self.calculate_equilibrium_density(
                        a0, latt, _parameters)

            try:
                # check constraints
                for v in self.parameter_constraints.values():
                    # NOTE(review): parameter names are substituted textually,
                    # so a name which is a substring of another name can be
                    # replaced in the wrong place.
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))
                    # NOTE(review): eval() executes the constraint expression;
                    # this is only safe for trusted configuration files.
                    if eval(_eval_str) is False:
                        raise PyposmatBadParameterError()

                _results = self.evaluate_parameter_set(parameters=_parameters)
            except _bad_parameter_errors as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=_sim_id, exception=e)
                _n_errors += 1
            else:
                # sim_id values read as floats are written back as integers
                if isinstance(_sim_id, float):
                    _sim_id = int(_sim_id)
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=_sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                n_samples_completed += 1
                if n_samples_completed % 10 == 0:
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = '{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        n_samples_completed, time_total, avg_time, _n_errors)
                    print('rank{}:'.format(self.mpi_rank) + _str_msg)

    def calculate_equilibrium_density(self, a0, latt, parameters):
        """Calculate the equilibrium density for a reference lattice.

        The density function named by
        ``self.configuration.potential['density_type']`` is evaluated on a
        radial grid and summed over the neighbor shells lying inside the
        cutoff, each shell weighted by its coordination number.

        Args:
            a0(float): lattice parameter of the reference structure
                (presumably the conventional cubic cell edge, since the
                second neighbor shell is placed at ``1.000 * a0``).
            latt(str): lattice type; only ``'fcc'`` is implemented.
            parameters(dict): potential parameters.  Entries whose name
                starts with ``d_`` are passed to the density function with
                that prefix removed.

        Returns:
            The summed density for ``latt == 'fcc'``; ``None`` for any other
            lattice type.

        Raises:
            ValueError: if ``latt`` is ``'fcc'`` and ``parameters`` contains
                no ``d_`` entries.
        """
        _parameters = OrderedDict()
        s = None
        for k, v in parameters.items():
            if k.startswith('d_'):
                _parameters[k[2:]] = v
                # the chemical symbol is the first token after the 'd_'
                # prefix; only density parameters are consulted so that
                # other parameter families cannot override it
                s = k[2:].split('_')[0]

        _potential_type = self.configuration.potential['density_type']
        _symbols = self.configuration.potential['symbols']
        _module_name, _class_name = PotentialObjectMap(
            potential_type=_potential_type)
        _module = importlib.import_module(_module_name)
        _class = getattr(_module, _class_name)
        _dens_potential = _class(symbols=_symbols)

        if latt == 'fcc':
            if s is None:
                raise ValueError(
                    "no density parameters (prefix 'd_') were provided")

            # neighbor shell distances of the fcc lattice in units of a0:
            # a0/sqrt(2), a0 and sqrt(3/2)*a0 with 12, 6 and 24 neighbors.
            # The first shell was previously 2/sqrt(2)*a0, which placed the
            # nearest neighbors outside of the cutoff.
            d = OrderedDict([('1NN', a0 / (2**0.5)), ('2NN', 1.000 * a0),
                             ('3NN', 1.225 * a0)])
            Z = OrderedDict([('1NN', 12), ('2NN', 6), ('3NN', 24)])
            # cutoff halfway between the second and third neighbor shells
            rcut = (d['2NN'] + d['3NN']) / 2.

            rmax = 10.
            r = np.linspace(1, 10, 5000) * rmax / 10
            # presumably a mapping from symbol to density values on r
            rho = _dens_potential.evaluate(r, _parameters, rcut)

            rho_e = 0
            for m in Z:
                if d[m] < rcut:
                    rho_e += Z[m] * np.interp(d[m], r, rho[s])

            return rho_e

        # other lattice types are not implemented
        return None

    def print_structure_database(self):
        """Log a table of the structure database.

        The table has a banner, the structure directory, and one
        ``name filename`` row for every entry of
        ``self.structures['structures']``.  The lines are passed to
        ``self.log`` as a single list.
        """
        banner_rule = 80 * '-'
        column_rule = 20 * '-'

        lines = [banner_rule]
        lines.append('{:^80}'.format('STRUCTURE DATABASE'))
        lines.append(banner_rule)
        lines.append('structure_directory:{}'.format(self.structure_directory))
        lines.append('')
        lines.append('{:^20} {:^20}'.format('name', 'filename'))
        lines.append('{} {}'.format(column_rule, column_rule))

        for name, structure_fn in self.structures['structures'].items():
            lines.append('{:20} {:20}'.format(name, structure_fn))

        self.log(lines)

    def print_sampling_configuration(self):
        """Print a table with one row per sampling iteration.

        Each row shows the iteration index, the number of samples and the
        sampling type.  For the ``kde_w_clusters`` sampling type the number
        of samples is read from ``n_samples_per_cluster``; for every other
        type it is read from ``n_samples``.
        """
        banner_rule = 80 * '-'
        row_fmt = '{:^10} {:^10} {:^20}'

        print(banner_rule)
        print('{:^80}'.format('SAMPLING CONFIGURATION'))
        print(banner_rule)

        print(row_fmt.format('iteration', 'n_samples', 'sampling_type'))
        print('{} {} {}'.format(10 * '-', 10 * '-', 20 * '-'))

        for i_iteration in range(self.n_iterations):
            sampling = self.configuration.sampling_type[i_iteration]
            sample_type = sampling['type']
            if sample_type == 'kde_w_clusters':
                n_samples_key = 'n_samples_per_cluster'
            else:
                n_samples_key = 'n_samples'
            print(row_fmt.format(i_iteration, sampling[n_samples_key],
                                 sample_type))

    def print_initial_parameter_distribution(self):
        """Print the initial distribution of every parameter.

        Free parameters are printed with their distribution type and its two
        defining values (``a``/``b`` for ``uniform``, ``mu``/``sigma`` for
        ``normal``).  All other parameters are printed as ``not_free``.

        Raises:
            ValueError: if a free parameter uses a distribution type other
                than ``uniform`` or ``normal``.
        """
        # names of the two distribution arguments printed for each type
        _argument_names = {'uniform': ('a', 'b'), 'normal': ('mu', 'sigma')}

        print(80 * '-')
        print('{:80}'.format('INITIAL PARAMETER DISTRIBUTION'))
        print(80 * '-')
        for p in self.parameter_distribution_definition:
            if p not in self.free_parameter_names:
                print('{:^20} {:^10}'.format(p, 'not_free'))
                continue

            _distribution_type = self.parameter_distribution_definition[p][0]
            if _distribution_type not in _argument_names:
                s = "incorrection parameter distribution for parameter {}.  probability distribution function, {}, is not supported"
                s = s.format(p, _distribution_type)
                raise ValueError(s)

            # the arguments are only looked up for supported distributions
            _distribution_args = self.parameter_distribution_definition[p][1]
            _first, _second = _argument_names[_distribution_type]
            print('{:^20} {:^10} {:^10} {:^10} {:^10}'.format(
                p, 'free', _distribution_type, _distribution_args[_first],
                _distribution_args[_second]))
Example #15
0
def test____init____wo_filename():
    """A file object built without arguments has the default filename and no configuration."""
    expected_filename = 'pyposmat.badparameters.out'

    badparameters_file = PyposmatBadParametersFile()

    assert badparameters_file.filename == expected_filename
    assert badparameters_file.configuration is None
    def merge_error_files(self, i_iteration):
        """Merge the per-rank bad-parameter files into the data directory.

        Reads ``pyposmat.badparameters.out`` from every ``rank_*`` directory
        found in ``self.root_directory``, concatenates the records and
        renumbers ``sim_id`` as ``<iteration>_<row>``.  The result is written
        to ``pyposmat.badparameters.out`` in ``self.data_directory``.  Unless
        the sampling type of the iteration is ``"from_file"``, the new
        records are appended to the records already stored in that file.

        Nothing is written when there are no rank directories or when the
        merged records are ``None``.

        Args:
            i_iteration(int): the current iteration which just finished

        Raises:
            FileNotFoundError: if the merged file of the previous iterations
                is missing and ``i_iteration`` is not 0.
        """

        badparameters_fn = os.path.join(self.data_directory,
                                        'pyposmat.badparameters.out')

        # sorted so that the renumbered sim_id values do not depend on the
        # arbitrary order in which os.listdir returns the directories
        rank_dirs = sorted(
            v for v in os.listdir(self.root_directory)
            if v.startswith('rank_'))
        filenames = [
            os.path.join(self.root_directory, v, 'pyposmat.badparameters.out')
            for v in rank_dirs
        ]

        # consolidate rank directories
        badparameters_new = None
        for v in filenames:
            if badparameters_new is None:
                try:
                    badparameters_new = PyposmatBadParametersFile(
                        o_config=self.configuration)
                    badparameters_new.read(filename=v)
                except FileNotFoundError:
                    self.log("no bad parameters file at {}".format(v))

            else:
                try:
                    badparameters_next = PyposmatBadParametersFile(
                        o_config=self.configuration)
                    badparameters_next.read(filename=v)
                    badparameters_new.df = pd.concat(
                        [badparameters_new.df, badparameters_next.df])
                except FileNotFoundError:
                    self.log("no bad parameters file as {}".format(v))

        if badparameters_new is None:
            # no rank directories were found, so there is nothing to merge
            self.log("no bad parameters files found in {}".format(
                self.root_directory))
            return

        # determine the sim_id for bad parameters of the sim_id
        if badparameters_new.df is None:
            # no previous bad paramters found
            # TODO: need to implement something here to deal with bad parameters
            return

        nrows = len(badparameters_new.df)
        sim_id_fmt = '{:0>2}_{:0>6}'
        badparameters_new.df['sim_id'] = [
            sim_id_fmt.format(i_iteration, i) for i in range(nrows)
        ]

        if self.configuration.sampling_type[i_iteration][
                'type'] == "from_file":
            badparameters_new.write(filename=badparameters_fn)
            return

        self.log("merging with bad candidates from previous simulations")
        self.log("\tfilename:{}".format(badparameters_fn))
        badparameters = PyposmatBadParametersFile(o_config=self.configuration)

        try:
            badparameters.read(filename=badparameters_fn)
            badparameters.df = pd.concat(
                [badparameters.df, badparameters_new.df])
            badparameters.write(filename=badparameters_fn)
        except FileNotFoundError:
            # the first iteration has no earlier merged file to extend
            if i_iteration == 0:
                badparameters_new.write(filename=badparameters_fn)
            else:
                raise