class FileSampler():
    """Run a workflow once for every row of a pyposmat data file.

    Each row's ``sim_id`` names a directory that is created below the current
    working directory; the workflow for that row is executed from inside it.

    Args:
        configuration: a ``PyposmatConfigurationFile`` instance or the path
            to a configuration file.
        data: a ``PyposmatDataFile`` instance or the path to a data file.
        structure_name: structure name handed to the workflow.
        structure_path: structure path handed to the workflow.
        workflow_type (str): workflow selector; only
            ``'lmps_thermal_expansion'`` triggers any work in ``run``.
        workflow_definition (dict): extra keyword arguments for the workflow
            constructor.
    """

    def __init__(self,
                 configuration,
                 data,
                 structure_name,
                 structure_path,
                 workflow_type,
                 workflow_definition):
        self.initialize_configuration(configuration)
        self.initialize_data(data)
        self.structure_name = structure_name
        self.structure_path = structure_path
        # The attribute used to be stored only under this misspelled name;
        # keep it as an alias so existing readers of it do not break.
        self.strucutre_path = structure_path
        self.workflow_type = workflow_type
        self.workflow_definition = workflow_definition
        self.potential_definition = self.configuration.potential

    def initialize_configuration(self,configuration):
        """Store ``configuration``, reading it from disk when given a path.

        Raises:
            TypeError: if ``configuration`` is neither a path string nor a
                ``PyposmatConfigurationFile``.
        """
        if isinstance(configuration,PyposmatConfigurationFile):
            self.configuration = configuration
        elif isinstance(configuration,str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        else:
            msg = ("configuration must be a path to a configuration file or an "
                   "instance of the PyposmatConfigurationFile,")
            raise TypeError(msg)

    def initialize_data(self,data):
        """Store ``data``, reading it from disk when given a path.

        Raises:
            TypeError: if ``data`` is neither a path string nor a
                ``PyposmatDataFile``.
        """
        if isinstance(data,PyposmatDataFile):
            self.data = data
        elif isinstance(data,str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            msg = ("data must be a path to a data file or an instance of "
                   "PyposmatDataFile.")
            raise TypeError(msg)

    def run(self):
        """Create one directory per data row and run the workflow inside it.

        The working directory is restored after every row, even when the
        workflow raises.  A ``workflow_type`` other than
        ``'lmps_thermal_expansion'`` still creates the directories but runs
        nothing.
        """
        for index,row in self.data.df.iterrows():
            # os.mkdir/os.chdir need a path string; sim_id may be numeric.
            sim_id = str(row['sim_id'])
            print('working on sim_id:{}'.format(sim_id))
            parameters = OrderedDict([(k,row[k]) for k in self.configuration.parameter_names])

            original_path = os.getcwd()
            os.mkdir(sim_id)
            os.chdir(sim_id)
            try:
                if self.workflow_type == 'lmps_thermal_expansion':
                    # Use the values given to the constructor rather than
                    # whatever happens to be bound at module level.
                    # NOTE(review): structure_path is resolved from inside the
                    # sim_id directory -- confirm callers pass a usable path.
                    workflow = LammpsThermalExpansion(
                            structure_name=self.structure_name,
                            structure_path=self.structure_path,
                            **self.workflow_definition)
                    workflow.create_task_configurations()
                    workflow.create_tasks()
                    workflow.prepare_tasks(
                            potential_definition = self.potential_definition,
                            potential_parameters = parameters)
                    workflow.run()
            finally:
                os.chdir(original_path)
Ejemplo n.º 2
0
 def initialize_kde_data(self,kde_data_fn=None,o_kde_data=None):
     """Set ``self.kde_data`` from a file path or an existing object.

     Args:
         kde_data_fn (str, optional): path of the KDE data file to read.
         o_kde_data (PyposmatDataFile, optional): an already loaded data file.

     With neither argument the file named by the existing
     ``self.kde_data_fn`` is read.  Passing an object sets
     ``self.kde_data_fn`` to ``None``.

     Raises:
         TypeError: if both arguments are given, or the one given has the
             wrong type.
     """
     # One message for both failure branches; the previous text was missing a
     # space between the two string fragments and repeated the word "to".
     m = (
         "must either provide the path to kde_data_fn or a PyposmatDataFile "
         "instance to o_kde_data"
     )
     if kde_data_fn is not None and o_kde_data is not None:
         raise TypeError(m)
     # default behavior
     elif kde_data_fn is None and o_kde_data is None:
         self.kde_data = PyposmatDataFile()
         self.kde_data.read(filename=self.kde_data_fn)
     # a path is provided
     elif isinstance(kde_data_fn,str):
         self.kde_data_fn = kde_data_fn
         self.kde_data = PyposmatDataFile()
         self.kde_data.read(filename=self.kde_data_fn)
     # an object is provided
     elif isinstance(o_kde_data,PyposmatDataFile):
         self.kde_data_fn = None
         self.kde_data = o_kde_data
     else:
         raise TypeError(m)
Ejemplo n.º 3
0
 def write_kde_file(self,filename):
     """Write the surviving rows of the results data file to ``filename``.

     The file named by ``self.results_data_fn`` is read again, its dataframe
     is reduced to the positional indices in
     ``self.filter_set_info['is_survive_idx']``, and the result is stored on
     ``self.kde_data`` and written out.

     Args:
         filename (str): path of the KDE file to write.
     """
     # The unused local column-name list that used to be built here was
     # removed; nothing below depended on it.
     self.kde_data = PyposmatDataFile()
     self.kde_data.read(filename=self.results_data_fn)
     self.kde_data.df = self.kde_data.df.iloc[list(self.filter_set_info['is_survive_idx'])]
     self.kde_data.write(filename=filename)
Ejemplo n.º 4
0
def test__write_simulation_results__no_filename():
    """Write a header plus one result row without passing a filename.

    The output file must exist afterwards; it is then read back, with no
    further assertions on its contents.
    """
    cleanup_test()

    parameter_names = ['param{}'.format(n) for n in range(1, 4)]
    qoi_names = ['qoi{}'.format(n) for n in range(1, 6)]
    error_names = ['err{}'.format(n) for n in range(1, 6)]

    writer = PyposmatDataFile()
    writer.write_header_section(
        parameter_names=parameter_names,
        qoi_names=qoi_names,
        error_names=error_names,
        filename=datafile_out_fn,
    )

    sim_id = "test_id"

    # Same constant value for every name within a section.
    results = OrderedDict([
        ('parameters', OrderedDict((name, 1.) for name in parameter_names)),
        ('qois', OrderedDict((name, 2.) for name in qoi_names)),
        ('errors', OrderedDict((name, 3.0) for name in error_names)),
    ])

    writer.write_simulation_results(sim_id, results)

    assert os.path.isfile(datafile_out_fn)

    reader = PyposmatDataFile()
    reader.read(filename=datafile_out_fn)
Ejemplo n.º 5
0
def test__attribute__names__after_reading_file():
    """After reading a data file, ``names`` must be exactly a ``list``."""
    source_fn = "../../../../../data/MgO_pareto_data/culled_004.out"

    reader = PyposmatDataFile()
    reader.read(source_fn)

    names_type = type(reader.names)
    assert names_type is list
def dev__get_descriptive_statistics__from_kde_file():
    """Print descriptive statistics computed from the testing set's KDE file."""
    banner = 80 * '-'
    title = '{:^80}'.format('method -> get_descriptive_statistics__from_kde_file')
    print(banner)
    print(title)

    testing_set = get_testing_set()
    config_fn = testing_set['config_fn']
    results_data_fn = testing_set['results_fn']
    kde_data_fn = testing_set['kde_fn']

    # Every input file has to exist before anything is loaded.
    for required_fn in (config_fn, results_data_fn, kde_data_fn):
        assert os.path.isfile(required_fn)

    analyzer = PyposmatDataAnalyzer(
        config_fn=config_fn,
        results_data_fn=results_data_fn,
    )

    kde_data = PyposmatDataFile()
    kde_data.read(filename=kde_data_fn)

    statistics = analyzer.get_descriptive_statistics(df=kde_data.df)
    report = analyzer.str__descriptive_statistics(
        descriptive_statistics=statistics)

    print(report)
    print(kde_data.df.shape)
Ejemplo n.º 7
0
    def read_data(self, filename):
        """Read ``filename`` and cache its name lists and dataframes.

        Sets ``datafile``, the name lists under both their long
        (``parameter_names``, ``qoi_names``, ``error_names``) and short
        (``param_names``, ``err_names``) attribute names, prints each list,
        and stores deep copies of the per-group columns plus their
        column-wise concatenation ``total_df``.

        Args:
            filename (str): path of the data file to read.
        """
        self.datafile = PyposmatDataFile()
        source = self.datafile
        source.read(filename=filename)

        self.parameter_names = list(source.parameter_names)
        self.qoi_names = list(source.qoi_names)
        self.error_names = list(source.error_names)

        # Short-named copies used by the rest of this method.
        self.param_names = list(source.parameter_names)
        self.qoi_names = list(source.qoi_names)
        self.err_names = list(source.error_names)

        listings = (
            ("parameter names", self.param_names),
            ("qoi names", self.qoi_names),
            ("error_names", self.err_names),
        )
        for heading, names in listings:
            print(heading)
            print(type(names))
            for position, name in enumerate(names):
                print("{:3} {:<20}".format(position, name))

        # generate pandas dataframes
        self.param_df = copy.deepcopy(self.datafile.df[self.param_names])
        self.qoi_df = copy.deepcopy(self.datafile.df[self.qoi_names])
        self.err_df = copy.deepcopy(self.datafile.df[self.err_names])
        frames = [self.param_df, self.qoi_df, self.err_df]
        self.total_df = pd.concat(frames, axis=1)
Ejemplo n.º 8
0
 def read_data(self, filename):
     """Read ``filename`` and expose its dataframe and name lists on ``self``.

     Args:
         filename (str): path of the data file to read.
     """
     self.data = PyposmatDataFile()
     source = self.data
     source.read(filename)

     self.df = source.df
     # Re-export the name lists under the same attribute names.
     for attribute in ('parameter_names', 'error_names', 'qoi_names'):
         setattr(self, attribute, getattr(source, attribute))
Ejemplo n.º 9
0
def test__read__wo_named_arguments():
    """Read with a positional filename and compare every name list."""
    datafile = PyposmatDataFile()
    datafile.read(MgO_datafile)

    expectations = (
        ('names', expected_names),
        ('parameter_names', parameter_names),
        ('qoi_names', qoi_names),
        ('error_names', error_names),
    )
    for attribute, expected in expectations:
        actual = getattr(datafile, attribute)
        assert type(actual) is list
        assert len(expected) == len(actual)
        # Lengths match at this point, so a pairwise walk covers every entry.
        for wanted, found in zip(expected, actual):
            assert wanted == found

    assert type(datafile.df) is pd.DataFrame
Ejemplo n.º 10
0
def dev__read():
    """Print the ``sim_id`` column of the testing set's results data file."""
    fixtures = get_testing_set()

    datafile = PyposmatDataFile()
    results_fn = fixtures['results_data_fn']
    datafile.read(filename=results_fn)

    sim_ids = datafile.df['sim_id']
    print(sim_ids)
Ejemplo n.º 11
0
 def set_data(self, data):
     """Store ``data`` on ``self.data``, reading it first when given a path.

     Args:
         data (str or PyposmatDataFile): path of a data file, or an already
             loaded data file object.

     Raises:
         TypeError: if ``data`` is neither a string nor a
             ``PyposmatDataFile``.
     """
     if isinstance(data, str):
         self.data = PyposmatDataFile()
         self.data.read(data)
     elif isinstance(data, PyposmatDataFile):
         self.data = data
     else:
         # Same exception type as before, now with an explanation.
         msg = ("data must be a path to a data file or an instance of "
                "PyposmatDataFile.")
         raise TypeError(msg)
 def initialize_data(self,data):
     """Store ``data`` on ``self.data``, loading it when a path is given.

     Args:
         data (PyposmatDataFile or str): a data file object, or the path of
             a data file to read.

     Raises:
         TypeError: if ``data`` is neither of the accepted types.
     """
     if isinstance(data,PyposmatDataFile):
         self.data = data
         return

     if not isinstance(data,str):
         raise TypeError("data must be a path to a data file or an instance of "
                         "PyposmatDataFile.")

     self.data = PyposmatDataFile()
     self.data.read(filename=data)
def test__read_datafile():
    """``read_datafile`` must leave a pandas DataFrame on ``data.df``."""
    from pypospack.pyposmat.data import PyposmatDataFile
    reference = PyposmatDataFile()
    reference.read(filename=datafile_fn)

    rugplot = PyposmatParetoRugplot()
    rugplot.read_datafile(filename=datafile_fn)

    import pandas as pd
    loaded_type = type(rugplot.data.df)
    assert loaded_type is pd.DataFrame
Ejemplo n.º 14
0
    def __init__(self, configuration_fn, datafile_fn):
        """Remember both filenames and load whichever of them is not ``None``.

        Args:
            configuration_fn: path of the configuration file, or ``None``.
            datafile_fn: path of the data file, or ``None``.
        """
        self.configuration_fn, self.datafile_fn = configuration_fn, datafile_fn

        # ``configuration`` / ``datafile`` are only created when a path is given.
        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(configuration_fn)

        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)
Ejemplo n.º 15
0
    def read_datafile(self,filename):
        """Read ``filename`` and derive the working dataframe from it.

        Stores the data file's name lists on the private ``_*_names``
        attributes, keeps a deep copy of its dataframe on ``_df`` and then
        calls ``create_absolute_errors``.

        Args:
            filename (str): path of the data file to read.
        """
        self.datafile = PyposmatDataFile()
        source = self.datafile
        source.read(filename=filename)

        self._parameter_names = source.parameter_names
        self._qoi_names = source.qoi_names
        self._error_names = source.error_names

        # Work on a private copy of the dataframe.
        self._df = copy.deepcopy(source.df)
        self.create_absolute_errors()
def covariance_analysis(data_fn,names):
    """Print the eigenvalues and eigenvectors of a covariance matrix.

    The covariance is taken over the ``names`` columns of the data file at
    ``data_fn``.

    Args:
        data_fn (str): path of the data file.
        names (list): column names to include.
    """
    assert isinstance(data_fn,str)
    assert isinstance(names,list)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    selected = datafile.df[names]
    eigenvalues, eigenvectors = linalg.eig(np.cov(selected.T))
    print("eigenvalues:\n",eigenvalues)
    print("eigenvectors:\n",eigenvectors)
Ejemplo n.º 17
0
def test__get_header_string():
    """Check the header string and the name lists it assigns to the data file."""
    parameter_names = ['param{}'.format(n) for n in range(1, 4)]
    qoi_names = ['qoi{}'.format(n) for n in range(1, 6)]
    error_names = ['err{}'.format(n) for n in range(1, 6)]

    datafile = PyposmatDataFile()
    s = datafile.get_header_string(
        parameter_names=parameter_names,
        qoi_names=qoi_names,
        error_names=error_names,
    )

    assert type(s) is str

    # check assignment
    assignments = (
        ('parameter_names', parameter_names),
        ('qoi_names', qoi_names),
        ('error_names', error_names),
    )
    for attribute, expected in assignments:
        assigned = getattr(datafile, attribute)
        assert type(assigned) is list
        assert len(assigned) == len(expected)
        for position, name in enumerate(expected):
            assert assigned[position] == name

    # check string
    header_lines = s.split("\n")
    line_1 = header_lines[0].strip().split(",")
    line_2 = header_lines[1].strip().split(",")

    # check line 1
    assert 'sim_id' in line_1
    assert 'cluster_id' not in line_1
    for name in parameter_names + qoi_names + error_names:
        assert name in line_1

    # check line 2
    expected_counts = (
        ('sim_id', 1),
        ('cluster_id', 0),
        ('param', len(parameter_names)),
        ('qoi', len(qoi_names)),
        ('err', len(error_names)),
        ('qoi_v', 0),
        ('err_v', 0),
    )
    for label, count in expected_counts:
        assert line_2.count(label) == count
Ejemplo n.º 18
0
    def read_datafile(self, filename=None):
        """Read the data file and record a copy of its dataframe and its shape.

        Args:
            filename (str, optional): path to read; when given it replaces
                ``self.data_fn``, otherwise the current ``self.data_fn`` is
                used.
        """
        if filename is not None:
            self.data_fn = filename
        path = self.data_fn

        self.data = PyposmatDataFile()
        self.data.read(path)

        self.df = copy.deepcopy(self.data.df)

        # shape is (rows, columns)
        self.data_nrows, self.data_ncols = self.df.shape
Ejemplo n.º 19
0
    def _initialize_data(self, data):
        """Load or copy ``data`` and add its normalized errors.

        Args:
            data (str or PyposmatDataFile): path of an existing data file, or
                a data file object (which is deep-copied).

        Raises:
            TypeError: if ``data`` is neither of the accepted types.
        """
        given_path = isinstance(data, str)
        if not given_path and not isinstance(data, PyposmatDataFile):
            raise TypeError('data cannot be type:{}'.format(str(type(data))))

        if given_path:
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            self.data = deepcopy(data)

        targets = self.configuration.qoi_targets
        self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                           qoi_targets=targets)
Ejemplo n.º 20
0
    def read_datafile(self, filename=None):
        """Read the data file, copy its dataframe and build absolute errors.

        Args:
            filename (str, optional): path to read; when given it replaces
                ``self.datafile_fn``, otherwise the current
                ``self.datafile_fn`` is used.
        """
        if filename is not None:
            self.datafile_fn = filename
        path = self.datafile_fn

        self.datafile = PyposmatDataFile()
        source = self.datafile
        source.read(filename=path)

        self._parameter_names = source.parameter_names
        self._qoi_names = source.qoi_names
        self._error_names = source.error_names

        # Work on a private copy of the dataframe.
        self.df = copy.deepcopy(source.df)
        self.create_absolute_errors()
Ejemplo n.º 21
0
class PyposmatPostProcessorTestHarness(object):
    """Test helper that loads a configuration file and a data file."""

    def __init__(self, configuration_fn, datafile_fn):
        """Remember both filenames and load whichever of them is not ``None``.

        Args:
            configuration_fn: path of the configuration file, or ``None``.
            datafile_fn: path of the data file, or ``None``.
        """
        self.configuration_fn, self.datafile_fn = configuration_fn, datafile_fn

        # ``configuration`` / ``datafile`` are only created when a path is given.
        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(configuration_fn)

        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)

    def get_parameter_names(self):
        """Return the parameter names held by the loaded configuration."""
        configuration = self.configuration
        return configuration.parameter_names
def calculate_kld(data_1_fn,data_2_fn,names,n_samples=2000):
    """Return the divergence between the KDEs of two data files.

    A kernel density estimate is built over the ``names`` columns of each
    file and both are passed to ``kullbach_lieber_divergence``.  If either
    covariance matrix has a negative eigenvalue, ``GaussianKde`` is used for
    both files instead of ``gaussian_kde``.

    Args:
        data_1_fn (str): path of the first data file.
        data_2_fn (str): path of the second data file.
        names: column names selected from both dataframes.
        n_samples (int): sample count forwarded to the divergence routine.

    Returns:
        Whatever ``kullbach_lieber_divergence`` returns.
    """
    assert isinstance(data_1_fn,str)
    assert isinstance(data_2_fn,str)
    assert isinstance(n_samples,int)

    assert os.path.isfile(data_1_fn)
    # The first file used to be checked twice, leaving this one unchecked.
    assert os.path.isfile(data_2_fn)

    data_1 = PyposmatDataFile()
    data_1.read(filename=data_1_fn)

    data_2 = PyposmatDataFile()
    data_2.read(filename=data_2_fn)

    w1,v1 = linalg.eig(np.cov(data_1.df[names].T))
    w2,v2 = linalg.eig(np.cov(data_2.df[names].T))

    # NOTE(review): "< 0" assumes real eigenvalues; confirm which ``linalg``
    # this module imports, since complex values cannot be ordered.
    cov1_ill_conditioned = any(k < 0 for k in w1.tolist())
    cov2_ill_conditioned = any(k < 0 for k in w2.tolist())

    if cov1_ill_conditioned or cov2_ill_conditioned:
        print('using ill-conditioned kde')
        kde_1 = GaussianKde(data_1.df[names].T)
        print(kde_1.n, kde_1.d)
        kde_2 = GaussianKde(data_2.df[names].T)
    else:
        kde_1 = gaussian_kde(data_1.df[names].T)
        kde_2 = gaussian_kde(data_2.df[names].T)

    kld = kullbach_lieber_divergence(kde_1,kde_2,n_samples)
    return kld
Ejemplo n.º 23
0
def test__write_header_section():
    """Write a header section and check the name lists survive a round trip."""

    cleanup_test()

    parameter_names = ['param{}'.format(i + 1) for i in range(3)]
    qoi_names = ['qoi{}'.format(i + 1) for i in range(5)]
    error_names = ['err{}'.format(i + 1) for i in range(5)]

    datafile = PyposmatDataFile()
    datafile.write_header_section(parameter_names=parameter_names,
                                  qoi_names=qoi_names,
                                  error_names=error_names,
                                  filename=datafile_out_fn)

    assert os.path.isfile(datafile_out_fn)

    datafile_read = PyposmatDataFile()
    datafile_read.read(filename=datafile_out_fn)

    assert len(datafile_read.parameter_names) == len(parameter_names)
    for i, v in enumerate(parameter_names):
        assert datafile_read.parameter_names[i] == v

    assert len(datafile_read.qoi_names) == len(qoi_names)
    for i, v in enumerate(qoi_names):
        assert datafile_read.qoi_names[i] == v

    # Compare against error_names; the old check used len(qoi_names) and only
    # passed because both lists happen to have five entries.
    assert len(datafile_read.error_names) == len(error_names)
    for i, v in enumerate(error_names):
        assert datafile_read.error_names[i] == v

    cleanup_test()
Ejemplo n.º 24
0
    def initialize_data(self, data):
        """Store ``data`` on ``self.data``, loading it when a path is given.

        Args:
            data (str, PyposmatDataFile or None): path of a data file, a data
                file object, or ``None`` to leave ``self.data`` unset.

        Raises:
            AssertionError: for any other type while assertions are enabled.
            TypeError: for any other type when assertions are disabled.
        """
        assert isinstance(data,str) \
                or isinstance(data,PyposmatDataFile) \
                or data is None

        if isinstance(data, str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
            return

        if isinstance(data, PyposmatDataFile) or data is None:
            self.data = data
            return

        # Only reachable when the assert above has been compiled out.
        m = 'data argument must either be path string or a PyposmatDataFile object'
        raise TypeError(m)
Ejemplo n.º 25
0
    def read_data(self, filename):
        """Read a pyposmat data file and mirror its contents on ``self``.

        Args:
            filename(str): path of the data file
        """
        self.data_fn = filename

        self.data = PyposmatDataFile()
        source = self.data
        source.read(filename)

        # Expose the name lists and the dataframe directly on this object.
        for attribute in ('parameter_names', 'qoi_names', 'error_names', 'df'):
            setattr(self, attribute, getattr(source, attribute))
Ejemplo n.º 26
0
def show_qoi_targets(config_fn,
                     data_fn):
    """Print each QOI target next to the mean of that QOI in the data file.

    Args:
        config_fn: path of the configuration file providing ``qoi_targets``.
        data_fn: path of the data file providing the dataframe.
    """
    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    row_format = "{:20} {:10} {:10}"
    for qoi_name, qoi_target in o_config.qoi_targets.items():
        try:
            qoi_avg = o_data.df[qoi_name].mean()
        except KeyError:
            # The data file has no column for this QOI.
            qoi_avg = 'no value'
        print(row_format.format(qoi_name,qoi_target,qoi_avg))
Ejemplo n.º 27
0
    def merge_pypospack_datafiles(datafile_fns):
        """Merge several data files into the first one, dropping duplicate rows.

        Args:
            datafile_fns: sequence of data file paths; the first entry
                provides the returned object.

        Returns:
            The ``PyposmatDataFile`` read from the first path, with ``df``
            replaced by the merged, re-indexed dataframe.
        """
        merged = PyposmatDataFile()
        merged.read(filename=datafile_fns[0])
        combined_df = merged.df

        for extra_fn in datafile_fns[1:]:
            print("merging {}...".format(extra_fn))
            extra = PyposmatDataFile()
            extra.read(filename=extra_fn)

            stacked = pd.concat([combined_df, extra.df])
            combined_df = stacked.drop_duplicates().reset_index(drop=True)

        merged.df = combined_df
        return merged
Ejemplo n.º 28
0
def test____init____data_as_obj():
    """Constructing the plot from a data object leaves the other fields unset."""
    plot = Pyposmat2DDensityPlot(data=PyposmatDataFile())

    assert isinstance(plot, Pyposmat2DDensityPlot)
    assert plot.configuration is None
    assert isinstance(plot.data, PyposmatDataFile)
    # Neither figure handle is created by the constructor.
    for attribute in ('fig', 'ax'):
        assert getattr(plot, attribute) is None
Ejemplo n.º 29
0
def get_best_parameterization(config_fn,data_fn,metric_name='d_metric',o_config=None,o_data=None):
    """Return the free parameters of the best-scoring row of a data file.

    Args:
        config_fn: path of the configuration file.
        data_fn: path of the data file.
        metric_name (str): scoring metric; only ``'d_metric'`` is supported.
        o_config: accepted but not used by this function.
        o_data: accepted but not used by this function.

    Returns:
        OrderedDict: free parameter name -> value, taken from the first row
        of ``sub_parameter_df`` after selecting by score with ``n=1``.

    Raises:
        PyposmatUnsupportedPotentialScoringMetric: for any other
            ``metric_name``.
    """
    _analyzer = PyposmatDataAnalyzer()
    _analyzer.read_configuration_file(filename=config_fn)
    _analyzer.read_data_file(filename=data_fn)

    # calculate the scoring metric
    # Compare by value: ``is`` tests object identity, which is not reliable
    # for equal strings.
    if metric_name == 'd_metric':
        _df = _analyzer.calculate_d_metric(df=_analyzer.datafile.df)
    else:
        s = "The metric name {} is unsupported"
        s = s.format(metric_name)
        raise PyposmatUnsupportedPotentialScoringMetric(s)

    _data = PyposmatDataFile()
    _data.read(filename=data_fn)
    _data.df = _df
    _data.subselect_by_score(score_name='d_metric',n=1)

    _free_parameter_names = _analyzer.configuration.free_parameter_names

    _parameter_best_dict = OrderedDict()
    for pn in _free_parameter_names:
        _parameter_best_dict[pn] = _data.sub_parameter_df.iloc[0][pn]

    return _parameter_best_dict
Ejemplo n.º 30
0
class BaseAnalysis(object):
    """Common setup for analyses: a configuration, a data file, an output path."""

    def __init__(self, configuration, data, output_path=None):
        """Initialize the configuration, then the data, then the output path.

        Args:
            configuration (str or PyposmatConfigurationFile): path or object.
            data (str or PyposmatDataFile): path or object.
            output_path (str, optional): directory to (re)create for output.
        """
        self.configuration = self.data = self.output_path = None

        self._initialize_configuration(configuration=configuration)
        self._initialize_data(data=data)
        self._initialize_output_path(path=output_path)

    def _initialize_configuration(self, configuration):
        """Store the configuration, reading it from disk when given a path.

        Raises:
            TypeError: if ``configuration`` is neither a string nor a
                ``PyposmatConfigurationFile``.
        """
        if isinstance(configuration, str):
            assert os.path.isfile(configuration)
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
            return

        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
            return

        type_name = str(type(configuration))
        raise TypeError('configuration cannot be type:{}'.format(type_name))

    def _initialize_data(self, data):
        """Load or deep-copy the data, then add its normalized errors.

        Raises:
            TypeError: if ``data`` is neither a string nor a
                ``PyposmatDataFile``.
        """
        given_path = isinstance(data, str)
        if not given_path and not isinstance(data, PyposmatDataFile):
            raise TypeError('data cannot be type:{}'.format(str(type(data))))

        if given_path:
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            self.data = deepcopy(data)

        targets = self.configuration.qoi_targets
        self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                           qoi_targets=targets)

    def _initialize_output_path(self, path):
        """Set ``output_path``; a string path is recreated as an empty directory.

        An existing directory at ``path`` is removed together with its
        contents before the new one is made.

        Raises:
            TypeError: if ``path`` is neither ``None`` nor a string.
        """
        if path is not None and not isinstance(path, str):
            raise TypeError

        if path is not None:
            if os.path.isdir(path):
                shutil.rmtree(path)
            os.mkdir(path)

        self.output_path = path