class FileSampler():
    """Run a workflow once for every row of a PYPOSMAT data file.

    Each row of the data file supplies a ``sim_id`` and one value for every
    parameter name listed by the configuration.  The workflow for a row is
    executed inside a directory named after its ``sim_id``, created in the
    current working directory.

    Args:
        configuration: path to a configuration file (str) or a
            PyposmatConfigurationFile instance.
        data: path to a data file (str) or a PyposmatDataFile instance.
        structure_name: passed to the workflow as ``structure_name``.
        structure_path: passed to the workflow as ``structure_path``.
        workflow_type: name of the workflow to run; only
            ``'lmps_thermal_expansion'`` is acted upon by ``run()``.
        workflow_definition: mapping of additional keyword arguments for the
            workflow constructor.

    Raises:
        TypeError: if ``configuration`` or ``data`` has an unsupported type.
    """

    def __init__(self,
                 configuration,
                 data,
                 structure_name,
                 structure_path,
                 workflow_type,
                 workflow_definition):
        self.initialize_configuration(configuration)
        self.initialize_data(data)
        self.structure_name = structure_name
        self.structure_path = structure_path
        # Kept for backward compatibility: this attribute used to be the only
        # (misspelled) place the structure path was stored.
        self.strucutre_path = structure_path
        self.workflow_type = workflow_type
        self.workflow_definition = workflow_definition
        self.potential_definition = self.configuration.potential

    def initialize_configuration(self,configuration):
        """Set ``self.configuration`` from an object or from a file path."""
        if isinstance(configuration,PyposmatConfigurationFile):
            self.configuration = configuration
        elif isinstance(configuration,str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        else:
            msg = ("configuration must be a path to a configuration file or an "
                   "instance of the PyposmatConfigurationFile,")
            raise TypeError(msg)

    def initialize_data(self,data):
        """Set ``self.data`` from an object or from a file path."""
        if isinstance(data,PyposmatDataFile):
            self.data = data
        elif isinstance(data,str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            msg = ("data must be a path to a data file or an instance of "
                   "PyposmatDataFile.")
            raise TypeError(msg)

    def run(self):
        """Create one directory per ``sim_id`` and run the workflow inside it.

        The working directory is always restored to where it was before the
        row was processed, even when the workflow raises.
        """
        parameter_names = self.configuration.parameter_names
        for index,row in self.data.df.iterrows():
            sim_id = row['sim_id']
            print('working on sim_id:{}'.format(sim_id))
            parameters = OrderedDict((k,row[k]) for k in parameter_names)

            original_path = os.getcwd()
            # os.mkdir/os.chdir need a path-like value; sim_id may not be a str
            sim_directory = str(sim_id)
            os.mkdir(sim_directory)
            os.chdir(sim_directory)
            try:
                if self.workflow_type == 'lmps_thermal_expansion':
                    workflow = LammpsThermalExpansion(
                            structure_name=self.structure_name,
                            structure_path=self.structure_path,
                            **self.workflow_definition)
                    workflow.create_task_configurations()
                    workflow.create_tasks()
                    workflow.prepare_tasks(
                            potential_definition = self.potential_definition,
                            potential_parameters = parameters)
                    workflow.run()
            finally:
                os.chdir(original_path)
def calculate_kld_parameters(config,data_directory,kld_param_fn='pyposmat.kld_param.out'):
    """Compute per-iteration KLD values and write them to a CSV file.

    For every iteration ``i`` of the configuration three values are computed
    with ``calculate_kld``:

    * ``results``: results file ``i-1`` against results file ``i``
    * ``kde``:     kde file ``i`` against kde file ``i+1``
    * ``filter``:  results file ``i`` against kde file ``i+1``

    For iteration 0 ``results`` and ``kde`` are not computed and are written
    as NaN.

    NOTE(review): ``n_samples`` is not a parameter of this function; it is
    looked up in the enclosing module scope -- confirm it is defined there.

    Args:
        config: path to a configuration file (str) or a
            PyposmatConfigurationFile instance.
        data_directory (str): existing directory holding the
            ``pyposmat.results.N.out`` / ``pyposmat.kde.N.out`` files.
        kld_param_fn (str): path of the CSV file to write.
    """
    assert isinstance(config,str) or isinstance(config,PyposmatConfigurationFile)
    assert os.path.isdir(data_directory)
    assert isinstance(kld_param_fn,str)

    # a str is treated as the path of the configuration file; anything else
    # must already be a configuration object
    if isinstance(config,str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    else:
        assert isinstance(config,PyposmatConfigurationFile)
        o_config = config

    def _data_path(kind,iteration):
        # e.g. <data_directory>/pyposmat.results.3.out
        return os.path.join(data_directory,'pyposmat.{}.{}.out'.format(kind,iteration))

    def _kld(fn_1,fn_2):
        return calculate_kld(
            data_1_fn=fn_1,
            data_2_fn=fn_2,
            names = o_config.free_parameter_names,
            n_samples=n_samples)

    kld = OrderedDict()
    for i in range(o_config.n_iterations):
        entry = OrderedDict()
        kld[i] = entry
        if i == 0:
            # there is no previous iteration to compare against
            entry['results'] = None
            entry['kde'] = None
        else:
            entry['results'] = _kld(_data_path('results',i-1),_data_path('results',i))
            entry['kde'] = _kld(_data_path('kde',i),_data_path('kde',i+1))
        entry['filter'] = _kld(_data_path('results',i),_data_path('kde',i+1))
        print(i)

    # write out the kld_parameters file
    columns = ['results','kde','filter']
    with open(kld_param_fn,'w') as f:
        f.write(",".join(['iteration'] + columns)+"\n")

        for kld_iteration,kld_row in kld.items():
            fields = [kld_iteration]
            fields.extend(
                float('NaN') if kld_row[k] is None else kld_row[k][0]
                for k in columns)
            f.write(",".join([str(s) for s in fields])+"\n")
def do_attribute_tests(sampler, config_fn):
    """Assert that a sampler's attributes mirror the configuration file.

    Args:
        sampler: must be exactly a PyposmatBaseSampler (subclasses are
            rejected by the ``type(...) is`` check).
        config_fn (str): path of the configuration file to compare against.
    """
    from pypospack.pyposmat.engines import PyposmatBaseSampler
    from pypospack.pyposmat.data import PyposmatConfigurationFile

    # test arguments
    assert type(sampler) is PyposmatBaseSampler
    assert type(config_fn) is str

    expected = PyposmatConfigurationFile()
    expected.read(filename=config_fn)

    assert sampler.structure_directory == \
            expected.structures['structure_directory']
    assert sampler.n_iterations == expected.sampling_type['n_iterations']

    # attributes that carry the same name on the sampler and the configuration
    for attribute in ('parameter_names',
                      'qoi_names',
                      'error_names',
                      'free_parameter_names'):
        assert getattr(sampler, attribute) == getattr(expected, attribute)

    # constraints: same keys, same value per key, and equal as a whole
    constraints = sampler.parameter_constraints
    assert set(constraints.keys()) == set(expected.sampling_constraints.keys())
    for k in constraints:
        assert constraints[k] == expected.sampling_constraints[k]
    assert sampler.parameter_constraints == expected.sampling_constraints

    not_free = [
        p for p in sampler.parameter_names
        if p not in sampler.free_parameter_names
    ]
    assert sampler.constrained_parameter_names == not_free
def test____init____using_path_args():
    """A PyposmatDataAnalyzer built from file paths exposes config and data."""
    paths = get_testing_set()
    config_path = paths['config_fn']
    results_path = paths['results_data_fn']

    o = PyposmatDataAnalyzer(config_fn=config_path,
                             results_data_fn=results_path)

    # reference configuration read independently of the analyzer
    reference = PyposmatConfigurationFile()
    reference.read(filename=config_path)

    assert isinstance(o,PyposmatDataAnalyzer)

    assert o.config_fn == config_path
    assert isinstance(o.configuration,PyposmatConfigurationFile)
    for attribute in ('parameter_names', 'error_names', 'qoi_names'):
        assert isinstance(getattr(o, attribute), list)
        assert set(getattr(o, attribute)) == set(getattr(reference, attribute))

    assert o.results_data_fn == results_path
    assert isinstance(o.results_data,PyposmatDataFile)
    assert isinstance(o.results_df,pd.DataFrame)
def make_latex_table(config, data, qoi_type=None, param_type=None):
    """Load configuration and data and prepare the columns for a table.

    NOTE(review): the code visible here only builds the normalized-error
    DataFrame for ``qoi_type == 'by_qoi_target'``; nothing is rendered or
    returned, and ``param_type`` is accepted but not used.

    Args:
        config: path to a configuration file (str) or a
            PyposmatConfigurationFile instance.
        data: path to a data file (str) or a PyposmatDataFile instance.
        qoi_type: only ``'by_qoi_target'`` triggers any work.
        param_type: currently unused; no longer overwritten inside the
            function.

    Raises:
        TypeError: if ``config`` or ``data`` has an unsupported type and the
            assertions are disabled (``python -O``).
    """
    assert isinstance(config,str) \
           or isinstance(config,PyposmatConfigurationFile)
    assert isinstance(data,str) \
            or isinstance(data,PyposmatDataFile)

    if isinstance(config, str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    elif isinstance(config, PyposmatConfigurationFile):
        o_config = config
    else:
        raise TypeError()

    if isinstance(data, str):
        o_data = PyposmatDataFile()
        o_data.read(filename=data)
    elif isinstance(data, PyposmatDataFile):
        o_data = data
    else:
        raise TypeError()

    if qoi_type == 'by_qoi_target':
        o_data.create_normalized_errors(normalize_type='by_qoi_target',
                                        qoi_targets=o_config.qoi_targets)
        df = o_data.df[o_data.normalized_error_names]
def test__initialize_configuration__with_object():
    """initialize_configuration() accepts an already-loaded configuration."""
    config_path = get_testing_set()['config_fn']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)

    analyzer = PyposmatDataAnalyzer()
    analyzer.initialize_configuration(o_config=configuration)
# Example no. 7
# 0
def check_pyposmat_configuration(args):
    """Read and validate the configuration file named by ``args.configuration``.

    Args:
        args: object with a ``configuration`` attribute holding the path of
            the configuration file.
    """
    config_path = args.configuration

    for line in ('checking pyposmat configuration file',
                 'pyposmat_configuration_file:{}'.format(config_path)):
        print(line)

    from pypospack.pyposmat.data import PyposmatConfigurationFile
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)
    configuration.validate()
def test__initialize_configuration__with_object_and_path():
    """Passing both a path and an object must raise TypeError."""
    config_path = get_testing_set()['config_fn']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)

    analyzer = PyposmatDataAnalyzer()
    with pytest.raises(TypeError):
        analyzer.initialize_configuration(config_fn=config_path,
                                          o_config=configuration)
# Example no. 9
# 0
def get_qoi_database_from_PyposmatConfigurationFile(config_fn):
    """Return the ``qois`` mapping of a configuration file after checking it.

    Every entry must have a str key and exactly the keys ``qoi_type``,
    ``structures`` and ``target``.

    Args:
        config_fn: path of the configuration file to read.

    Returns:
        The ``qois`` attribute of the configuration (an OrderedDict).
    """
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)

    assert type(configuration.qois) is OrderedDict

    required_keys = set(['qoi_type','structures','target'])
    for qoi_id, qoi_info in configuration.qois.items():
        assert type(qoi_id) is str
        assert set(qoi_info.keys()) == required_keys

    return configuration.qois
# Example no. 10
# 0
def gmm_analysis(config_fn,
                 data_fn,
                 names,
                 output_directory='gmm_analysis',
                 max_components=20):
    """Fit Gaussian mixtures of increasing size and plot the AIC/BIC choice.

    One ``GaussianMixture`` is fitted for every component count in
    ``range(1, max_components)`` (``max_components`` itself is excluded).
    The AIC and BIC of every model are plotted to ``aic_bic_plot.jpg`` and
    the model with the lowest BIC is plotted to ``gmm_analysis.jpg``; both
    files are written into ``output_directory``.

    Args:
        config_fn (str): path of an existing configuration file.
        data_fn (str): path of an existing data file.
        names: column names of the data file used as the fitting variables.
        output_directory (str): directory for the plots; created if missing.
        max_components (int): exclusive upper bound on the component count.

    Raises:
        ValueError: if ``max_components`` leaves no model to fit.
    """
    assert isinstance(config_fn, str)
    assert isinstance(data_fn, str)
    assert os.path.isfile(config_fn)
    assert os.path.isfile(data_fn)

    n_components = np.arange(1, max_components)
    if len(n_components) == 0:
        raise ValueError(
            'max_components must be at least 2, got {}'.format(max_components))

    os.makedirs(output_directory, exist_ok=True)

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)
    o_data.create_normalized_errors(normalize_type='by_qoi_target',
                                    qoi_targets=o_config.qoi_targets)
    o_data.df['score'] = o_data.df[o_config.normalized_error_names].abs().sum(
        axis=1)

    data = o_data.df[names]

    models = [
        GaussianMixture(n_components=n, covariance_type='full',
                        random_state=0).fit(data) for n in n_components
    ]

    # Evaluate each criterion once; argmin returns the first minimum, which
    # matches min() over (value, index) pairs.
    aic_criteria = [m.aic(data) for m in models]
    aic_idx = int(np.argmin(aic_criteria))
    aic_n_components = n_components[aic_idx]

    bic_criteria = [m.bic(data) for m in models]
    bic_idx = int(np.argmin(bic_criteria))
    bic_n_components = n_components[bic_idx]

    #plot the criteria
    print('bic_n_components:{}'.format(bic_n_components))
    print('aic_n_components:{}'.format(aic_n_components))
    plot_fn = os.path.join(output_directory, 'aic_bic_plot.jpg')
    plot_gmm_aic_bic(filename=plot_fn,
                     n_components=n_components,
                     aic_criteria=aic_criteria,
                     bic_criteria=bic_criteria,
                     aic_n_components=aic_n_components,
                     bic_n_components=bic_n_components)

    # models[k] holds k+1 components, so the best model is selected by its
    # list index, not by its component count.
    filename = os.path.join(output_directory, 'gmm_analysis.jpg')
    plot_gmm(models[bic_idx], data, filename=filename)
def test__read_configuration():
    """read_configuration() populates the name lists and the QOI targets."""
    from pypospack.pyposmat.data import PyposmatConfigurationFile
    # reading the file directly first checks that it is loadable at all
    reference = PyposmatConfigurationFile()
    reference.read(filename=config_fn)

    rugplot = PyposmatParetoRugplot()
    rugplot.read_configuration(filename=config_fn)

    for attribute in ('parameter_names',
                      'qoi_names',
                      'error_names',
                      'qoi_validation_names',
                      'error_validation_names'):
        assert type(getattr(rugplot, attribute)) is list
    assert isinstance(rugplot.qoi_targets, dict)
# Example no. 12
# 0
class PyposmatPostProcessorTestHarness(object):
    """Load a configuration file and a data file for post-processor tests.

    Args:
        configuration_fn: path of the configuration file, or None to skip
            loading it.
        datafile_fn: path of the data file, or None to skip loading it.

    Attributes ``configuration`` and ``datafile`` always exist; they stay
    None when the corresponding filename is None.
    """

    def __init__(self, configuration_fn, datafile_fn):
        self.configuration_fn = configuration_fn
        self.datafile_fn = datafile_fn

        # defined up front so the attributes exist even when nothing is read
        self.configuration = None
        self.datafile = None

        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration_fn)
        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)

    def get_parameter_names(self):
        """Return the parameter names of the loaded configuration."""
        return self.configuration.parameter_names
# Example no. 13
# 0
def write_configuration_file(config_fn):
    """Write the Si Stillinger-Weber configuration to ``config_fn``.

    The sections are taken from the ``Si_sw`` module; the file is read back
    after writing.

    Args:
        config_fn: path of the configuration file to create.
    """
    import Si_sw

    configuration = PyposmatConfigurationFile()

    # populate every section from the Si_sw definitions
    configuration.qois = Si_sw.Si_sw_qoi_db.qois
    configuration.potential = Si_sw.Si_sw_potential
    configuration.structures = Si_sw.Si_sw_structures
    configuration.sampling_type = Si_sw.Si_sw_sampling
    configuration.sampling_distribution = Si_sw.Si_sw_parameter_distribution

    # write, then read the file back
    configuration.write(filename=config_fn)
    configuration.read(filename=config_fn)
# Example no. 14
# 0
def show_qoi_targets(config_fn,
                     data_fn):
    """Print each QOI name, its target, and its mean value in the data file.

    QOIs that have no column in the data file are reported as ``no value``.

    Args:
        config_fn: path of the configuration file.
        data_fn: path of the data file.
    """
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    row_format = "{:20} {:10} {:10}"
    for qoi_name, qoi_target in configuration.qoi_targets.items():
        try:
            qoi_avg = datafile.df[qoi_name].mean()
        except KeyError:
            qoi_avg = 'no value'
        print(row_format.format(qoi_name,qoi_target,qoi_avg))
def write_configuration_file(config_fn):
    """Write the Ni EAM configuration to ``config_fn`` and read it back.

    The sections are taken from the ``Ni__eam__morse_exp_universal`` module.

    Args:
        config_fn(str): the name of the configuration file

    """
    from pypospack.pyposmat.data import PyposmatConfigurationFile
    # the alias must match the name used below
    import Ni__eam__morse_exp_universal as Ni_eam
    Ni_eam_configuration = PyposmatConfigurationFile()
    Ni_eam_configuration.qois = Ni_eam.Ni_qoi_db.qois
    Ni_eam_configuration.potential = Ni_eam.Ni_eam_potential_formalism
    Ni_eam_configuration.structures = Ni_eam.Ni_structure_db
    Ni_eam_configuration.sampling_type = Ni_eam.Ni_eam_sampling
    Ni_eam_configuration.sampling_distribution = Ni_eam.Ni_eam_parameter_distribution
    Ni_eam_configuration.write(filename=config_fn)
    Ni_eam_configuration.read(filename=config_fn)
# Example no. 16
# 0
class BaseAnalysis(object):
    """Common setup for analyses: configuration, data and output directory.

    Args:
        configuration: path of an existing configuration file (str) or a
            PyposmatConfigurationFile instance.
        data: path of an existing data file (str) or a PyposmatDataFile
            instance (a deep copy of the instance is stored).
        output_path: None, or the path of the output directory.  An existing
            directory at that path is deleted and recreated empty.
    """

    def __init__(self, configuration, data, output_path=None):
        self.configuration = None
        self.data = None
        self.output_path = None

        self._initialize_configuration(configuration=configuration)
        self._initialize_data(data=data)
        self._initialize_output_path(path=output_path)

    def _initialize_configuration(self, configuration):
        """Store the configuration, reading it from disk when given a path."""
        if isinstance(configuration, str):
            assert os.path.isfile(configuration)
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
            return

        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
            return

        type_name = str(type(configuration))
        raise TypeError('configuration cannot be type:{}'.format(type_name))

    def _initialize_data(self, data):
        """Store the data and add normalized errors based on the QOI targets."""
        if isinstance(data, str):
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        elif isinstance(data, PyposmatDataFile):
            # work on a copy so the caller's object is left untouched
            self.data = deepcopy(data)
        else:
            type_name = str(type(data))
            raise TypeError('data cannot be type:{}'.format(type_name))

        qoi_targets = self.configuration.qoi_targets
        self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                           qoi_targets=qoi_targets)

    def _initialize_output_path(self, path):
        """Create a fresh output directory at ``path`` (no-op for None)."""
        if path is None:
            self.output_path = None
            return

        if not isinstance(path, str):
            raise TypeError

        # start from an empty directory: remove whatever is already there
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)
        self.output_path = path
# Example no. 17
# 0
def make_rug_plot(config_fn,
                  data_fn,
                  ax=None,
                  plot_fn='rugplot.png'):
    """Draw one row of rug marks per QOI and save the figure.

    Args:
        config_fn: path of the configuration file (source of the QOI
            targets).
        data_fn: path of the data file (source of the ``<qoi>.err`` columns).
        ax: matplotlib Axes to draw on; a new figure is created when None.
        plot_fn: path the figure is saved to.
    """
    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    qoi_targets = o_config.qoi_targets
    error_names = o_data.error_names
    qoi_names = o_data.qoi_names

    # create normalized error
    df = copy.deepcopy(o_data.df[error_names])
    for qn in qoi_names:
        en = "{}.err".format(qn)
        nen = "{}.nerr".format(qn)
        q = qoi_targets[qn]
        # NOTE(review): this evaluates (err/q) - q, which mixes a ratio with
        # the target itself; confirm whether err/q was intended.
        df[nen]=o_data.df[en]/q-q

    n_rows = o_data.df.shape[0]

    if ax is None:
        fig, ax = plt.subplots(nrows=1,ncols=1)
    else:
        # a caller-supplied Axes already belongs to a figure; save that one
        fig = ax.get_figure()

    for iq,qn in enumerate(qoi_names):
        # every sample of QOI number iq is drawn at height iq+1
        ax.scatter(
            df["{}.nerr".format(qn)],
            n_rows*[iq+1],
            marker='|',
            s=100.,
            color='k'
        )

    plt.sca(ax)
    plt.yticks(range(len(qoi_names)+1),['']+qoi_names)
    fig.savefig(plot_fn)
# Example no. 18
# 0
class Manifold(object):
    """Base class for manifold learning on PYPOSMAT data.

    Args:
        pyposmat_configuration: path to a configuration file (str) or a
            PyposmatConfigurationFile instance.
        pyposmat_data: path to a data file (str) or a PyposmatDataFile
            instance.
        manifold_config: only None is supported at present.

    Raises:
        TypeError: if the configuration or data argument has an unsupported
            type.
        NotImplementedError: if ``manifold_config`` is not None.
    """

    def __init__(self,pyposmat_configuration,pyposmat_data,manifold_config=None):
        self.configuration = None
        self.data = None
        self.manifold_configuration = None
        self.initialize_configuration(pyposmat_configuration=pyposmat_configuration)
        self.initialize_data(pyposmat_data=pyposmat_data)
        self.initialize_manifold_configuration(manifold_configuration=manifold_config)

    def initialize_configuration(self,pyposmat_configuration):
        """Set ``self.configuration`` from an object or from a file path."""
        if isinstance(pyposmat_configuration,PyposmatConfigurationFile):
            self.configuration = pyposmat_configuration
        elif isinstance(pyposmat_configuration,str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=pyposmat_configuration)
        else:
            raise TypeError('pyposmat_configuration must be either a path or PyposmatConfigurationFile')

    def initialize_data(self,pyposmat_data):
        """Set ``self.data`` from an object or from a file path."""
        if isinstance(pyposmat_data, PyposmatDataFile):
            self.data = pyposmat_data
        elif isinstance(pyposmat_data, str):
            # instantiate the class; read() needs an instance
            self.data = PyposmatDataFile()
            self.data.read(filename=pyposmat_data)
        else:
            raise TypeError('pyposmat_data must either be a path or a PyposmatDataFile')

    def initialize_manifold_configuration(self,manifold_configuration=None):
        """Store the manifold configuration; only None is implemented."""
        if manifold_configuration is None:
            self.manifold_configuration = None
        else:
            raise NotImplementedError

    def learn_manifold(self,names,scaling_type):
        """To be implemented by subclasses."""
        raise NotImplementedError

    def scale_data(self,X,scaling_type):
        """Return ``X`` scaled according to ``scaling_type``.

        Args:
            X: the data to scale.
            scaling_type (str): ``'standard'`` applies
                ``preprocessing.scale``; ``'none'`` returns ``X`` unchanged.

        Raises:
            ValueError: for any other ``scaling_type``.
        """
        if scaling_type == 'standard':
            return preprocessing.scale(X)
        if scaling_type == 'none':
            return X
        raise ValueError('unknown scaling_type:{}'.format(scaling_type))

    def set_xticks(self,config,names,ax=None):
        """Label the x ticks of an Axes with the LaTeX labels of ``names``.

        Args:
            config: path to a configuration file (str) or a
                PyposmatConfigurationFile instance.
            names: the string ``'qoi_names'`` (use the configuration's QOI
                names) or an explicit list of names.
            ax: Axes to modify; ``self.ax`` is used when None.

        Raises:
            TypeError: if ``config`` or ``names`` has an unsupported type.
        """
        if isinstance(config,PyposmatConfigurationFile):
            config_ = config
        elif isinstance(config,str):
            config_ = PyposmatConfigurationFile()
            config_.read(config)
        else:
            raise TypeError()

        if names == 'qoi_names':
            names_ = config_.qoi_names
        elif isinstance(names,list):
            names_ = names
        else:
            raise TypeError()

        # an explicitly passed Axes takes precedence over self.ax
        ax_ = self.ax if ax is None else ax
        latex_labels = [config_.latex_labels[k]['label'] for k in names_]
        print(latex_labels)
        ax_.set_xticks(range(len(latex_labels)))
        ax_.set_xticklabels(latex_labels)
def test__get_header_string():
    """get_header_string() returns the names line followed by the types line."""
    paths = get_testing_set()
    config_path = paths['config_fn']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)
    assert os.path.isfile(config_path)

    badparameters = PyposmatBadParametersFile(
        filename=paths['badparameters_out_fn'],
        config_fn=config_path)
    header = badparameters.get_header_string()

    assert type(header) is str

    # expected header: column names on line 1, column types on line 2
    parameter_names = configuration.parameter_names
    names_line = ",".join(['sim_id'] + parameter_names + ['reason'])
    types_line = ",".join(
        ['sim_id'] + len(configuration.parameter_names)*['param'] + ['reason'])
    expected = "{}\n".format(names_line) + "{}\n".format(types_line)

    assert expected == header
# Example no. 21
# 0
def test____init____w_filename_config_fn():
    """Constructing with a filename and config_fn loads the configuration."""
    paths = get_testing_set()
    out_path = paths['badparameters_out_fn']
    config_path = paths['config_fn']

    assert os.path.isfile(paths['badparameters_in_fn'])
    assert os.path.isfile(config_path)

    # start from a clean slate: drop any output left by a previous run
    if os.path.isfile(out_path):
        print("removing the badparameter_out_file:{}".format(out_path))
        os.remove(out_path)

    badparameters = PyposmatBadParametersFile(filename=out_path,
                                              config_fn=config_path)

    assert badparameters.filename == out_path

    from pypospack.pyposmat.data import PyposmatConfigurationFile
    reference = PyposmatConfigurationFile()
    reference.read(filename=config_path)
    assert isinstance(badparameters.configuration, PyposmatConfigurationFile)
    assert isinstance(badparameters.parameter_names, list)
    assert set(badparameters.parameter_names) == set(reference.parameter_names)
# Example no. 22
# 0
def test__read_configuration_file():
    """The MgO Buckingham configuration yields the expected parameter names.

    Order does not matter: for each list the lengths must match and every
    name read from the file must be among the expected names.
    """
    scenario = _scenario_MgO_buckingham
    expected_names = scenario['parameter_names']
    expected_free_names = scenario['free_parameter_names']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=scenario['configuration_fn'])

    # all parameters
    assert len(configuration.parameter_names) == len(expected_names)
    assert all(pn in expected_names for pn in configuration.parameter_names)

    # free parameters only
    assert len(configuration.free_parameter_names) == len(expected_free_names)
    assert all(
        pn in expected_free_names
        for pn in configuration.free_parameter_names)
# Example no. 23
# 0
class PyposmatBadParametersFile(object):
    """Reader/writer for the PYPOSMAT "bad parameters" file.

    The file is comma separated.  Line 1 holds the column names, line 2 the
    column types, and every following line one record of the form
    ``sim_id, <parameter values...>, reason``.

    Args:
        filename (str or None): path of the bad-parameters file.
        config_fn (str or None): path of a configuration file to read.
        o_config (PyposmatConfigurationFile or None): an already loaded
            configuration.  Mutually exclusive with ``config_fn``.

    Raises:
        TypeError: if both ``config_fn`` and ``o_config`` are given, or if
            either has an unsupported type.
    """

    def __init__(
        self,
        filename='pyposmat.badparameters.out',
        config_fn=None,
        o_config=None,
    ):
        assert filename is None or isinstance(filename, str)
        assert config_fn is None or isinstance(config_fn, str)
        assert o_config is None or isinstance(o_config,
                                              PyposmatConfigurationFile)

        self.filename = filename
        self._parameter_names = None
        self.w_cluster_id = False

        self.df = None
        self.parameter_df = None

        # Read by write_subselect(); nothing in this class populates them,
        # so they are defined here to give a clear error instead of an
        # AttributeError.
        self.sub_df = None
        self.score_names = None

        self.initialize_configuration(config_fn=config_fn, o_config=o_config)

    def initialize_configuration(self, config_fn, o_config):
        """Set ``self.configuration`` from a path, an object, or neither."""
        if config_fn is None and o_config is None:
            self.configuration = None
        elif isinstance(config_fn, str) and o_config is None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=config_fn)
        elif isinstance(o_config,
                        PyposmatConfigurationFile) and config_fn is None:
            self.configuration = o_config
        elif isinstance(o_config, PyposmatConfigurationFile) and isinstance(
                config_fn, str):
            m = (
                "Cannot configure the PyposmatBadParametersFile with both "
                "options o_config and config_fn both being specified.  "
                "Choose only one."
            )
            raise TypeError(m)
        else:
            m = ("wrong types:\n" "\to_config:{}\n" "\tconfig_fn:{}\n")
            m = m.format(type(o_config), type(config_fn))
            raise TypeError(m)

    @property
    def parameter_names(self):
        """list or None: parameter names of the configuration, if one is set."""
        if self.configuration is None:
            return None
        if isinstance(self.configuration, PyposmatConfigurationFile):
            return self.configuration.parameter_names
        return None

    @property
    def names(self):
        """list: column names, with ``cluster_id`` when ``df`` has that column."""
        names = ['sim_id']
        if self.df is not None and 'cluster_id' in self.df.columns:
            names += ['cluster_id']
        names += self.parameter_names
        names += ['reason']

        return names

    @property
    def types(self):
        """list: column types, parallel to ``names``."""
        types = ['sim_id']
        if self.df is not None and 'cluster_id' in self.df.columns:
            types += ['cluster_id']
        types += len(self.parameter_names) * ['param']
        types += ['reason']

        return types

    @property
    def n_samples(self):
        """int: number of rows in ``df``."""
        return self.df.shape[0]

    def get_header_string(self):
        """Return the two header lines (names, then types) as one string.

        Unlike ``names``/``types`` this never includes ``cluster_id``.
        """
        parameter_names = self.parameter_names
        header_line_1 = ['sim_id'] + parameter_names + ['reason']
        header_line_2 = ['sim_id'] + len(parameter_names)*['param'] + ['reason']

        str_header_section = "{}\n".format(",".join(header_line_1))
        str_header_section += "{}\n".format(",".join(header_line_2))

        return str_header_section

    def write_header_section(self, filename=None):
        """Create (or truncate) the file and write the header lines.

        Args:
            filename (str or None): when given, replaces ``self.filename``.
        """
        assert isinstance(self.parameter_names, list)
        assert filename is None or isinstance(filename, str)

        if filename is not None:
            self.filename = filename

        header_str = self.get_header_string()

        with open(self.filename, 'w') as f:
            f.write(header_str)

    def write_simulation_exception(self, sim_id, exception):
        """Append one record describing a failed simulation.

        Args:
            sim_id: simulation identifier; converted with ``str``.
            exception: must provide ``explain()`` (the reason text) and
                ``kwargs['parameters']``, a mapping from parameter name to
                value.
        """
        assert isinstance(exception, BaseException)
        assert isinstance(self.parameter_names, list)

        if not isinstance(sim_id, str):
            sim_id = str(sim_id)
        s_reason = exception.explain()

        parameters = exception.kwargs['parameters']
        fields = [sim_id] \
                + [str(parameters[k]) for k in self.parameter_names] \
                + [s_reason]

        with open(self.filename, 'a') as f:
            f.write(",".join(fields) + "\n")

    def read(self, filename=None):
        """Read the file into ``self.df``.

        Every field is converted with ``float`` when possible.  Fields that
        cannot be converted are kept as text only in columns whose name ends
        with ``latticetype``, ``sim_id`` or ``reason``; elsewhere the
        ``ValueError`` propagates.

        NOTE(review): ``float`` accepts underscores between digits, so an
        identifier such as ``"0_1"`` is stored as ``1.0`` -- confirm this is
        acceptable for ``sim_id``.

        Args:
            filename (str or None): when given, replaces ``self.filename``.

        Raises:
            FileNotFoundError: if the file does not exist.
            ValueError: for a non-numeric field in a numeric column.
        """
        if filename is not None:
            self.filename = filename
        try:
            with open(self.filename, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            print("cannot find file: {}".format(self.filename))
            print("current_working_dir: {}".format(os.getcwd()))
            raise

        self._names = [s.strip() for s in lines[0].strip().split(',')]
        self._types = [s.strip() for s in lines[1].strip().split(',')]

        text_column_suffixes = ('latticetype', 'sim_id', 'reason')

        table = []
        for line in lines[2:]:
            tokens = line.strip().split(',')
            values = []
            for i, v in enumerate(self._names):
                try:
                    values.append(float(tokens[i]))
                except ValueError:
                    if v.endswith(text_column_suffixes):
                        values.append(tokens[i])
                    else:
                        print(v)
                        print(tokens[i])
                        raise
            table.append(values)

        self.df = pd.DataFrame(data=table, columns=self._names)

    def write(self, filename):
        """Write names, types and every row of ``df`` to ``filename``."""
        s = [",".join(self.names)]
        s += [",".join(self.types)]
        s += [
            ",".join([str(v) for v in k])
            for k in self.df[self.names].values.tolist()
        ]

        with open(filename, 'w') as f:
            f.write("\n".join(s))

    def write_subselect(self, filename=None):
        """Write ``self.sub_df`` with the header lines and return the path.

        Args:
            filename (str or None): output path.  When None the name is
                derived from ``self.score_names``.

        Returns:
            str: the path that was written.

        Raises:
            ValueError: if ``filename`` is neither None nor a str, if no
                subselection is available, if ``sub_df`` is not a DataFrame,
                or if a ``sim_id`` cannot be converted to ``int``.
        """
        if filename is None:
            if self.score_names is None:
                err_msg = "no score_names are available to build the filename"
                raise ValueError(err_msg)
            _filename = "subselect.{}.out".format(".".join(self.score_names))
        elif type(filename) is str:
            _filename = filename
        else:
            err_msg = "the filename argument for this method must be a string"
            raise ValueError(err_msg)

        if self.sub_df is None:
            err_msg = "no subselection has been done on the data"
            raise ValueError(err_msg)
        if not isinstance(self.sub_df, pd.DataFrame):
            print(type(self.sub_df))
            err_msg = "the sub_df attribute must be a pandas.DataFrame"
            raise ValueError(err_msg)

        # build the output: two header lines, then one line per row
        out_lines = [','.join(self.names), ','.join(self.types)]
        for _, row in self.sub_df.iterrows():
            values = list(row)
            values[0] = int(values[0])  # the first column is the sim_id
            out_lines.append(','.join([str(s) for s in values]))

        with open(_filename, 'w') as f:
            f.write("\n".join(out_lines) + "\n")

        return _filename
# Example no. 24
# 0
if __name__ == "__main__":
    # Script entry point: load the Si Stillinger-Weber example configuration
    # and data, then set up the manifold-learning settings and a 1x3 figure.
    import pypospack.utils
    # pypospack root directory
    pypospack_root_dir = pypospack.utils.get_pypospack_root_directory()

    config_fn = os.path.join(
            pypospack_root_dir,
            'data','Si__sw__data','pareto_optimization_unconstrained',
            'pyposmat.config.in')
    data_fn = os.path.join(
            pypospack_root_dir,
            'data','Si__sw__data','pareto_optimization_unconstrained',
            'pyposmat.kde.20.out')

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    manifold_learn_config = OrderedDict()
    manifold_learn_config['manifold_type'] = 'tsne'
    manifold_learn_config['pypospack_config_fn'] = config_fn
    manifold_learn_config['pypospack_data_fn'] =  data_fn

    fig,ax = plt.subplots(1,3)


    if manifold_learn_config['manifold_type'] == 'mds':
        # NOTE(review): this branch used to write to an undefined name
        # `manifold`; the settings are stored on manifold_learn_config, the
        # only configuration mapping in scope -- confirm the intended target.
        manifold_learn_config['config'] = OrderedDict()
        manifold_learn_config['config']['n_components'] = 2
# Example no. 25
# 0
    #        type=str,
    #        help="location of the data directory")
    #parse_args = parser.parse_args()

    _n_potentials = 30
    # Raw strings: "\p" is not a valid escape sequence, so the plain literals
    # relied on Python leaving the backslash in place (and newer interpreters
    # warn about it).  The resulting text is unchanged.
    # NOTE(review): the backslash is a path separator only on Windows.
    _data_fn = r"data__Ni__eam__born_exp_bjs_01\pyposmat.results.0.out"
    #_data_fn = "results.temp.out"
    _config_fn = r"data__Ni__eam__born_exp_bjs_01\pyposmat.config.in"
    _plot_fn = "rugplot.png"

    make_rugplots = False

    print(80*'-')
    print("reading the configuration file {}...".format(_config_fn))
    config=PyposmatConfigurationFile()
    config.read(filename=_config_fn)
    qoi_targets=get_qoi_targets(config)
    print("reading the data file {}...".format(_data_fn))
    datafile=PyposmatDataFile()
    datafile.read(filename=_data_fn)

    from pypospack.pareto import pareto

    # work on a copy so the DataFrame held by datafile is left untouched
    df = copy.deepcopy(datafile.df)
    nr,nc = df.shape
    # number of simulations; 'start' is the row count of the data file
    _nsimulations = OrderedDict()
    _nsimulations['start'] = nr
    abs_error_names = ["{}.abserr".format(q) for q in datafile.qoi_names]
    for q in datafile.qoi_names:
        qe = "{}.err".format(q)
        qne = "{}.abserr".format(q)
# Example no. 26
# 0
class PyposmatParallelCoordinates(object):
    """Hold the configuration and data file used for a parallel-coordinates plot.

    Both the configuration and the data can be supplied either as a path
    (which is read immediately) or as an already constructed object.
    """

    def __init__(self):
        """Start with an empty configuration file and an empty data file."""
        self._configuration = PyposmatConfigurationFile()
        self._data = PyposmatDataFile()

    def set_configuration(self, configuration):
        """Set the configuration from a path or a PyposmatConfigurationFile.

        Raises:
            TypeError: if ``configuration`` is neither of those.
        """
        if isinstance(configuration, str):
            self.set_configuration_by_path(path=configuration)
        elif isinstance(configuration, PyposmatConfigurationFile):
            # the method is named ..._by_obj; ..._by_object does not exist
            self.set_configuration_by_obj(config_obj=configuration)
        else:
            raise TypeError(
                "configuration must be a path or a PyposmatConfigurationFile")

    def set_data(self, data):
        """Set the data from a path or a PyposmatDataFile.

        Raises:
            TypeError: if ``data`` is neither of those.
        """
        if isinstance(data, str):
            self.set_data_by_path(path=data)
        elif isinstance(data, PyposmatDataFile):
            self.set_data_by_obj(data_obj=data)
        else:
            raise TypeError("data must be a path or a PyposmatDataFile")

    def set_configuration_by_path(self, path):
        """Read the configuration file found at ``path``."""
        assert isinstance(path, str)
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(path)

    def set_configuration_by_obj(self, config_obj):
        """Use an existing PyposmatConfigurationFile as the configuration."""
        assert isinstance(config_obj, PyposmatConfigurationFile)
        self.configuration = config_obj

    def set_data_by_path(self, path):
        """Read the data file found at ``path``."""
        assert isinstance(path, str)
        self.data = PyposmatDataFile()
        self.data.read(path)

    def set_data_by_obj(self, data_obj):
        """Use an existing PyposmatDataFile as the data."""
        assert isinstance(data_obj, PyposmatDataFile)
        self.data = data_obj

    @property
    def configuration(self):
        """The current PyposmatConfigurationFile."""
        return self._configuration

    @configuration.setter
    def configuration(self, configuration):
        assert isinstance(configuration, PyposmatConfigurationFile)
        self._configuration = configuration

    @property
    def data(self):
        """The current PyposmatDataFile."""
        return self._data

    @data.setter
    def data(self, data):
        assert isinstance(data, PyposmatDataFile)
        self._data = data

    def plot(self, path):
        """Check that ``path`` is a string.

        NOTE(review): only the argument check is present here; no plotting
        code is visible in this class.
        """
        assert isinstance(path, str)
Esempio n. 27
0
class PyposmatIterativeSampler(object):
    def __init__(self, configuration_filename, is_restart=False):
        self.RANK_DIR_FORMAT = 'rank_{}'
        self.mpi_comm = None
        self.mpi_rank = None
        self.mpi_size = None
        self.mpi_nprocs = None
        self.n_iterations = None
        self.rv_seed = None
        self.rv_seeds = None

        self.configuration_filename = configuration_filename
        self.configuration = None
        self.mc_sampler = None

        self.root_directory = os.getcwd()
        self.data_directory = 'data'
        self.is_restart = is_restart
        self.start_iteration = 0

    def run_restart(self):
        if self.configuration is None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(self.configuration_filename)

        # determine if there was a seed file
        _init_fn = self.find_initial_parameters_files()

        # get contents of the data directory if it exists
        _data_dir = os.path.join(self.root_directory, self.data_directory)
        self.i_iterations, _data_dir_fns = self.analyze_rank_directories(
            data_dir=_data_dir)

        # get contents of the rank directories
        _root_dir = self.root_directory
        n_ranks, _rank_data_fns = self.analyze_rank_directories(
            root_dir=_root_dir)

        if self.mpi_rank == 0:
            pass
        pass

    def run_all(self):
        """Run every sampling iteration, keeping the MPI ranks in step.

        In each iteration all ranks run their simulations; rank 0 then merges
        the result files and analyzes them.  A barrier separates the phases.
        """
        self.setup_mpi_environment()
        self.determine_rv_seeds()
        MPI.COMM_WORLD.Barrier()

        rule = 80 * '-'
        self.start_iteration = 0
        for i in range(self.start_iteration, self.n_iterations):
            if self.mpi_rank == 0:
                banner = 'BEGIN ITERATION {}/{}'.format(
                    i + 1, self.n_iterations)
                print(rule)
                print('{:80}'.format(banner))
                print(rule)
            MPI.COMM_WORLD.Barrier()

            # every rank simulates its own share of the samples
            self.run_simulations(i)
            MPI.COMM_WORLD.Barrier()

            # only rank 0 post-processes; the others wait at the barrier
            if self.mpi_rank == 0:
                print('merging files')
                self.merge_files(i)
                self.analyze_results(i)
            MPI.COMM_WORLD.Barrier()

        print(rule)
        print('JOBCOMPLETE')

    def run_simulations(self, i_iteration):
        """Run this rank's share of the simulations for one iteration.

        A fresh ``rank_<mpi_rank>`` directory is created and used as the
        working directory while the Monte Carlo sampler runs; the process
        returns to the root directory afterwards.

        Args:
            i_iteration: index of the iteration; selects the entry of the
                configuration's ``sampling_type`` and the random seed.
        """
        self.rank_directory = self.RANK_DIR_FORMAT.format(self.mpi_rank)

        # start from an empty rank directory
        if os.path.isdir(self.rank_directory):
            shutil.rmtree(self.rank_directory)
        os.makedirs(self.rank_directory)

        # each rank works in its own directory so that the disk IO of the
        # workers does not conflict
        os.chdir(self.rank_directory)

        config_path = os.path.join(self.root_directory,
                                   self.configuration_filename)
        results_path = os.path.join(self.root_directory,
                                    self.rank_directory,
                                    'pyposmat.results.out')

        # set random seed
        np.random.seed(self.rv_seeds[self.mpi_rank, i_iteration])

        # build and configure the sampler
        self.mc_sampler = PyposmatMonteCarloSampler(
            filename_in=config_path,
            filename_out=results_path,
            mpi_rank=self.mpi_rank,
            mpi_size=self.mpi_size)
        sampler = self.mc_sampler
        sampler.create_base_directories()
        sampler.read_configuration_file()

        # the working directory is now one level below the root, so the
        # structure directory gets a '..' prefix
        structure_dir = sampler.configuration.structures[
            'structure_directory']
        sampler.configuration.structures['structure_directory'] = \
                os.path.join('..', structure_dir)

        sampler.configure_qoi_manager()
        sampler.configure_task_manager()
        sampler.configure_pyposmat_datafile_out()

        # only rank 0 reports; the initial distribution is printed once
        if self.mpi_rank == 0:
            sampler.print_structure_database()
            sampler.print_sampling_configuration()
            if i_iteration == 0:
                sampler.print_initial_parameter_distribution()
            print(80 * '-')
        MPI.COMM_WORLD.Barrier()

        mc_config = sampler.configuration.sampling_type[i_iteration]
        sample_type = mc_config['type']
        n_total = mc_config['n_samples']

        # determine number of sims for this rank: the remainder is spread
        # over the lowest ranks
        n_for_rank = int(n_total / self.mpi_size)
        if n_total % self.mpi_size > self.mpi_rank:
            n_for_rank += 1

        if sample_type == 'parametric':
            sampler.run_simulations(i_iteration=i_iteration,
                                    n_samples=n_for_rank)
        elif sample_type in ('kde', 'from_file'):
            # 'from_file' always names its file; 'kde' falls back to the kde
            # file written for this iteration in the data directory
            if sample_type == 'from_file' or 'file' in mc_config:
                parameter_path = os.path.join(self.root_directory,
                                              mc_config['file'])
            else:
                parameter_path = os.path.join(
                    self.root_directory, self.data_directory,
                    'pyposmat.kde.{}.out'.format(i_iteration))
            sampler.run_simulations(i_iteration=i_iteration,
                                    n_samples=n_for_rank,
                                    filename=parameter_path)

        # return to root directory
        os.chdir(self.root_directory)

    def setup_mpi_environment(self):
        """Record the MPI communicator, rank, size and processor name.

        Rank 0 additionally prints a summary of the MPI environment.
        """
        communicator = MPI.COMM_WORLD
        self.mpi_comm = communicator
        self.mpi_rank = communicator.Get_rank()
        self.mpi_size = communicator.Get_size()
        self.mpi_procname = MPI.Get_processor_name()

        is_root = self.mpi_rank == 0
        if is_root:
            self.print_mpi_environment()

    def print_mpi_environment(self):
        print(80 * '-')
        print('{:^80}'.format('MPI COMMUNICATION INFORMATION'))
        print(80 * '-')
        print('mpi_size={}'.format(self.mpi_size))

    def determine_rv_seeds(self):
        _randint_low = 0
        _randint_high = 2147483647

        # set original seed
        if self.rv_seed is None:
            self.rv_seed = np.random.randint(low=_randint_low,
                                             high=_randint_high)
        np.random.seed(self.rv_seed)

        # determine rank seed
        self.rv_seeds = np.random.randint(low=0,
                                          high=2147483647,
                                          size=(int(self.mpi_size),
                                                self.n_iterations))

        if self.mpi_rank == 0:
            self.print_random_seeds()

    def analyze_data_directories(self, data_dir=None):
        _d = data_dir
        i = 0
        contents = []
        if not os.path.exists(_d): return i, contents
        if not os.path.isdir(_d): return i, contents

        while True:
            kde_fn = os.path.join(_d, "pyposmat.kde.{}.out".format(i))
            if os.path.exists(kde_fn):
                contents.append(kde_fn)
            else:
                if i > 0:
                    contents.append(results_fn)
                    break

            results_fn = os.path.join(_d, "pyposmat.results.{}.out".format(i))
            if os.path.exists(results_fn): pass
            else: break
            i = i + 1

        return i, contents

    def analyze_rank_directories(self, root_dir=None):
        i = 0
        contents = []

        if root_dir is None:
            _d = self.root_directory
        else:
            _d = root_directory

        while True:
            rank_dir = os.path.join(_d, "rank_{}".format(i))
            if not os.path.exists(rank_dir): break
            if not os.path.isdir(rank_dir): break
            rank_fn = os.path.join("rank_{}".format(i), "pyposmat.results.out")
            if not os.path.exists(os.path.join(_d, rank_fn)): break
            if not os.path.isfile(os.path.join(_d, rank_fn)):
                break
            else:
                contents.append(rank_fn)
            i = i + 1
        return i, contents

    def find_initial_parameters_file(self):
        if 'file' in self.configuration.sampling_type[0]:
            _init_fn = os.path.join(
                self.root_directory,
                self.configuration.sampling_type[0]['file'])
            if os.path.exists(_init_fn):
                if os.path.isfile(_init_fn):
                    return _init_fn
                else:
                    return None

    @staticmethod
    def merge_pypospack_datafiles(datafile_fns):
        """Merge several data files into one, dropping duplicate rows.

        Declared as a static method because it takes no ``self``: without the
        decorator, calling it on an instance passed the instance as
        ``datafile_fns``.

        Args:
            datafile_fns: sequence of data file paths; it must contain at
                least one entry.

        Returns:
            The PyposmatDataFile read from the first path, with ``df``
            replaced by the merged dataframe.
        """
        d0 = PyposmatDataFile()
        d0.read(filename=datafile_fns[0])

        frames = [d0.df]
        for fn in datafile_fns[1:]:
            print("merging {}...".format(fn))
            d = PyposmatDataFile()
            d.read(filename=fn)
            frames.append(d.df)

        # a single file is returned as read; otherwise concatenate once
        # instead of re-copying the growing frame for every file
        if len(frames) > 1:
            d0.df = pd.concat(frames).drop_duplicates().reset_index(drop=True)
        return d0

    def merge_files(self, i_iteration):
        """Merge the results of all ranks for one iteration into one file.

        The kde file of this iteration (if it exists in the data directory)
        and ``rank_<r>/pyposmat.results.out`` of every rank are concatenated
        and written to ``pyposmat.results.<i_iteration>.out`` in the data
        directory.  All paths are relative to the current working directory.

        Args:
            i_iteration: index of the iteration being merged.
        """
        _dir = self.data_directory

        # filename of old kde file
        _filename_kde = os.path.join(_dir,
                                     'pyposmat.kde.{}.out'.format(i_iteration))

        print('Looking for previous kde file')
        print('    {}'.format(_filename_kde))

        datafile_fns = []
        if os.path.isfile(_filename_kde):
            datafile_fns.append(_filename_kde)
        for i_rank in range(self.mpi_size):
            datafile_fns.append(os.path.join('rank_{}'.format(i_rank),
                                             'pyposmat.results.out'))

        # column names of the merged file and the type tag of each column
        names = ['sim_id']\
                + self.parameter_names\
                + self.qoi_names\
                + self.error_names
        types = ['sim_id']\
                + ['param']*len(self.parameter_names)\
                + ['qoi']*len(self.qoi_names)\
                + ['err']*len(self.error_names)

        # NOTE(review): sim_id values are copied unchanged; an earlier,
        # disabled draft prefixed them with iteration and rank.
        dataframes = OrderedDict()
        for fn in datafile_fns:
            datafile = PyposmatDataFile()
            datafile.read(fn)
            dataframes[fn] = datafile.df[names]

        datafile = PyposmatDataFile()
        datafile.df = pd.concat(dataframes).reset_index(drop=True)
        datafile.parameter_names = self.parameter_names
        datafile.error_names = self.error_names
        datafile.qoi_names = self.qoi_names
        datafile.names = names
        datafile.types = types

        fn_out = os.path.join(
            _dir, 'pyposmat.results.{}.out'.format(i_iteration))
        try:
            datafile.write(filename=fn_out)
        except FileNotFoundError:
            # only a missing data directory is recoverable
            if os.path.exists(self.data_directory):
                raise
            os.mkdir(self.data_directory)
            # the retry used to reference the undefined name filename_fn_out
            datafile.write(filename=fn_out)

    def analyze_results(self, i_iteration):
        """Analyze the merged results of an iteration and write a kde file.

        Reads ``pyposmat.results.<i_iteration>.out`` from the data directory
        and writes ``pyposmat.kde.<i_iteration + 1>.out`` next to it.

        Args:
            i_iteration: index of the iteration that was just merged.
        """
        data_dir = os.path.join(self.root_directory, self.data_directory)
        results_path = os.path.join(
            data_dir, 'pyposmat.results.{}.out'.format(i_iteration))
        next_kde_path = os.path.join(
            data_dir, 'pyposmat.kde.{}.out'.format(i_iteration + 1))
        config_path = os.path.join(self.root_directory,
                                   self.configuration_filename)

        analyzer = PyposmatDataAnalyzer()
        analyzer.read_configuration_file(filename=config_path)
        analyzer.read_data_file(filename=results_path)
        analyzer.write_kde_file(filename=next_kde_path)

    def read_configuration_file(self, filename=None):
        """Read the configuration file and cache the values used by the run.

        Args:
            filename: path of the configuration file.  When given it replaces
                ``self.configuration_filename``; when ``None`` the stored
                file name is used.
        """
        assert filename is None or isinstance(filename, str)

        if filename is not None:
            self.configuration_filename = filename

        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=self.configuration_filename)

        # keep the frequently used values directly on the sampler
        config = self.configuration
        self.n_iterations = config.n_iterations
        self.qoi_names = config.qoi_names
        self.error_names = config.error_names
        self.parameter_names = config.parameter_names

        for names in (self.parameter_names, self.qoi_names, self.error_names):
            print(names)

    def print_random_seeds(self):
        print(80 * '-')
        print('{:^80}'.format('GENERATED RANDOM SEEDS'))
        print(80 * '-')
        print()
        print('rv_seed={}'.format(self.rv_seed))
        print()
        print('{:^8} {:^8} {:^10}'.format('rank', 'iter', 'seed'))
        print('{} {} {}'.format(8 * '-', 8 * '-', 10 * '-'))
        for i_rank in range(self.mpi_size):
            for i_iter in range(self.n_iterations):
                print('{:^8} {:^8} {:>10}'.format(
                    i_rank, i_iter, self.rv_seeds[i_rank, i_iter]))
Esempio n. 28
0
class PyposmatBokehVisualizer(object):
    def __init__(self):
        bokeh_tools = ['box_select', 'reset', 'box_zoom', 'pan']
        self.bokeh_tools = ', '.join(bokeh_tools)

    def read_configuration(self, filename):
        """Read the configuration file at ``filename`` into ``self.configuration``."""
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=filename)

    def read_data(self, filename):
        """Read a data file and prepare the dataframes used for plotting.

        Copies the parameter, qoi and error names from the data file, prints
        them, and builds one dataframe per group plus ``self.total_df``, the
        three groups placed side by side.

        Args:
            filename: path of the data file to read.
        """
        self.datafile = PyposmatDataFile()
        self.datafile.read(filename=filename)

        # long and short attribute names are both kept, each with its own
        # copy of the list
        self.parameter_names = list(self.datafile.parameter_names)
        self.error_names = list(self.datafile.error_names)
        self.param_names = list(self.datafile.parameter_names)
        self.qoi_names = list(self.datafile.qoi_names)
        self.err_names = list(self.datafile.error_names)

        listings = (("parameter names", self.param_names),
                    ("qoi names", self.qoi_names),
                    ("error_names", self.err_names))
        for heading, names in listings:
            print(heading)
            print(type(names))
            for position, name in enumerate(names):
                print("{:3} {:<20}".format(position, name))

        # generate pandas dataframes
        frame = self.datafile.df
        self.param_df = copy.deepcopy(frame[self.param_names])
        self.qoi_df = copy.deepcopy(frame[self.qoi_names])
        self.err_df = copy.deepcopy(frame[self.err_names])
        self.total_df = pd.concat(
            [self.param_df, self.qoi_df, self.err_df], axis=1)

    def update_data(self, param_x, param_y, err_x, err_y):
        self.total_df['param_x'] = self.total_df[param_x]
        self.total_df['param_y'] = self.total_df[param_y]
        self.total_df['err_x'] = self.total_df[err_x]
        self.total_df['err_y'] = self.total_df[err_y]
        self.source.data = dict(param_x=self.total_df['param_x'],
                                param_y=self.total_df['param_y'],
                                err_x=self.total_df['err_x'],
                                err_y=self.total_df['err_y'])

    def nix(self, val, lst):
        return [x for x in lst if x != val]

    def setup_bokeh_frame(self, doc):
        """Build the parameter and error scatter plots and add them to ``doc``.

        Creates the shared data source, one graph (column selectors,
        axis-limit text boxes, figure and glyph) for the parameters and one
        for the errors, lays them out side by side, loads the initially
        selected columns and registers the widget callbacks.

        Selecting points writes them to ``selected_points.txt`` in the
        current working directory.

        Args:
            doc: the Bokeh document the layout is added to.
        """
        self.source = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))
        self.source_static = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))

        def build_graph(names, x_column, y_column):
            """Create the widgets, figure and glyph of one scatter plot."""
            graph = {}
            # each selector leaves out the other selector's initial choice
            graph['obj_x_select'] = Select(value=names[0],
                                           options=self.nix(names[1], names))
            graph['obj_y_select'] = Select(value=names[1],
                                           options=self.nix(names[0], names))

            graph['x_min_entry'] = TextInput(placeholder='Min X Value',
                                             value='')
            graph['x_max_entry'] = TextInput(placeholder='Max X Value',
                                             value='')
            graph['y_min_entry'] = TextInput(placeholder='Min Y Value',
                                             value='')
            graph['y_max_entry'] = TextInput(placeholder='Max Y Value',
                                             value='')

            graph['plot_width'] = 610
            graph['plot_height'] = 400
            graph['tools'] = self.bokeh_tools

            x_name = graph['obj_x_select'].value
            y_name = graph['obj_y_select'].value
            graph['obj_figure'] = figure(
                plot_width=graph['plot_width'],
                plot_height=graph['plot_height'],
                tools=graph['tools'],
                title=x_name + ' vs. ' + y_name)
            graph['obj_figure'].xaxis.axis_label = x_name
            graph['obj_figure'].yaxis.axis_label = y_name
            graph['obj_glyph'] = Circle(x=x_column,
                                        y=y_column,
                                        size=1,
                                        fill_color='#5F77D5',
                                        line_color='#5F77D5')
            graph['obj_figure'].add_glyph(self.source, graph['obj_glyph'])
            return graph

        def build_pane(graph):
            """Stack selectors, figure and axis-limit boxes of one graph."""
            selectors = bokeh.layouts.row(graph['obj_x_select'],
                                          graph['obj_y_select'])
            x_limits = bokeh.layouts.row(graph['x_min_entry'],
                                         graph['x_max_entry'])
            y_limits = bokeh.layouts.row(graph['y_min_entry'],
                                         graph['y_max_entry'])
            return bokeh.layouts.column(selectors, graph['obj_figure'],
                                        x_limits, y_limits)

        self.param_graph = build_graph(self.param_names, 'param_x', 'param_y')
        self.err_graph = build_graph(self.err_names, 'err_x', 'err_y')

        layout = bokeh.layouts.row(build_pane(self.param_graph),
                                   build_pane(self.err_graph))
        doc.add_root(layout)

        # load the columns that are selected initially
        self.update_data(self.param_graph['obj_x_select'].value,
                         self.param_graph['obj_y_select'].value,
                         self.err_graph['obj_x_select'].value,
                         self.err_graph['obj_y_select'].value)

        # callback functions
        def make_select_callback(graph, source_column, axis):
            """Callback that swaps the column shown on one axis of a graph."""
            def callback(attrname, old, new):
                self.source.data[source_column] = self.total_df[new]
                fig = graph['obj_figure']
                if axis == 'x':
                    fig.title.text = (new + ' vs. ' +
                                      graph['obj_y_select'].value)
                    fig.xaxis.axis_label = new
                else:
                    fig.title.text = (graph['obj_x_select'].value +
                                      ' vs. ' + new)
                    fig.yaxis.axis_label = new
            return callback

        for graph, prefix in ((self.param_graph, 'param'),
                              (self.err_graph, 'err')):
            for axis in ('x', 'y'):
                graph['obj_{}_select'.format(axis)].on_change(
                    'value',
                    make_select_callback(graph,
                                         '{}_{}'.format(prefix, axis),
                                         axis))

        def source_callback(attrname, old, new):
            """Write the selected rows to selected_points.txt."""
            # NOTE(review): new['1d']['indices'] is the selection layout this
            # code was written against - confirm for the Bokeh version used.
            selected_index_list = list(new['1d']['indices'])
            formatted_rows = []
            for i in selected_index_list:
                # the selection holds row positions, hence iloc (.ix is gone
                # from current pandas); the last 4 entries are the helper
                # columns added by update_data
                row = self.total_df.iloc[i][:-4]
                formatted_rows.append(row.tolist())

            with open('selected_points.txt', 'w') as f:
                # the rows hold parameter, qoi and error values, so the
                # header names all three groups
                header = self.param_names + self.qoi_names + self.err_names
                f.write(' '.join(header) + '\n')
                for values in formatted_rows:
                    # same text as str(list) without the brackets; the old
                    # str.replace calls discarded their result
                    f.write(', '.join(repr(v) for v in values) + '\n')

        self.source.on_change('selected', source_callback)

        def make_range_callback(graph, range_name, edge):
            """Callback that moves one end of an axis range of a graph."""
            def callback(attrname, old, new):
                try:
                    bound = float(new)
                except ValueError:
                    # box was cleared or holds no number: keep the range
                    return
                setattr(getattr(graph['obj_figure'], range_name), edge, bound)
            return callback

        for graph in (self.param_graph, self.err_graph):
            for axis in ('x', 'y'):
                for limit, edge in (('min', 'start'), ('max', 'end')):
                    graph['{}_{}_entry'.format(axis, limit)].on_change(
                        'value',
                        make_range_callback(graph, axis + '_range', edge))

    def start_bokeh_server(self):
        """Serve the visualizer at ``/`` and run the server's IO loop.

        The page is opened through a callback scheduled on the IO loop; the
        final ``start()`` call then runs that loop.
        """
        handler = FunctionHandler(self.setup_bokeh_frame)
        self.bokeh_app = Application(handler)
        self.bokeh_server = Server({'/': self.bokeh_app}, num_procs=1)
        self.bokeh_server.start()

        # start io loop for bokeh_server
        io_loop = self.bokeh_server.io_loop
        io_loop.add_callback(self.bokeh_server.show, '/')
        io_loop.start()
Esempio n. 29
0
parallel_plot_config['p_3.5_q_0.5']['data_fn'] = os.path.join(
    parallel_plot_config['p_3.5_q_0.5']['data_directory'],
    'pyposmat.kde.19.out')

if __name__ == "__main__":
    # initialization
    o_plot = PyposmatParallelCoordinatesPlot()

    # add data to plot
    for k, v in parallel_plot_config.items():
        print(k, v)
        if k == 'args':
            pass
        else:
            o_config = PyposmatConfigurationFile()
            o_config.read(filename=v['config_fn'])

            o_data = PyposmatDataFile()
            o_data.read(filename=v['data_fn'])
            o_data.create_normalized_errors(qoi_targets=o_config.qoi_targets)

            o_plot.add_dataframe(color=v['color'],
                                 label=v['label'],
                                 obj=copy.deepcopy(o_data.df),
                                 names=o_data.normalized_names)

    o_plot.make_plot(filename="parallel_plot.png",
                     xlabels=o_data.normalized_error_names,
                     ylabel="% error",
                     title="Si sw",
                     ylim=(-175, 25),
Esempio n. 30
0
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import parallel_coordinates

import copy

if __name__ == "__main__":
    # define paths to configuration and data files
    configuration_path = "/home/seaton/python-repos/pypospack/examples/Ni__eam__born_exp_fs__sensitivityanalysis/data__from_pareto_optimization/pyposmat.config.in"
    data_path = "/home/seaton/python-repos/pypospack/examples/Ni__eam__born_exp_fs__sensitivityanalysis/data__from_pareto_optimization/pyposmat.kde.5.out"

    # init the configuration object
    o_config = PyposmatConfigurationFile()
    o_config.read(configuration_path)

    # init the data file object
    o_data = PyposmatDataFile()
    o_data.read(data_path)

    # normalize the QOIs to prepare for tSNE
    o_normalizer = StandardScaler()
    normal_qoi_arr = o_normalizer.fit_transform(o_data.qoi_df)
    normal_qoi_df = pd.DataFrame(data=normal_qoi_arr, columns=o_data.qoi_names)

    # learn and apply tSNE manifold to the normal QOIs
    o_tsne = TSNE()
    tsne_arr = o_tsne.fit_transform(normal_qoi_df)

    # find KMeans clusters in the tSNE space