class FileSampler():
    """Run a simulation workflow once for every parameter set in a data file.

    Args:
        configuration: a path to a configuration file or a
            PyposmatConfigurationFile instance.
        data: a path to a data file or a PyposmatDataFile instance.
        structure_name: name of the structure handed to the workflow.
        structure_path: path of the structure file handed to the workflow.
        workflow_type: workflow identifier; only 'lmps_thermal_expansion'
            triggers any work in `run`.
        workflow_definition: keyword arguments forwarded to the workflow
            constructor.
    """

    def __init__(self,
                 configuration,
                 data,
                 structure_name,
                 structure_path,
                 workflow_type,
                 workflow_definition):
        self.initialize_configuration(configuration)
        self.initialize_data(data)

        self.structure_name = structure_name
        self.structure_path = structure_path
        # The original attribute name was misspelled; it is kept as an alias
        # so code that reads it continues to work.
        self.strucutre_path = structure_path
        self.workflow_type = workflow_type
        self.workflow_definition = workflow_definition

        self.potential_definition = self.configuration.potential

    def initialize_configuration(self, configuration):
        """Set `self.configuration` from an object or from a path.

        Raises:
            TypeError: if configuration is neither a str nor a
                PyposmatConfigurationFile.
        """
        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
        elif isinstance(configuration, str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        else:
            msg = ("configuration must be a path to a configuration file or an "
                   "instance of the PyposmatConfigurationFile,")
            raise TypeError(msg)

    def initialize_data(self, data):
        """Set `self.data` from an object or from a path.

        Raises:
            TypeError: if data is neither a str nor a PyposmatDataFile.
        """
        if isinstance(data, PyposmatDataFile):
            self.data = data
        elif isinstance(data, str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            msg = ("data must be a path to a data file or an instance of "
                   "PyposmatDataFile.")
            raise TypeError(msg)

    def run(self):
        """Create one directory per sim_id and run the workflow inside it."""
        for index, row in self.data.df.iterrows():
            sim_id = row['sim_id']
            print('working on sim_id:{}'.format(sim_id))

            parameters = OrderedDict(
                [(k, row[k]) for k in self.configuration.parameter_names])

            original_path = os.getcwd()
            os.mkdir(sim_id)
            os.chdir(sim_id)
            try:
                # The instance attributes are used here; the original read
                # module-level names that this class never defines.
                if self.workflow_type == 'lmps_thermal_expansion':
                    workflow = LammpsThermalExpansion(
                        structure_name=self.structure_name,
                        structure_path=self.structure_path,
                        **self.workflow_definition)
                    workflow.create_task_configurations()
                    workflow.create_tasks()
                    workflow.prepare_tasks(
                        potential_definition=self.potential_definition,
                        potential_parameters=parameters)
                    workflow.run()
            finally:
                # always return to the starting directory, even when the
                # workflow raises, so later iterations are not nested
                os.chdir(original_path)
def calculate_kld_parameters(config,data_directory,kld_param_fn='pyposmat.kld_param.out'):
    """Compute KL divergences between iteration files and write them as CSV.

    For every iteration i in the configuration, up to three divergences are
    computed with `calculate_kld`:
        results: results file i-1 against results file i (not for i == 0)
        kde:     kde file i against kde file i+1 (not for i == 0)
        filter:  results file i against kde file i+1

    Args:
        config: path to a configuration file, or a PyposmatConfigurationFile.
        data_directory(str): existing directory holding the
            'pyposmat.results.N.out' and 'pyposmat.kde.N.out' files.
        kld_param_fn(str): path of the CSV file to write.

    NOTE(review): `n_samples` is not a parameter of this function; it is
    looked up in the enclosing module scope when `calculate_kld` is called.
    Confirm that the module defines it.
    """
    assert isinstance(config,str) or isinstance(config,PyposmatConfigurationFile)
    assert os.path.isdir(data_directory)
    assert isinstance(kld_param_fn,str)

    # a str is treated as the path of the configuration file, anything else
    # must already be a configuration object
    if isinstance(config,str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    else:
        assert isinstance(config,PyposmatConfigurationFile)
        o_config = config

    def results_path(iteration):
        return os.path.join(data_directory,'pyposmat.results.{}.out'.format(iteration))

    def kde_path(iteration):
        return os.path.join(data_directory,'pyposmat.kde.{}.out'.format(iteration))

    def divergence(fn_1,fn_2):
        return calculate_kld(
                data_1_fn=fn_1,
                data_2_fn=fn_2,
                names=o_config.free_parameter_names,
                n_samples=n_samples)

    kld = OrderedDict()
    for i in range(o_config.n_iterations):
        row = OrderedDict()
        kld[i] = row
        if i == 0:
            # there is no previous iteration to compare against
            row['results'] = None
            row['kde'] = None
        else:
            row['results'] = divergence(results_path(i-1),results_path(i))
            row['kde'] = divergence(kde_path(i),kde_path(i+1))
        row['filter'] = divergence(results_path(i),kde_path(i+1))
        print(i)

    # write out the kld_parameters file
    columns = ['results','kde','filter']
    with open(kld_param_fn,'w') as f:
        f.write(",".join(['iteration'] + columns)+"\n")
        for kld_iteration,kld_row in kld.items():
            fields = [kld_iteration]
            for column in columns:
                value = kld_row[column]
                # only the first element of a calculate_kld result is written
                fields.append(float('NaN') if value is None else value[0])
            f.write(",".join([str(field) for field in fields])+"\n")
def do_attribute_tests(sampler, config_fn):
    """Assert that a sampler's attributes agree with a configuration file.

    Args:
        sampler: must be exactly a PyposmatBaseSampler (checked with
            ``type(...) is``, so subclasses do not pass).
        config_fn(str): path of the configuration file to compare against.
    """
    from pypospack.pyposmat.engines import PyposmatBaseSampler
    from pypospack.pyposmat.data import PyposmatConfigurationFile

    # test arguments
    assert type(sampler) is PyposmatBaseSampler
    assert type(config_fn) is str

    expected = PyposmatConfigurationFile()
    expected.read(filename=config_fn)

    assert sampler.structure_directory == expected.structures['structure_directory']
    assert sampler.n_iterations == expected.sampling_type['n_iterations']

    # attributes that must match the configuration one-to-one
    for attribute in ('parameter_names',
                      'qoi_names',
                      'error_names',
                      'free_parameter_names'):
        assert getattr(sampler, attribute) == getattr(expected, attribute)

    # constraints: same keys, same value per key, and equal as a whole
    constraints = sampler.parameter_constraints
    assert set(constraints.keys()) == set(expected.sampling_constraints.keys())
    assert all([
        constraints[name] == expected.sampling_constraints[name]
        for name in constraints
    ])
    assert constraints == expected.sampling_constraints

    not_free = [
        name for name in sampler.parameter_names
        if name not in sampler.free_parameter_names
    ]
    assert sampler.constrained_parameter_names == not_free
def test____init____using_path_args():
    """PyposmatDataAnalyzer built from two paths exposes the configuration and data."""
    paths = get_testing_set()
    config_path = paths['config_fn']
    results_path = paths['results_data_fn']

    o = PyposmatDataAnalyzer(config_fn=config_path,
                             results_data_fn=results_path)

    # reference configuration read independently of the analyzer
    reference = PyposmatConfigurationFile()
    reference.read(filename=paths['config_fn'])

    assert isinstance(o,PyposmatDataAnalyzer)

    # configuration side
    assert o.config_fn == paths['config_fn']
    assert isinstance(o.configuration,PyposmatConfigurationFile)
    assert isinstance(o.parameter_names,list)
    assert set(o.parameter_names) == set(reference.parameter_names)
    assert isinstance(o.error_names, list)
    assert set(o.error_names) == set(reference.error_names)
    assert isinstance(o.qoi_names, list)
    assert set(o.qoi_names) == set(reference.qoi_names)

    # data side
    assert o.results_data_fn == paths['results_data_fn']
    assert isinstance(o.results_data,PyposmatDataFile)
    assert isinstance(o.results_df,pd.DataFrame)
def make_latex_table(config, data, qoi_type=None, param_type=None):
    """Load a configuration and a data file and normalize the errors.

    Args:
        config: path to a configuration file, or a PyposmatConfigurationFile.
        data: path to a data file, or a PyposmatDataFile.
        qoi_type: when equal to 'by_qoi_target', the errors are normalized by
            the QoI targets of the configuration.
        param_type: accepted but not used by the visible code.

    Raises:
        AssertionError: if config or data has an unsupported type.
        TypeError: same condition, when assertions are disabled.

    NOTE(review): the function neither builds nor returns a table; the
    selected dataframe is discarded. It looks unfinished -- confirm the
    intended output before relying on it.
    """
    # The original rebound `param_type` to an empty list here, silently
    # discarding the caller's argument; that assignment and the unused
    # `qoi_types` list were removed.

    assert isinstance(config, str) \
            or isinstance(config, PyposmatConfigurationFile)
    assert isinstance(data, str) \
            or isinstance(data, PyposmatDataFile)

    if isinstance(config, str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    elif isinstance(config, PyposmatConfigurationFile):
        o_config = config
    else:
        # only reachable when assertions are stripped (python -O)
        raise TypeError()

    if isinstance(data, str):
        o_data = PyposmatDataFile()
        o_data.read(filename=data)
    elif isinstance(data, PyposmatDataFile):
        o_data = data
    else:
        # only reachable when assertions are stripped (python -O)
        raise TypeError()

    if qoi_type == 'by_qoi_target':
        o_data.create_normalized_errors(normalize_type='by_qoi_target',
                                        qoi_targets=o_config.qoi_targets)
        df = o_data.df[o_data.normalized_error_names]
def test__initialize_configuration__with_object():
    """initialize_configuration accepts an already-read configuration object."""
    config_path = get_testing_set()['config_fn']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)

    analyzer = PyposmatDataAnalyzer()
    analyzer.initialize_configuration(o_config=configuration)
def check_pyposmat_configuration(args):
    """Read and validate the configuration file named by ``args.configuration``.

    Args:
        args: object with a ``configuration`` attribute holding the path of
            the configuration file.
    """
    path = args.configuration

    print('checking pyposmat configuration file')
    print('pyposmat_configuration_file:{}'.format(path))

    # imported here, after the banner has been printed, as in the original
    from pypospack.pyposmat.data import PyposmatConfigurationFile

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=path)
    configuration.validate()
def test__initialize_configuration__with_object_and_path():
    """Passing both a path and an object to initialize_configuration raises TypeError."""
    config_path = get_testing_set()['config_fn']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)

    analyzer = PyposmatDataAnalyzer()
    with pytest.raises(TypeError) as e:
        analyzer.initialize_configuration(config_fn=config_path,
                                          o_config=configuration)
def get_qoi_database_from_PyposmatConfigurationFile(config_fn):
    """Return the ``qois`` mapping of a configuration file after checking its shape.

    Args:
        config_fn: path of the configuration file.

    Returns:
        The ``qois`` attribute of the configuration that was read.

    Raises:
        AssertionError: if ``qois`` is not exactly an OrderedDict, a key is
            not exactly a str, or an entry's keys are not exactly
            'qoi_type', 'structures' and 'target'.
    """
    required_keys = {'qoi_type', 'structures', 'target'}

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)
    qois = configuration.qois

    assert type(qois) is OrderedDict
    for name, definition in qois.items():
        assert type(name) is str
        assert set(definition.keys()) == required_keys

    return configuration.qois
def gmm_analysis(config_fn,
                 data_fn,
                 names,
                 output_directory='gmm_analysis',
                 max_components=20):
    """Fit Gaussian mixtures of increasing size and plot the AIC/BIC selection.

    Mixtures with 1 .. max_components-1 components are fitted to the columns
    `names` of the data file. The AIC/BIC curves and the BIC-selected model
    are plotted into `output_directory`.

    Args:
        config_fn(str): path of an existing configuration file.
        data_fn(str): path of an existing data file.
        names(list): column names of the data to fit.
        output_directory(str): directory for the plots; created if missing.
        max_components(int): exclusive upper bound on the number of components.

    Raises:
        AssertionError: if a path is not a str or does not name a file.
    """
    assert isinstance(config_fn, str)
    assert isinstance(data_fn, str)
    assert os.path.isfile(config_fn)
    assert os.path.isfile(data_fn)

    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)
    o_data.create_normalized_errors(normalize_type='by_qoi_target',
                                    qoi_targets=o_config.qoi_targets)
    o_data.df['score'] = o_data.df[o_config.normalized_error_names].abs().sum(
        axis=1)

    data = o_data.df[names]

    n_components = np.arange(1, max_components)
    models = [
        GaussianMixture(n_components=n,
                        covariance_type='full',
                        random_state=0).fit(data) for n in n_components
    ]

    # Each criterion is evaluated once per model. The index of the smallest
    # value (first one on ties) selects the model.
    aic_criteria = [m.aic(data) for m in models]
    aic_idx = min(range(len(aic_criteria)), key=aic_criteria.__getitem__)
    aic_n_components = n_components[aic_idx]

    bic_criteria = [m.bic(data) for m in models]
    bic_idx = min(range(len(bic_criteria)), key=bic_criteria.__getitem__)
    bic_n_components = n_components[bic_idx]

    # plot the criteria
    print('bic_n_components:{}'.format(bic_n_components))
    print('aic_n_components:{}'.format(aic_n_components))
    plot_fn = os.path.join(output_directory, 'aic_bic_plot.jpg')
    plot_gmm_aic_bic(filename=plot_fn,
                     n_components=n_components,
                     aic_criteria=aic_criteria,
                     bic_criteria=bic_criteria,
                     aic_n_components=aic_n_components,
                     bic_n_components=bic_n_components)

    # `models` is indexed by position, not by component count: models[k] has
    # k+1 components. The original indexed with the component count, which
    # selected the wrong model and could run past the end of the list.
    # The plot is also written to `output_directory` rather than to a
    # hard-coded 'gmm_analysis' directory.
    filename = os.path.join(output_directory, 'gmm_analysis.jpg')
    plot_gmm(models[bic_idx], data, filename=filename)
def test__read_configuration():
    """read_configuration populates the name lists and QoI targets of the rugplot."""
    from pypospack.pyposmat.data import PyposmatConfigurationFile

    # reading the file directly first shows that the file itself is readable
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)

    rugplot = PyposmatParetoRugplot()
    rugplot.read_configuration(filename=config_fn)

    assert type(rugplot.parameter_names) is list
    assert type(rugplot.qoi_names) is list
    assert type(rugplot.error_names) is list
    assert type(rugplot.qoi_validation_names) is list
    assert type(rugplot.error_validation_names) is list
    assert isinstance(rugplot.qoi_targets, dict)
class PyposmatPostProcessorTestHarness(object):
    """Holds a configuration file and a data file for post-processor tests.

    Args:
        configuration_fn: path of the configuration file, or None to skip
            reading it.
        datafile_fn: path of the data file, or None to skip reading it.

    Attributes:
        configuration: the PyposmatConfigurationFile that was read, or None.
        datafile: the PyposmatDataFile that was read, or None.
    """

    def __init__(self, configuration_fn, datafile_fn):
        self.configuration_fn = configuration_fn
        self.datafile_fn = datafile_fn

        # Always define both attributes. The original left them unset when
        # the corresponding filename was None.
        self.configuration = None
        self.datafile = None

        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(configuration_fn)

        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)

    def get_parameter_names(self):
        """Return the parameter names of the configuration.

        Raises:
            AttributeError: if no configuration file was read.
        """
        return self.configuration.parameter_names
def write_configuration_file(config_fn):
    """Write the Si Stillinger-Weber configuration to ``config_fn`` and read it back.

    Args:
        config_fn: path of the configuration file to write.
    """
    import Si_sw

    # assemble the configuration from the definitions in the Si_sw module
    configuration = PyposmatConfigurationFile()
    configuration.qois = Si_sw.Si_sw_qoi_db.qois
    configuration.potential = Si_sw.Si_sw_potential
    configuration.structures = Si_sw.Si_sw_structures
    configuration.sampling_type = Si_sw.Si_sw_sampling
    configuration.sampling_distribution = Si_sw.Si_sw_parameter_distribution

    # write the file, then read it back from disk
    configuration.write(filename=config_fn)
    configuration.read(filename=config_fn)
def show_qoi_targets(config_fn, data_fn):
    """Print each QoI target next to the mean of that QoI in the data file.

    Args:
        config_fn: path of the configuration file providing ``qoi_targets``.
        data_fn: path of the data file providing the QoI columns.
    """
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    row_format = "{:20} {:10} {:10}"
    for qoi_name, qoi_target in configuration.qoi_targets.items():
        try:
            average = datafile.df[qoi_name].mean()
        except KeyError as e:
            # the data file has no column for this QoI
            average = 'no value'
        print(row_format.format(qoi_name,qoi_target,average))
def write_configuration_file(config_fn):
    """Write the Ni EAM configuration to ``config_fn`` and read it back.

    Args:
        config_fn(str): the name of the configuration file
    """
    # The original imported PyposmatDataFile, which the body never uses,
    # while the class it instantiates was not imported here.
    from pypospack.pyposmat.data import PyposmatConfigurationFile
    # The original alias was `Ni__eam` but every use below says `Ni_eam`,
    # which raised NameError; the alias now matches the uses.
    import Ni__eam__morse_exp_universal as Ni_eam

    Ni_eam_configuration = PyposmatConfigurationFile()
    Ni_eam_configuration.qois = Ni_eam.Ni_qoi_db.qois
    Ni_eam_configuration.potential = Ni_eam.Ni_eam_potential_formalism
    Ni_eam_configuration.structures = Ni_eam.Ni_structure_db
    Ni_eam_configuration.sampling_type = Ni_eam.Ni_eam_sampling
    Ni_eam_configuration.sampling_distribution = Ni_eam.Ni_eam_parameter_distribution

    # write the file, then read it back from disk
    Ni_eam_configuration.write(filename=config_fn)
    Ni_eam_configuration.read(filename=config_fn)
class BaseAnalysis(object):
    """Common state for analyses: a configuration, a data file and an output path.

    Args:
        configuration: path of a configuration file, or a
            PyposmatConfigurationFile (stored as given, not copied).
        data: path of a data file, or a PyposmatDataFile (deep-copied).
        output_path: None, or a directory path. An existing directory at that
            path is deleted and recreated empty.
    """

    def __init__(self, configuration, data, output_path=None):
        self.configuration = None
        self.data = None
        self.output_path = None

        # the data step reads the configuration, so this order matters
        self._initialize_configuration(configuration=configuration)
        self._initialize_data(data=data)
        self._initialize_output_path(path=output_path)

    def _initialize_configuration(self, configuration):
        """Set ``self.configuration`` from a path or an object."""
        if isinstance(configuration, str):
            assert os.path.isfile(configuration)
            loaded = PyposmatConfigurationFile()
            self.configuration = loaded
            loaded.read(filename=configuration)
            return

        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
            return

        raise TypeError('configuration cannot be type:{}'.format(
            str(type(configuration))))

    def _initialize_data(self, data):
        """Set ``self.data`` from a path or an object and normalize its errors."""
        if isinstance(data, str):
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        elif isinstance(data, PyposmatDataFile):
            # work on a private copy so the caller's object is not modified
            self.data = deepcopy(data)
        else:
            raise TypeError('data cannot be type:{}'.format(str(type(data))))

        targets = self.configuration.qoi_targets
        self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                           qoi_targets=targets)

    def _initialize_output_path(self, path):
        """Set ``self.output_path``, recreating the directory when a path is given."""
        if path is None:
            self.output_path = None
            return

        if not isinstance(path, str):
            raise TypeError

        # start from an empty directory: remove whatever is already there
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)
        self.output_path = path
def make_rug_plot(config_fn,
                  data_fn,
                  ax=None,
                  plot_fn='rugplot.png'):
    """Draw one rug (row of tick marks) per QoI and save the figure.

    Args:
        config_fn: path of the configuration file providing ``qoi_targets``.
        data_fn: path of the data file providing the error columns.
        ax: matplotlib Axes to draw on; a new figure is created when None.
        plot_fn: path of the image file to write.
    """
    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    qoi_targets = o_config.qoi_targets
    #qoi_targets = get_qoi_targets(o_config)
    error_names = o_data.error_names
    qoi_names = o_data.qoi_names

    # create normalized error
    df = copy.deepcopy(o_data.df[error_names])
    for qn in qoi_names:
        en = "{}.err".format(qn)
        nen = "{}.nerr".format(qn)
        q = qoi_targets[qn]
        # NOTE(review): this evaluates as (err/q) - q because of operator
        # precedence. Confirm that this, rather than err/q, is the intended
        # normalization.
        df[nen] = o_data.df[en]/q-q

    (_nrows, _ncols) = o_data.df.shape

    if ax is None:
        fig, ax = plt.subplots(nrows=1, ncols=1)
    else:
        # A caller-supplied Axes already belongs to a figure. The original
        # left `fig` unbound here and failed at savefig.
        fig = ax.figure

    for iq, qn in enumerate(qoi_names):
        # every point of a QoI is drawn at the same height, iq+1
        ax.scatter(
            df["{}.nerr".format(qn)],
            _nrows*[iq+1],
            marker='|',
            s=100.,
            color='k'
        )

    plt.sca(ax)
    # tick 0 gets an empty label so that QoI k sits on tick k
    plt.yticks(range(len(qoi_names)+1), ['']+qoi_names)
    fig.savefig(plot_fn)
class Manifold(object):
    """Base class for manifold learning on pyposmat data.

    Args:
        pyposmat_configuration: path of a configuration file, or a
            PyposmatConfigurationFile.
        pyposmat_data: path of a data file, or a PyposmatDataFile.
        manifold_config: must be None; any other value raises
            NotImplementedError.
    """

    def __init__(self, pyposmat_configuration, pyposmat_data, manifold_config=None):
        self.configuration = None
        self.data = None
        self.manifold_configuration = None

        self.initialize_configuration(pyposmat_configuration=pyposmat_configuration)
        self.initialize_data(pyposmat_data=pyposmat_data)
        self.initialize_manifold_config(manifold_config=manifold_config)

    def initialize_configuration(self, pyposmat_configuration):
        """Set ``self.configuration`` from an object or a path.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(pyposmat_configuration, PyposmatConfigurationFile):
            self.configuration = pyposmat_configuration
        elif isinstance(pyposmat_configuration, str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=pyposmat_configuration)
        else:
            raise TypeError('pyposmat_configuration must be either a path or PyposmatConfigurationFile')

    def initialize_data(self, pyposmat_data):
        """Set ``self.data`` from an object or a path.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(pyposmat_data, PyposmatDataFile):
            self.data = pyposmat_data
        elif isinstance(pyposmat_data, str):
            # instantiate the class; the original assigned the class itself
            # and then called read() on it without an instance
            self.data = PyposmatDataFile()
            self.data.read(filename=pyposmat_data)
        else:
            raise TypeError('pyposmat_data must either be a path or a PyposmatDataFile')

    def initialize_manifold_config(self, manifold_config=None):
        """Set ``self.manifold_configuration``; only None is supported.

        This is the name and keyword that ``__init__`` calls.

        Raises:
            NotImplementedError: if manifold_config is not None.
        """
        if manifold_config is None:
            self.manifold_configuration = None
        else:
            raise NotImplementedError

    def initialize_manifold_configuration(self, manifold_configuration=None):
        """Original spelling of `initialize_manifold_config`, kept for callers."""
        self.initialize_manifold_config(manifold_config=manifold_configuration)

    def learn_manifold(self, names, scaling_type):
        """Learn the manifold; to be provided by subclasses."""
        raise NotImplementedError

    def scale_data(self, X, scaling_type):
        """Return X scaled according to scaling_type.

        Args:
            X: the data to scale.
            scaling_type: 'standard' applies ``preprocessing.scale``;
                'none' returns X unchanged.

        Raises:
            ValueError: for any other scaling_type.
        """
        # `==` comparisons; the original used `=`, which is a syntax error
        if scaling_type == 'standard':
            return preprocessing.scale(X)
        if scaling_type == 'none':
            return X
        raise ValueError(
            "scaling_type must be 'standard' or 'none', got {!r}".format(scaling_type))
def set_xticks(self,config,names,ax=None):
    """Label the x ticks of an axis with the LaTeX labels of the given names.

    Args:
        config: a PyposmatConfigurationFile, or the path of one.
        names: the string 'qoi_names' to use the QoI names of the
            configuration, or an explicit list of names.
        ax: the axis to modify; ``self.ax`` is used when None.

    Raises:
        TypeError: if config or names has an unsupported type.
    """
    if isinstance(config,PyposmatConfigurationFile):
        config_ = config
    elif isinstance(config,str):
        config_ = PyposmatConfigurationFile()
        config_.read(config)
    else:
        raise TypeError()

    if names == 'qoi_names':
        names_ = config_.qoi_names
    elif isinstance(names,list):
        names_ = names
    else:
        raise TypeError()

    # The original only bound ax_ when ax was None, so passing an axis
    # explicitly raised an unbound-name error below.
    ax_ = self.ax if ax is None else ax

    latex_labels = [config_.latex_labels[k]['label'] for k in names_]
    print(latex_labels)
    ax_.set_xticks(range(len(latex_labels)))
    ax_.set_xticklabels(latex_labels)
def test__get_header_string():
    """get_header_string returns the two header lines built from the parameter names."""
    paths = get_testing_set()

    reference = PyposmatConfigurationFile()
    reference.read(filename=paths['config_fn'])

    assert os.path.isfile(paths['config_fn'])

    badparameters = PyposmatBadParametersFile(
        filename=paths['badparameters_out_fn'],
        config_fn=paths['config_fn'])
    header = badparameters.get_header_string()

    assert type(header) is str

    # line 1 names every column, line 2 gives the type of every column
    parameter_names = reference.parameter_names
    column_names = ['sim_id'] + parameter_names + ['reason']
    column_types = ['sim_id'] + len(reference.parameter_names)*['param'] + ['reason']

    expected = "{}\n".format(",".join(column_names))
    expected += "{}\n".format(",".join(column_types))

    assert expected == header
def test____init____w_filename_config_fn():
    """A PyposmatBadParametersFile built from a filename and a config path is configured."""
    paths = get_testing_set()
    out_fn = paths['badparameters_out_fn']

    assert os.path.isfile(paths['badparameters_in_fn'])
    assert os.path.isfile(paths['config_fn'])

    # start without an output file left over from a previous run
    if os.path.isfile(out_fn):
        m = "removing the badparameter_out_file:{}".format(out_fn)
        print(m)
        os.remove(out_fn)

    o = PyposmatBadParametersFile(filename=paths['badparameters_out_fn'],
                                  config_fn=paths['config_fn'])

    assert o.filename == paths['badparameters_out_fn']

    from pypospack.pyposmat.data import PyposmatConfigurationFile
    reference = PyposmatConfigurationFile()
    reference.read(filename=paths['config_fn'])

    assert isinstance(o.configuration, PyposmatConfigurationFile)
    assert isinstance(o.parameter_names, list)
    assert set(o.parameter_names) == set(reference.parameter_names)
def test__read_configuration_file():
    """The MgO Buckingham configuration yields the expected parameter names.

    Order does not matter: each list must have the expected length and
    every name read must appear in the expected list.
    """
    scenario = _scenario_MgO_buckingham
    config_path = scenario['configuration_fn']
    expected_names = scenario['parameter_names']
    expected_free_names = scenario['free_parameter_names']

    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_path)

    # all parameters
    assert len(configuration.parameter_names) == len(expected_names)
    for name in configuration.parameter_names:
        assert name in expected_names

    # free parameters
    assert len(configuration.free_parameter_names) == len(expected_free_names)
    for name in configuration.free_parameter_names:
        assert name in expected_free_names
class PyposmatBadParametersFile(object):
    """Reads and writes the file that records parameter sets whose simulation failed.

    The file has two header lines (column names, then column types) followed
    by one comma-separated row per failed simulation.

    Args:
        filename(str): path of the bad-parameters file.
        config_fn(str): path of a configuration file. Mutually exclusive with
            o_config.
        o_config(PyposmatConfigurationFile): an already-read configuration.
            Mutually exclusive with config_fn.
    """

    def __init__(
            self,
            filename='pyposmat.badparameters.out',
            config_fn=None,
            o_config=None,
    ):
        assert filename is None or isinstance(filename, str)
        assert config_fn is None or isinstance(config_fn, str)
        assert o_config is None or isinstance(o_config,
                                              PyposmatConfigurationFile)

        self.filename = filename
        self._parameter_names = None
        self.w_cluster_id = False

        self.df = None
        self.parameter_df = None

        self.initialize_configuration(config_fn=config_fn, o_config=o_config)

    def initialize_configuration(self, config_fn, o_config):
        """Set ``self.configuration`` from a path, an object, or neither.

        Raises:
            TypeError: if both arguments are given, or either has a wrong type.
        """
        if isinstance(config_fn, str) and o_config is None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=config_fn)
        elif isinstance(o_config,
                        PyposmatConfigurationFile) and config_fn is None:
            self.configuration = o_config
        elif config_fn is None and o_config is None:
            self.configuration = None
        elif isinstance(o_config, PyposmatConfigurationFile) and isinstance(
                config_fn, str):
            m = (
                "Cannot configure the PyposmatDataAnalyzer with both options "
                "o_config and config_fn both being specified.  Choose only one."
            )
            raise TypeError(m)
        else:
            m = ("wrong types:\n" "\to_config:{}\n" "\tconfig_fn:{}\n")
            m = m.format(type(o_config), type(config_fn))
            raise TypeError(m)

    @property
    def parameter_names(self):
        """Parameter names of the configuration, or None without a configuration."""
        if isinstance(self.configuration, PyposmatConfigurationFile):
            return self.configuration.parameter_names
        else:
            return None

    @property
    def names(self):
        """Column names: sim_id, optional cluster_id, the parameters, reason."""
        names = ['sim_id']
        if self.df is not None:
            if 'cluster_id' in self.df.columns:
                names += ['cluster_id']
        names += self.parameter_names
        names += ['reason']
        return names

    @property
    def types(self):
        """Column types, parallel to `names`."""
        types = ['sim_id']
        if self.df is not None:
            if 'cluster_id' in self.df.columns:
                # append to `types`; the original appended to an undefined
                # `names` and raised NameError whenever cluster_id was present
                types += ['cluster_id']
        types += len(self.parameter_names) * ['param']
        types += ['reason']
        return types

    @property
    def n_samples(self):
        """Number of rows in the dataframe."""
        (n_rows, n_cols) = self.df.shape
        return n_rows

    def get_header_string(self):
        """Return the two header lines (names, then types), each newline-terminated."""
        header_line_1 = ['sim_id']\
                + self.parameter_names\
                + ['reason']
        header_line_2 = ['sim_id']\
                + len(self.parameter_names)*['param']\
                + ['reason']

        str_header_section = "{}\n".format(",".join(header_line_1))
        str_header_section += "{}\n".format(",".join(header_line_2))
        return str_header_section

    def write_header_section(self, filename=None):
        """Create (or truncate) the file and write the header lines.

        Args:
            filename(str): when given, replaces ``self.filename``.
        """
        assert isinstance(self.parameter_names, list)
        assert filename is None or isinstance(filename, str)

        if filename is not None:
            self.filename = filename

        header_str = self.get_header_string()

        with open(self.filename, 'w') as f:
            f.write(header_str)

    def write_simulation_exception(self, sim_id, exception):
        """Append one row describing a failed simulation.

        Args:
            sim_id: simulation identifier; converted to str if necessary.
            exception: an exception providing ``explain()`` and a
                ``kwargs['parameters']`` mapping keyed by parameter name.
        """
        is_debug = False
        assert isinstance(exception, BaseException)
        assert isinstance(self.parameter_names, list)

        if not isinstance(sim_id, str):
            sim_id = str(sim_id)

        s_reason = exception.explain()
        s = ",".join(
            [sim_id] \
            + [str(exception.kwargs['parameters'][k]) for k in self.parameter_names] \
            + [s_reason]
        ) + "\n"

        if is_debug:
            print(s)

        with open(self.filename, 'a') as f:
            f.write(s)

    def read(self, filename=None):
        """Read the file into ``self.df``.

        Every field is parsed as a float. Only columns whose name ends with
        'latticetype', 'sim_id' or 'reason' may hold non-numeric text.

        Raises:
            FileNotFoundError: if the file does not exist.
            ValueError: if a non-numeric value appears in any other column.
        """
        if filename is not None:
            self.filename = filename

        try:
            with open(self.filename, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError as e:
            print("cannot find file: {}".format(self.filename))
            print("current_working_dir: {}".format(os.getcwd()))
            raise

        self._names = [s.strip() for s in lines[0].strip().split(',')]
        self._types = [s.strip() for s in lines[1].strip().split(',')]

        table = []
        for line in lines[2:]:
            tokens = line.strip().split(',')
            values = []
            for i, v in enumerate(self._names):
                try:
                    values.append(float(tokens[i]))
                except ValueError as e:
                    if v.endswith(('latticetype', 'sim_id', 'reason')):
                        values.append(tokens[i])
                    else:
                        print(v)
                        print(tokens[i])
                        raise
            table.append(values)

        self.df = pd.DataFrame(data=table, columns=self._names)

    def write(self, filename):
        """Write the header lines and every row of ``self.df`` to filename."""
        fn = filename

        s = [",".join(self.names)]
        s += [",".join(self.types)]
        s += [
            ",".join([str(v) for v in k])
            for k in self.df[self.names].values.tolist()
        ]

        with open(fn, 'w') as f:
            f.write("\n".join(s))

    def write_subselect(self, filename=None):
        """Write the sub-selected dataframe ``self.sub_df`` to a file.

        Args:
            filename(str): output path. When None, the name is built from
                ``self.score_names``.

        Returns:
            str: the path that was written.

        Raises:
            ValueError: if filename is not a str, no subselection exists, or
                ``sub_df`` is not a DataFrame.

        NOTE(review): nothing in this class assigns ``sub_df`` or
        ``score_names``; presumably another component sets them -- confirm.
        """
        _filename = None
        if filename is None:
            _filename = "subselect.{}.out".format(".".join(self.score_names))
        elif type(filename) is str:
            _filename = filename
        else:
            err_msg = "the filename argument for this method must be a string"
            raise ValueError(err_msg)

        # The original tested `type(self.sub_df) is None`, which is never
        # true. getattr also covers the case where sub_df was never assigned.
        sub_df = getattr(self, 'sub_df', None)
        if sub_df is None:
            err_msg = "no subselection has been done on the data"
            raise ValueError(err_msg)
        if not isinstance(sub_df, pd.DataFrame):
            print(type(sub_df))
            err_msg = "the sub_df attribute must be a pandas.DataFrame"
            raise ValueError(err_msg)

        # build the string
        str_out = ','.join(self.names) + "\n"
        str_out += ','.join(self.types) + "\n"
        for row in sub_df.iterrows():
            _row = list(row[1])  # unpack the (index, Series) tuple
            # row[0] is the sim_id; a non-integer sim_id raises ValueError
            _row[0] = int(_row[0])
            str_out += ','.join([str(s) for s in _row]) + "\n"

        with open(_filename, 'w') as f:
            f.write(str_out)

        return _filename
if __name__ == "__main__": import pypospack.utils # pypospack root directory pypospack_root_dir = pypospack.utils.get_pypospack_root_directory() config_fn = os.path.join( pypospack_root_dir, 'data','Si__sw__data','pareto_optimization_unconstrained', 'pyposmat.config.in') data_fn = os.path.join( pypospack_root_dir, 'data','Si__sw__data','pareto_optimization_unconstrained', 'pyposmat.kde.20.out') o_config = PyposmatConfigurationFile() o_config.read(filename=config_fn) o_data = PyposmatDataFile() o_data.read(filename=data_fn) manifold_learn_config = OrderedDict() manifold_learn_config['manifold_type'] = 'tsne' manifold_learn_config['pypospack_config_fn'] = config_fn manifold_learn_config['pypospack_data_fn'] = data_fn fig,ax = plt.subplots(1,3) if manifold_learn_config['manifold_type'] == 'mds': manifold['config'] = OrderedDict() manifold['config']['n_components'] = 2
# type=str, # help="location of the data directory") #parse_args = parser.parse_args() _n_potentials = 30 _data_fn = "data__Ni__eam__born_exp_bjs_01\pyposmat.results.0.out" #_data_fn = "results.temp.out" _config_fn = "data__Ni__eam__born_exp_bjs_01\pyposmat.config.in" _plot_fn = "rugplot.png" make_rugplots = False print(80*'-') print("reading the configuration file {}...".format(_config_fn)) config=PyposmatConfigurationFile() config.read(filename=_config_fn) qoi_targets=get_qoi_targets(config) print("reading the data file {}...".format(_data_fn)) datafile=PyposmatDataFile() datafile.read(filename=_data_fn) from pypospack.pareto import pareto df = copy.deepcopy(datafile.df) nr,nc = df.shape _nsimulations = OrderedDict() _nsimulations['start'] = nr abs_error_names = ["{}.abserr".format(q) for q in datafile.qoi_names] for q in datafile.qoi_names: qe = "{}.err".format(q) qne = "{}.abserr".format(q)
class PyposmatParallelCoordinates(object):
    """Holds the configuration and data needed for a parallel-coordinates plot.

    Both attributes are exposed as type-checked properties: ``configuration``
    (PyposmatConfigurationFile) and ``data`` (PyposmatDataFile).
    """

    def __init__(self):
        self._configuration = PyposmatConfigurationFile()
        self._data = PyposmatDataFile()

    def set_configuration(self, configuration):
        """Set the configuration from a path or an object.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(configuration, str):
            self.set_configuration_by_path(path=configuration)
        elif isinstance(configuration, PyposmatConfigurationFile):
            # dispatch to the method this class actually defines; the
            # original called a non-existent `set_configuration_by_object`
            self.set_configuration_by_obj(config_obj=configuration)
        else:
            raise TypeError

    def set_data(self, data):
        """Set the data from a path or an object.

        Raises:
            TypeError: for any other argument type.
        """
        if isinstance(data, str):
            self.set_data_by_path(path=data)
        elif isinstance(data, PyposmatDataFile):
            self.set_data_by_obj(data_obj=data)
        else:
            raise TypeError

    def set_configuration_by_path(self, path):
        """Read the configuration file at path."""
        # the original had a stray ':' after this assert (syntax error)
        assert isinstance(path, str)
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(path)

    def set_configuration_by_obj(self, config_obj):
        """Use an already-read configuration object."""
        # check the parameter itself; the original tested an undefined name
        assert isinstance(config_obj, PyposmatConfigurationFile)
        self.configuration = config_obj

    def set_data_by_path(self, path):
        """Read the data file at path."""
        assert isinstance(path, str)
        self.data = PyposmatDataFile()
        self.data.read(path)

    def set_data_by_obj(self, data_obj):
        """Use an already-read data object."""
        assert isinstance(data_obj, PyposmatDataFile)
        # assign the parameter; the original assigned an undefined `data`
        self.data = data_obj

    @property
    def configuration(self):
        return self._configuration

    @configuration.setter
    def configuration(self, configuration):
        assert isinstance(configuration, PyposmatConfigurationFile)
        self._configuration = configuration

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        # validate the value being assigned; the original tested an
        # undefined `configuration`
        assert isinstance(data, PyposmatDataFile)
        self._data = data

    def plot(self, path):
        """Validate the output path; the visible code does no plotting."""
        assert isinstance(path, str)
class PyposmatIterativeSampler(object):
    """Runs iterative sampling across MPI ranks.

    In each iteration every rank samples inside its own 'rank_N' directory.
    Rank 0 then merges the per-rank results into
    '<data_directory>/pyposmat.results.<i>.out' and writes the kde file for
    the next iteration.

    Args:
        configuration_filename: path of the configuration file.
        is_restart(bool): stored on the instance; the visible code does not
            act on it.
    """

    def __init__(self, configuration_filename, is_restart=False):
        self.RANK_DIR_FORMAT = 'rank_{}'
        self.mpi_comm = None
        self.mpi_rank = None
        self.mpi_size = None
        self.mpi_nprocs = None
        self.mpi_procname = None
        self.n_iterations = None
        self.rv_seed = None
        self.rv_seeds = None

        self.configuration_filename = configuration_filename
        self.configuration = None
        self.mc_sampler = None

        self.root_directory = os.getcwd()
        self.data_directory = 'data'
        self.is_restart = is_restart
        self.start_iteration = 0

    def run_restart(self):
        """Inspect the files left by a previous run. No restart is performed yet."""
        if self.configuration is None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(self.configuration_filename)

        # determine if there was a seed file
        # (the original called a misspelled, non-existent method name)
        _init_fn = self.find_initial_parameters_file()

        # get contents of the data directory if it exists
        # (the original called analyze_rank_directories with a keyword that
        # method does not accept)
        _data_dir = os.path.join(self.root_directory, self.data_directory)
        self.i_iterations, _data_dir_fns = self.analyze_data_directories(
            data_dir=_data_dir)

        # get contents of the rank directories
        _root_dir = self.root_directory
        n_ranks, _rank_data_fns = self.analyze_rank_directories(
            root_dir=_root_dir)

        if self.mpi_rank == 0:
            pass
        pass

    def run_all(self):
        """Run every iteration: sample on all ranks, then merge and analyze on rank 0."""
        self.setup_mpi_environment()
        self.determine_rv_seeds()
        MPI.COMM_WORLD.Barrier()

        self.start_iteration = 0
        for i in range(self.start_iteration, self.n_iterations):
            if self.mpi_rank == 0:
                print(80 * '-')
                print('{:80}'.format('BEGIN ITERATION {}/{}'.format(
                    i + 1, self.n_iterations)))
                print(80 * '-')
            MPI.COMM_WORLD.Barrier()

            self.run_simulations(i)
            MPI.COMM_WORLD.Barrier()

            if self.mpi_rank == 0:
                print('merging files')
                self.merge_files(i)
                self.analyze_results(i)
            MPI.COMM_WORLD.Barrier()
        print(80 * '-')
        print('JOBCOMPLETE')

    def run_simulations(self, i_iteration):
        """Run this rank's share of the samples for one iteration.

        Args:
            i_iteration(int): index of the iteration to run.
        """
        self.rank_directory = self.RANK_DIR_FORMAT.format(self.mpi_rank)

        # if the directory exists delete it
        if os.path.isdir(self.rank_directory):
            shutil.rmtree(self.rank_directory)
        os.makedirs(self.rank_directory)

        # change execution context for this rank
        # this provides a directory for each worker so that the
        # disk IO writes don't conflict
        os.chdir(self.rank_directory)
        try:
            _config_filename = os.path.join(self.root_directory,
                                            self.configuration_filename)
            _results_filename = os.path.join(self.root_directory,
                                             self.rank_directory,
                                             'pyposmat.results.out')

            # set random seed
            np.random.seed(self.rv_seeds[self.mpi_rank, i_iteration])

            # initialize()
            self.mc_sampler = PyposmatMonteCarloSampler(
                filename_in=_config_filename,
                filename_out=_results_filename,
                mpi_rank=self.mpi_rank,
                mpi_size=self.mpi_size)
            self.mc_sampler.create_base_directories()
            self.mc_sampler.read_configuration_file()
            # the structure directory is given relative to the root
            # directory, which is one level above the rank directory
            _structure_dir = self.mc_sampler.configuration.structures[
                'structure_directory']
            self.mc_sampler.configuration.structures['structure_directory'] = \
                    os.path.join('..',_structure_dir)
            self.mc_sampler.configure_qoi_manager()
            self.mc_sampler.configure_task_manager()
            self.mc_sampler.configure_pyposmat_datafile_out()
            #pyposmat_datafile_out = PyposmatDataFile(filename_out)

            if self.mpi_rank == 0:
                self.mc_sampler.print_structure_database()
                self.mc_sampler.print_sampling_configuration()
            if self.mpi_rank == 0 and i_iteration == 0:
                self.mc_sampler.print_initial_parameter_distribution()
            if self.mpi_rank == 0:
                print(80 * '-')
            MPI.COMM_WORLD.Barrier()

            _mc_config = self.mc_sampler.configuration.sampling_type[i_iteration]
            _mc_sample_type = _mc_config['type']
            _mc_n_samples = _mc_config['n_samples']

            # determine number of sims for this rank: an even share, plus one
            # of the leftover samples for the lowest ranks
            _n_samples_per_rank = int(_mc_n_samples / self.mpi_size)
            if _mc_n_samples % self.mpi_size > self.mpi_rank:
                _n_samples_per_rank += 1

            if _mc_sample_type == 'parametric':
                self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                                n_samples=_n_samples_per_rank)
            elif _mc_sample_type == 'kde':
                # use the file named in the configuration if there is one,
                # otherwise the kde file of this iteration
                if 'file' in _mc_config:
                    _filename_in = os.path.join(self.root_directory,
                                                _mc_config['file'])
                else:
                    _filename_in = os.path.join(
                        self.root_directory, self.data_directory,
                        'pyposmat.kde.{}.out'.format(i_iteration))

                self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                                n_samples=_n_samples_per_rank,
                                                filename=_filename_in)
            # get parameters from file
            elif _mc_sample_type == 'from_file':
                _filename_in = os.path.join(self.root_directory,
                                            _mc_config['file'])
                self.mc_sampler.run_simulations(i_iteration=i_iteration,
                                                n_samples=_n_samples_per_rank,
                                                filename=_filename_in)
        finally:
            # return to root directory, also when the sampling raised
            os.chdir(self.root_directory)

    def setup_mpi_environment(self):
        """Record the MPI communicator, rank, size and processor name."""
        self.mpi_comm = MPI.COMM_WORLD
        self.mpi_rank = self.mpi_comm.Get_rank()
        self.mpi_size = self.mpi_comm.Get_size()
        self.mpi_procname = MPI.Get_processor_name()
        if self.mpi_rank == 0:
            self.print_mpi_environment()

    def print_mpi_environment(self):
        """Print the MPI communicator size."""
        print(80 * '-')
        print('{:^80}'.format('MPI COMMUNICATION INFORMATION'))
        print(80 * '-')
        print('mpi_size={}'.format(self.mpi_size))

    def determine_rv_seeds(self):
        """Draw one random seed per (rank, iteration) from the master seed."""
        _randint_low = 0
        _randint_high = 2147483647

        # set original seed
        if self.rv_seed is None:
            self.rv_seed = np.random.randint(low=_randint_low,
                                             high=_randint_high)
        np.random.seed(self.rv_seed)

        # determine rank seed
        self.rv_seeds = np.random.randint(low=_randint_low,
                                          high=_randint_high,
                                          size=(int(self.mpi_size),
                                                self.n_iterations))

        if self.mpi_rank == 0:
            self.print_random_seeds()

    def analyze_data_directories(self, data_dir=None):
        """Count the completed iterations found in a data directory.

        Iteration i counts as complete when both 'pyposmat.kde.<i>.out' and
        'pyposmat.results.<i>.out' exist.

        Args:
            data_dir(str): directory to scan; defaults to the data directory
                under the root directory.

        Returns:
            tuple: (number of complete iterations, list of paths found). The
            list holds each kde file found and, when the scan stops on a
            missing kde file after at least one complete iteration, the last
            results file.
        """
        if data_dir is None:
            _d = os.path.join(self.root_directory, self.data_directory)
        else:
            _d = data_dir

        i = 0
        contents = []
        if not os.path.isdir(_d):
            return i, contents

        while True:
            kde_fn = os.path.join(_d, "pyposmat.kde.{}.out".format(i))
            if os.path.exists(kde_fn):
                contents.append(kde_fn)
            else:
                if i > 0:
                    # results_fn still names the results file of iteration i-1
                    contents.append(results_fn)
                break

            results_fn = os.path.join(_d, "pyposmat.results.{}.out".format(i))
            if not os.path.exists(results_fn):
                break

            i = i + 1

        return i, contents

    def analyze_rank_directories(self, root_dir=None):
        """Count consecutive rank directories that contain a results file.

        Args:
            root_dir(str): directory holding the 'rank_N' directories;
                defaults to ``self.root_directory``.

        Returns:
            tuple: (number of ranks found, list of results paths relative to
            root_dir).
        """
        i = 0
        contents = []

        if root_dir is None:
            _d = self.root_directory
        else:
            # the original read an undefined name `root_directory` here
            _d = root_dir

        while True:
            rank_dir = os.path.join(_d, "rank_{}".format(i))
            if not os.path.isdir(rank_dir):
                break

            rank_fn = os.path.join("rank_{}".format(i), "pyposmat.results.out")
            if not os.path.isfile(os.path.join(_d, rank_fn)):
                break
            contents.append(rank_fn)
            i = i + 1

        return i, contents

    def find_initial_parameters_file(self):
        """Return the path of the first iteration's input file, if it exists.

        Returns:
            str or None: the path when the first sampling step names a 'file'
            that exists as a regular file, otherwise None.
        """
        if 'file' in self.configuration.sampling_type[0]:
            _init_fn = os.path.join(
                self.root_directory,
                self.configuration.sampling_type[0]['file'])
            if os.path.isfile(_init_fn):
                return _init_fn
        return None

    @staticmethod
    def merge_pypospack_datafiles(datafile_fns):
        """Merge several data files into one, dropping duplicate rows.

        Declared static because the function takes no ``self``; it can be
        called on the class, as before, and now also on an instance.

        Args:
            datafile_fns(list): paths of the data files; the first one
                provides the returned object.

        Returns:
            PyposmatDataFile: the first file's object with the merged dataframe.
        """
        d0 = PyposmatDataFile()
        d0.read(filename=datafile_fns[0])
        df0 = d0.df
        for i in range(1, len(datafile_fns)):
            print("merging {}...".format(datafile_fns[i]))
            d = PyposmatDataFile()
            d.read(filename=datafile_fns[i])
            df = d.df

            df0 = pd.concat([df0, df]).drop_duplicates().reset_index(drop=True)
        d0.df = df0
        return d0

    def merge_files(self, i_iteration):
        """Merge the previous kde file and every rank's results into one results file.

        Args:
            i_iteration(int): index of the iteration being merged.
        """
        _dir = self.data_directory
        _n_ranks = self.mpi_size

        datafile = None
        # filename of old kde file
        _filename_kde = os.path.join(_dir,
                                     'pyposmat.kde.{}.out'.format(i_iteration))

        print('Looking for previous kde file')
        print('    {}'.format(_filename_kde))

        datafile_fns = []
        if os.path.isfile(_filename_kde):
            datafile_fns.append(_filename_kde)
        for i_rank in range(_n_ranks):
            rank_fn = os.path.join('rank_{}'.format(i_rank),
                                   'pyposmat.results.out')
            datafile_fns.append(rank_fn)

        names = ['sim_id']\
                + self.parameter_names\
                + self.qoi_names\
                + self.error_names
        types = ['sim_id']\
                + ['param']*len(self.parameter_names)\
                + ['qoi']*len(self.qoi_names)\
                + ['err']*len(self.error_names)

        dataframes = OrderedDict()
        for fn in datafile_fns:
            datafile = PyposmatDataFile()
            datafile.read(fn)
            #if fn.startswith('rank')
            #datafile.df['sim_id'] = datafile.df.apply(
            #    lambda x:"{}_{}_{}".format(
            #        i_iteration,i_rank,str(x['sim_id'])))
            dataframes[fn] = datafile.df[names]

        df = pd.concat(dataframes).reset_index(drop=True)
        datafile = PyposmatDataFile()
        datafile.df = df
        datafile.parameter_names = self.parameter_names
        datafile.error_names = self.error_names
        datafile.qoi_names = self.qoi_names
        datafile.names = names
        datafile.types = types

        fn_out = os.path.join(
            _dir, 'pyposmat.results.{}.out'.format(i_iteration))
        try:
            datafile.write(filename=fn_out)
        except FileNotFoundError as e:
            # the data directory does not exist yet: create it and retry
            if not os.path.exists(self.data_directory):
                os.mkdir(self.data_directory)
                # the original retried with an undefined name
                datafile.write(filename=fn_out)
            else:
                raise

    def analyze_results(self, i_iteration):
        """Write the kde file for iteration i_iteration+1 from this iteration's results."""
        data_fn = os.path.join(
                self.root_directory,
                self.data_directory,
                'pyposmat.results.{}.out'.format(i_iteration))
        config_fn = os.path.join(
                self.root_directory,
                self.configuration_filename)
        kde_fn = os.path.join(
                self.root_directory,
                self.data_directory,
                'pyposmat.kde.{}.out'.format(i_iteration+1))

        data_analyzer = PyposmatDataAnalyzer()
        data_analyzer.read_configuration_file(filename=config_fn)
        data_analyzer.read_data_file(filename=data_fn)
        data_analyzer.write_kde_file(filename=kde_fn)

    def read_configuration_file(self, filename=None):
        """Read the configuration and cache the iteration count and name lists.

        Args:
            filename(str): when given, replaces ``self.configuration_filename``.
        """
        assert isinstance(filename, str) or filename is None
        if filename is None:
            _filename_in = self.configuration_filename
        else:
            self.configuration_filename = filename
            _filename_in = filename

        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=_filename_in)

        self.n_iterations = self.configuration.n_iterations
        self.qoi_names = self.configuration.qoi_names
        self.error_names = self.configuration.error_names
        self.parameter_names = self.configuration.parameter_names

        print(self.parameter_names)
        print(self.qoi_names)
        print(self.error_names)

    def print_random_seeds(self):
        """Print the master seed and the seed of every (rank, iteration) pair."""
        print(80 * '-')
        print('{:^80}'.format('GENERATED RANDOM SEEDS'))
        print(80 * '-')
        print()
        print('rv_seed={}'.format(self.rv_seed))
        print()
        print('{:^8} {:^8} {:^10}'.format('rank', 'iter', 'seed'))
        print('{} {} {}'.format(8 * '-', 8 * '-', 10 * '-'))
        for i_rank in range(self.mpi_size):
            for i_iter in range(self.n_iterations):
                print('{:^8} {:^8} {:>10}'.format(
                    i_rank, i_iter, self.rv_seeds[i_rank, i_iter]))
class PyposmatBokehVisualizer(object):
    """Bokeh application showing a parameter scatter plot and an error
    scatter plot that share one data source.

    Typical use: read_configuration(), read_data(), start_bokeh_server().
    Points selected in either plot are written to 'selected_points.txt'
    in the current working directory.
    """

    def __init__(self):
        bokeh_tools = ['box_select', 'reset', 'box_zoom', 'pan']
        self.bokeh_tools = ', '.join(bokeh_tools)

    def read_configuration(self, filename):
        """Read the configuration file at ``filename`` into self.configuration."""
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(filename=filename)

    def read_data(self, filename):
        """Read a data file and build the dataframes used by the plots.

        Sets the name lists (both the long ``parameter_names`` /
        ``error_names`` and the short ``param_names`` / ``err_names``
        spellings), prints them, and builds ``param_df``, ``qoi_df``,
        ``err_df`` and their column-wise concatenation ``total_df``.
        """
        self.datafile = PyposmatDataFile()
        self.datafile.read(filename=filename)

        self.parameter_names = list(self.datafile.parameter_names)
        self.qoi_names = list(self.datafile.qoi_names)
        self.error_names = list(self.datafile.error_names)

        # short spellings used by the plotting code; kept as separate lists
        self.param_names = list(self.datafile.parameter_names)
        self.err_names = list(self.datafile.error_names)

        for title, names in (("parameter names", self.param_names),
                             ("qoi names", self.qoi_names),
                             ("error_names", self.err_names)):
            print(title)
            print(type(names))
            for i, v in enumerate(names):
                print("{:3} {:<20}".format(i, v))

        # generate pandas dataframes
        self.param_df = copy.deepcopy(self.datafile.df[self.param_names])
        self.qoi_df = copy.deepcopy(self.datafile.df[self.qoi_names])
        self.err_df = copy.deepcopy(self.datafile.df[self.err_names])
        self.total_df = pd.concat(
            [self.param_df, self.qoi_df, self.err_df], axis=1)

    def update_data(self, param_x, param_y, err_x, err_y):
        """Copy the four chosen columns into the plot fields.

        The columns are duplicated in ``total_df`` under the fixed names
        'param_x', 'param_y', 'err_x', 'err_y' and pushed to
        ``self.source.data``.
        """
        self.total_df['param_x'] = self.total_df[param_x]
        self.total_df['param_y'] = self.total_df[param_y]
        self.total_df['err_x'] = self.total_df[err_x]
        self.total_df['err_y'] = self.total_df[err_y]
        self.source.data = dict(param_x=self.total_df['param_x'],
                                param_y=self.total_df['param_y'],
                                err_x=self.total_df['err_x'],
                                err_y=self.total_df['err_y'])

    def nix(self, val, lst):
        """Return a copy of ``lst`` without the elements equal to ``val``."""
        return [x for x in lst if x != val]

    @staticmethod
    def _parse_float(text):
        """Return ``float(text)``, or None when text is not a number."""
        try:
            return float(text)
        except (TypeError, ValueError):
            return None

    def _format_selected_points(self, indices):
        """Return the lines of the selected-points report.

        Args:
            indices: row positions in ``total_df``.
                NOTE(review): assumes the selection indices reported by
                bokeh are positions in the source, whose columns are
                copied from total_df in update_data — confirm.

        Returns:
            A list of strings: a header with the parameter, qoi and error
            names, then one space-delimited line per selected row.
        """
        # Selecting columns by name keeps the header and the rows in step.
        # The original header omitted the qoi columns that the rows carry.
        columns = (list(self.param_names) + list(self.qoi_names)
                   + list(self.err_names))
        lines = [' '.join(columns)]
        selected = self.total_df[columns].iloc[list(indices)]
        for values in selected.itertuples(index=False, name=None):
            # join the values directly; the original str(list) kept the
            # brackets because the result of str.replace was discarded
            lines.append(' '.join(str(v) for v in values))
        return lines

    def _build_graph(self, names, x_field, y_field):
        """Create the widgets, figure and glyph for one scatter plot.

        Args:
            names: column names offered in the axis selectors; at least
                two are required (names[0] and names[1] are the initial
                x and y selections).
            x_field: source field plotted on the x axis.
            y_field: source field plotted on the y axis.

        Returns:
            A dict holding the selectors, range entries, figure and glyph.
        """
        graph = {}
        graph['obj_x_select'] = Select(value=names[0],
                                       options=self.nix(names[1], names))
        graph['obj_y_select'] = Select(value=names[1],
                                       options=self.nix(names[0], names))
        graph['x_min_entry'] = TextInput(placeholder='Min X Value', value='')
        graph['x_max_entry'] = TextInput(placeholder='Max X Value', value='')
        graph['y_min_entry'] = TextInput(placeholder='Min Y Value', value='')
        graph['y_max_entry'] = TextInput(placeholder='Max Y Value', value='')
        graph['plot_width'] = 610
        graph['plot_height'] = 400
        graph['tools'] = self.bokeh_tools
        graph['obj_figure'] = figure(
            plot_width=graph['plot_width'],
            plot_height=graph['plot_height'],
            tools=graph['tools'],
            title=(graph['obj_x_select'].value + ' vs. '
                   + graph['obj_y_select'].value))
        graph['obj_figure'].xaxis.axis_label = graph['obj_x_select'].value
        graph['obj_figure'].yaxis.axis_label = graph['obj_y_select'].value
        graph['obj_glyph'] = Circle(x=x_field,
                                    y=y_field,
                                    size=1,
                                    fill_color='#5F77D5',
                                    line_color='#5F77D5')
        graph['obj_figure'].add_glyph(self.source, graph['obj_glyph'])
        return graph

    @staticmethod
    def _build_pane(graph):
        """Stack the selectors, the figure and the range entries of a graph."""
        selects = bokeh.layouts.row(graph['obj_x_select'],
                                    graph['obj_y_select'])
        x_entry = bokeh.layouts.row(graph['x_min_entry'],
                                    graph['x_max_entry'])
        y_entry = bokeh.layouts.row(graph['y_min_entry'],
                                    graph['y_max_entry'])
        return bokeh.layouts.column(selects, graph['obj_figure'],
                                    x_entry, y_entry)

    def _bind_select_callbacks(self, graph, x_field, y_field):
        """Update the data, title and axis label when a selector changes."""

        def x_select_change(attrname, old, new):
            self.source.data[x_field] = self.total_df[new]
            graph['obj_figure'].title.text = (
                new + ' vs. ' + graph['obj_y_select'].value)
            graph['obj_figure'].xaxis.axis_label = new

        def y_select_change(attrname, old, new):
            self.source.data[y_field] = self.total_df[new]
            graph['obj_figure'].title.text = (
                graph['obj_x_select'].value + ' vs. ' + new)
            graph['obj_figure'].yaxis.axis_label = new

        graph['obj_x_select'].on_change('value', x_select_change)
        graph['obj_y_select'].on_change('value', y_select_change)

    def _make_range_callback(self, graph, axis, attr):
        """Return a callback setting ``<axis>_range.<attr>`` of the figure.

        Entries that are empty or not numeric are ignored, so clearing a
        text box no longer raises ValueError inside the callback.
        """

        def _callback(attrname, old, new):
            value = self._parse_float(new)
            if value is None:
                return
            setattr(getattr(graph['obj_figure'], axis + '_range'),
                    attr, value)

        return _callback

    def _bind_range_callbacks(self, graph):
        """Connect the four min/max text entries of a graph to its ranges."""
        for axis in ('x', 'y'):
            for bound, attr in (('min', 'start'), ('max', 'end')):
                entry = graph['{}_{}_entry'.format(axis, bound)]
                entry.on_change(
                    'value', self._make_range_callback(graph, axis, attr))

    def setup_bokeh_frame(self, doc):
        """Build the two plots, add them to ``doc`` and wire the callbacks.

        Requires read_data() to have been called, with at least two
        parameter names and two error names.
        """
        self.source = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))
        self.source_static = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))

        # define the parameter graph and the error graph
        self.param_graph = self._build_graph(
            self.param_names, 'param_x', 'param_y')
        self.err_graph = self._build_graph(
            self.err_names, 'err_x', 'err_y')

        layout = bokeh.layouts.row(self._build_pane(self.param_graph),
                                   self._build_pane(self.err_graph))
        doc.add_root(layout)

        # fill the source with the initially selected columns
        self.update_data(self.param_graph['obj_x_select'].value,
                         self.param_graph['obj_y_select'].value,
                         self.err_graph['obj_x_select'].value,
                         self.err_graph['obj_y_select'].value)

        # callback functions
        self._bind_select_callbacks(self.param_graph, 'param_x', 'param_y')
        self._bind_select_callbacks(self.err_graph, 'err_x', 'err_y')

        def source_callback(attrname, old, new):
            indices = list(new['1d']['indices'])
            with open('selected_points.txt', 'w') as f:
                f.write('\n'.join(self._format_selected_points(indices))
                        + '\n')

        self.source.on_change('selected', source_callback)

        self._bind_range_callbacks(self.param_graph)
        self._bind_range_callbacks(self.err_graph)

    def start_bokeh_server(self):
        """Serve the application at '/' and block in the server's io loop."""
        self.bokeh_app = Application(FunctionHandler(self.setup_bokeh_frame))
        self.bokeh_server = Server({'/': self.bokeh_app}, num_procs=1)
        self.bokeh_server.start()

        # start io loop for bokeh_server
        self.bokeh_server.io_loop.add_callback(self.bokeh_server.show, '/')
        self.bokeh_server.io_loop.start()
parallel_plot_config['p_3.5_q_0.5']['data_fn'] = os.path.join( parallel_plot_config['p_3.5_q_0.5']['data_directory'], 'pyposmat.kde.19.out') if __name__ == "__main__": # initialization o_plot = PyposmatParallelCoordinatesPlot() # add data to plot for k, v in parallel_plot_config.items(): print(k, v) if k == 'args': pass else: o_config = PyposmatConfigurationFile() o_config.read(filename=v['config_fn']) o_data = PyposmatDataFile() o_data.read(filename=v['data_fn']) o_data.create_normalized_errors(qoi_targets=o_config.qoi_targets) o_plot.add_dataframe(color=v['color'], label=v['label'], obj=copy.deepcopy(o_data.df), names=o_data.normalized_names) o_plot.make_plot(filename="parallel_plot.png", xlabels=o_data.normalized_error_names, ylabel="% error", title="Si sw", ylim=(-175, 25),
from sklearn.cluster import DBSCAN, KMeans from sklearn.preprocessing import StandardScaler import matplotlib.pyplot as plt import pandas as pd from pandas.tools.plotting import parallel_coordinates import copy if __name__ == "__main__": # define paths to configuration and data files configuration_path = "/home/seaton/python-repos/pypospack/examples/Ni__eam__born_exp_fs__sensitivityanalysis/data__from_pareto_optimization/pyposmat.config.in" data_path = "/home/seaton/python-repos/pypospack/examples/Ni__eam__born_exp_fs__sensitivityanalysis/data__from_pareto_optimization/pyposmat.kde.5.out" # init the configuration object o_config = PyposmatConfigurationFile() o_config.read(configuration_path) # init the data file object o_data = PyposmatDataFile() o_data.read(data_path) # normalize the QOIs to prepare for tSNE o_normalizer = StandardScaler() normal_qoi_arr = o_normalizer.fit_transform(o_data.qoi_df) normal_qoi_df = pd.DataFrame(data=normal_qoi_arr, columns=o_data.qoi_names) # learn and apply tSNE manifold to the normal QOIs o_tsne = TSNE() tsne_arr = o_tsne.fit_transform(normal_qoi_df) # find KMeans clusters in the tSNE space