Python PyposmatDataFile.read Examples, pypospack.pyposmat.data.PyposmatDataFile.read Python Examples

Example #1

0

Show file

File: dev__filesampler__Si__reference.py Project: mastricker/pypospack

class FileSampler():
    """Run a simulation workflow once for every row of a data file.

    Each row of ``data.df`` supplies a ``sim_id`` and one value for every
    name in ``configuration.parameter_names``.  ``run`` creates a directory
    named after the ``sim_id``, changes into it, and executes the workflow
    there.

    Args:
        configuration (str or PyposmatConfigurationFile): path to a
            configuration file, or an already-loaded configuration object.
        data (str or PyposmatDataFile): path to a data file, or an
            already-loaded data object.
        structure_name: forwarded to the workflow as ``structure_name``.
        structure_path: forwarded to the workflow as ``structure_path``.
        workflow_type (str): only ``'lmps_thermal_expansion'`` is acted on;
            for any other value the directory is created but nothing runs.
        workflow_definition (dict): extra keyword arguments for the workflow
            constructor.
    """

    def __init__(self,
                 configuration,
                 data,
                 structure_name,
                 structure_path,
                 workflow_type,
                 workflow_definition):
        self.initialize_configuration(configuration)
        self.initialize_data(data)
        self.structure_name = structure_name
        self.structure_path = structure_path
        # The attribute used to be misspelled; keep the old name as an alias
        # so existing readers of ``strucutre_path`` keep working.
        self.strucutre_path = structure_path
        self.workflow_type = workflow_type
        self.workflow_definition = workflow_definition
        self.potential_definition = self.configuration.potential

    def initialize_configuration(self, configuration):
        """Store ``configuration``, reading it from disk when given a path.

        Raises:
            TypeError: if ``configuration`` is neither a ``str`` nor a
                ``PyposmatConfigurationFile``.
        """
        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
        elif isinstance(configuration, str):
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
        else:
            msg = ("configuration must be a path to a configuration file or an "
                   "instance of the PyposmatConfigurationFile,")
            raise TypeError(msg)

    def initialize_data(self, data):
        """Store ``data``, reading it from disk when given a path.

        Raises:
            TypeError: if ``data`` is neither a ``str`` nor a
                ``PyposmatDataFile``.
        """
        if isinstance(data, PyposmatDataFile):
            self.data = data
        elif isinstance(data, str):
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        else:
            msg = ("data must be a path to a data file or an instance of "
                   "PyposmatDataFile.")
            raise TypeError(msg)

    def run(self):
        """Execute the configured workflow for every row of ``self.data.df``."""
        for index, row in self.data.df.iterrows():
            sim_id = row['sim_id']
            print('working on sim_id:{}'.format(sim_id))
            parameters = OrderedDict(
                [(k, row[k]) for k in self.configuration.parameter_names])

            original_path = os.getcwd()
            os.mkdir(sim_id)
            os.chdir(sim_id)
            try:
                # Use the values handed to the constructor rather than
                # module-level globals of whichever script built the sampler.
                if self.workflow_type == 'lmps_thermal_expansion':
                    workflow = LammpsThermalExpansion(
                            structure_name=self.structure_name,
                            structure_path=self.structure_path,
                            **self.workflow_definition)
                    workflow.create_task_configurations()
                    workflow.create_tasks()
                    workflow.prepare_tasks(
                            potential_definition=self.potential_definition,
                            potential_parameters=parameters)
                    workflow.run()
            finally:
                # Always return to the starting directory, so a failed
                # simulation does not nest later sim_id directories.
                os.chdir(original_path)

Example #2

0

Show file

def get_best_parameterization(config_fn,data_fn,metric_name='d_metric',o_config=None,o_data=None):
    """Return the free-parameter values of the best-scoring parameterization.

    Args:
        config_fn (str): path to the configuration file.
        data_fn (str): path to the data file.
        metric_name (str): scoring metric; only ``'d_metric'`` is supported.
        o_config: accepted for interface compatibility; not used here.
        o_data: accepted for interface compatibility; not used here.

    Returns:
        collections.OrderedDict: free parameter name -> value taken from the
        first row of the sub-selected parameter table.

    Raises:
        PyposmatUnsupportedPotentialScoringMetric: if ``metric_name`` is not
            ``'d_metric'``.
    """
    _analyzer = PyposmatDataAnalyzer()
    _analyzer.read_configuration_file(filename=config_fn)
    _analyzer.read_data_file(filename=data_fn)

    # calculate the scoring metric
    # Compare by value: ``is`` tests object identity, which only matches
    # string literals by accident of interning.
    if metric_name == 'd_metric':
        _df = _analyzer.calculate_d_metric(df=_analyzer.datafile.df)
    else:
        s = "The metric name {} is unsupported"
        s = s.format(metric_name)
        raise PyposmatUnsupportedPotentialScoringMetric(s)

    _data = PyposmatDataFile()
    _data.read(filename=data_fn)
    _data.df = _df
    _data.subselect_by_score(score_name='d_metric',n=1)

    _free_parameter_names = _analyzer.configuration.free_parameter_names

    _parameter_best_dict = OrderedDict()
    for pn in _free_parameter_names:
        _parameter_best_dict[pn] = _data.sub_parameter_df.iloc[0][pn]

    return _parameter_best_dict

Example #3

0

Show file

def test__read__wo_named_arguments():
    """Read the MgO data file positionally and verify every name list."""

    def _assert_same_list(actual, expected):
        # The attribute must be a plain list holding the expected entries
        # in the expected order.
        assert type(actual) is list
        assert len(expected) == len(actual)
        for wanted, found in zip(expected, actual):
            assert wanted == found

    datafile = PyposmatDataFile()
    datafile.read(MgO_datafile)

    _assert_same_list(datafile.names, expected_names)
    _assert_same_list(datafile.parameter_names, parameter_names)
    _assert_same_list(datafile.qoi_names, qoi_names)
    _assert_same_list(datafile.error_names, error_names)

    assert type(datafile.df) is pd.DataFrame

Example #4

0

Show file

File: test__write__MgO.py Project: mastricker/pypospack

def test__write_header_section():
    """Write only the header section, read it back, and compare the names."""

    cleanup_test()

    parameter_names = ['param{}'.format(i + 1) for i in range(3)]
    qoi_names = ['qoi{}'.format(i + 1) for i in range(5)]
    error_names = ['err{}'.format(i + 1) for i in range(5)]

    datafile = PyposmatDataFile()
    datafile.write_header_section(parameter_names=parameter_names,
                                  qoi_names=qoi_names,
                                  error_names=error_names,
                                  filename=datafile_out_fn)

    assert os.path.isfile(datafile_out_fn)

    datafile_read = PyposmatDataFile()
    datafile_read.read(filename=datafile_out_fn)

    assert len(datafile_read.parameter_names) == len(parameter_names)
    for i, v in enumerate(parameter_names):
        assert datafile_read.parameter_names[i] == v

    assert len(datafile_read.qoi_names) == len(qoi_names)
    for i, v in enumerate(qoi_names):
        assert datafile_read.qoi_names[i] == v

    # Compare against error_names; the old check used qoi_names and only
    # passed because both lists happen to hold five entries.
    assert len(datafile_read.error_names) == len(error_names)
    for i, v in enumerate(error_names):
        assert datafile_read.error_names[i] == v

    cleanup_test()

Example #5

0

Show file

File: test__get_descriptive_statistics.py Project: mastricker/pypospack

def dev__get_descriptive_statistics__from_kde_file():
    """Print descriptive statistics computed from the testing set's KDE file."""
    banner = 80 * '-'
    title = '{:^80}'.format('method -> get_descriptive_statistics__from_kde_file')
    print(banner)
    print(title)

    testing_set = get_testing_set()
    config_fn = testing_set['config_fn']
    results_data_fn = testing_set['results_fn']
    kde_data_fn = testing_set['kde_fn']

    # All three input files have to exist before anything is read.
    assert os.path.isfile(config_fn)
    assert os.path.isfile(results_data_fn)
    assert os.path.isfile(kde_data_fn)

    analyzer = PyposmatDataAnalyzer(config_fn=config_fn,
                                    results_data_fn=results_data_fn)

    kde_data = PyposmatDataFile()
    kde_data.read(filename=kde_data_fn)

    statistics = analyzer.get_descriptive_statistics(df=kde_data.df)
    report = analyzer.str__descriptive_statistics(
        descriptive_statistics=statistics)

    print(report)
    print(kde_data.df.shape)

Example #6

0

Show file

File: test__write__MgO.py Project: mastricker/pypospack

def test__write_simulation_results__no_filename():
    """Write a header and one result row, then read the file back."""

    cleanup_test()

    parameter_names = ['param{}'.format(n) for n in range(1, 4)]
    qoi_names = ['qoi{}'.format(n) for n in range(1, 6)]
    error_names = ['err{}'.format(n) for n in range(1, 6)]

    datafile = PyposmatDataFile()
    datafile.write_header_section(parameter_names=parameter_names,
                                  qoi_names=qoi_names,
                                  error_names=error_names,
                                  filename=datafile_out_fn)

    sim_id = "test_id"

    # One constant value per group keeps the written row easy to recognise.
    results = OrderedDict([
        ('parameters', OrderedDict((name, 1.) for name in parameter_names)),
        ('qois', OrderedDict((name, 2.) for name in qoi_names)),
        ('errors', OrderedDict((name, 3.0) for name in error_names)),
    ])

    # No filename is passed here; the header call above already received it.
    datafile.write_simulation_results(sim_id, results)

    assert os.path.isfile(datafile_out_fn)

    datafile_read = PyposmatDataFile()
    datafile_read.read(filename=datafile_out_fn)

Example #7

0

Show file

File: kld_convergence_analysis.py Project: mastricker/pypospack

def calculate_kld(data_1_fn,data_2_fn,names,n_samples=2000):
    """Estimate the divergence between the distributions in two data files.

    A kernel density estimate is built for the ``names`` columns of each
    file and both are passed to ``kullbach_lieber_divergence``.

    Args:
        data_1_fn (str): path to the first data file.
        data_2_fn (str): path to the second data file.
        names (list): column names selected from each file's dataframe.
        n_samples (int): forwarded to ``kullbach_lieber_divergence``.

    Returns:
        Whatever ``kullbach_lieber_divergence`` returns for the two KDEs.
    """
    assert isinstance(data_1_fn,str)
    assert isinstance(data_2_fn,str)
    assert isinstance(n_samples,int)

    assert os.path.isfile(data_1_fn)
    # This used to test data_1_fn twice, leaving data_2_fn unchecked.
    assert os.path.isfile(data_2_fn)

    data_1 = PyposmatDataFile()
    data_1.read(filename=data_1_fn)

    data_2 = PyposmatDataFile()
    data_2.read(filename=data_2_fn)

    w1,v1 = linalg.eig(np.cov(data_1.df[names].T))
    w2,v2 = linalg.eig(np.cov(data_2.df[names].T))

    # A negative eigenvalue marks a covariance matrix as ill-conditioned.
    cov1_ill_conditioned = any([k < 0 for k in w1.tolist()])
    cov2_ill_conditioned = any([k < 0 for k in w2.tolist()])

    any_ill_conditioned = any([cov1_ill_conditioned,cov2_ill_conditioned])

    if any_ill_conditioned:
        # Switch both estimates to the project's GaussianKde in that case.
        print('using ill-conditioned kde')
        kde_1 = GaussianKde(data_1.df[names].T)
        print(kde_1.n, kde_1.d)
        kde_2 = GaussianKde(data_2.df[names].T)
    else:
        kde_1 = gaussian_kde(data_1.df[names].T)
        kde_2 = gaussian_kde(data_2.df[names].T)

    kld = kullbach_lieber_divergence(kde_1,kde_2,n_samples)
    return kld

Example #8

0

Show file

def test__attribute__names__after_reading_file():
    """After reading a data file, ``names`` must be a plain list."""
    datafile_in_fn = "../../../../../data/MgO_pareto_data/culled_004.out"

    reader = PyposmatDataFile()
    reader.read(datafile_in_fn)

    names = reader.names
    assert type(names) is list

Example #9

0

Show file

File: make_parallel_coordinates_plot.py Project: mastricker/pypospack

def make_latex_table(config, data, qoi_type=None, param_type=None):
    """Load configuration and data, then select normalized error columns.

    Args:
        config (str or PyposmatConfigurationFile): path or loaded object.
        data (str or PyposmatDataFile): path or loaded object.
        qoi_type (str): only ``'by_qoi_target'`` triggers any work.
        param_type: overwritten with an empty list on entry.
            NOTE(review): this clobbers the caller's value - confirm intent.
    """
    qoi_types = ['by_qoi_target']
    param_type = []

    assert isinstance(config, (str, PyposmatConfigurationFile))
    assert isinstance(data, (str, PyposmatDataFile))

    # Resolve the configuration: a path is read from disk, an object is
    # used directly.
    if isinstance(config, str):
        o_config = PyposmatConfigurationFile()
        o_config.read(filename=config)
    elif isinstance(config, PyposmatConfigurationFile):
        o_config = config
    else:
        raise TypeError()

    # Resolve the data file the same way.
    if isinstance(data, str):
        o_data = PyposmatDataFile()
        o_data.read(filename=data)
    elif isinstance(data, PyposmatDataFile):
        o_data = data
    else:
        raise TypeError()

    if qoi_type == 'by_qoi_target':
        o_data.create_normalized_errors(
            normalize_type='by_qoi_target',
            qoi_targets=o_config.qoi_targets)
        df = o_data.df[o_data.normalized_error_names]

Example #10

0

Show file

    def merge_files(self, i_iteration):
        """Merge the iteration's KDE file and all rank results into one file.

        The inputs are ``pyposmat.kde.<i_iteration>.out`` in
        ``self.data_directory`` (only if it exists) and
        ``rank_<i>/pyposmat.results.out`` for every rank below
        ``self.mpi_size``.  The merged table is written to
        ``pyposmat.results.<i_iteration>.out`` in ``self.data_directory``.

        Args:
            i_iteration (int): iteration number used in both filenames.

        Raises:
            FileNotFoundError: if the write fails although the data
                directory already exists.
        """
        _dir = self.data_directory
        _n_ranks = self.mpi_size

        # filename of old kde file
        _filename_kde = os.path.join(_dir,
                                     'pyposmat.kde.{}.out'.format(i_iteration))

        print('Looking for previous kde file')
        print('    {}'.format(_filename_kde))

        datafile_fns = []
        if os.path.isfile(_filename_kde):
            datafile_fns.append(_filename_kde)
        datafile_fns.extend(
            os.path.join('rank_{}'.format(i_rank), 'pyposmat.results.out')
            for i_rank in range(_n_ranks))

        names = ['sim_id']\
                + self.parameter_names\
                + self.qoi_names\
                + self.error_names
        types = ['sim_id']\
                + ['param']*len(self.parameter_names)\
                + ['qoi']*len(self.qoi_names)\
                + ['err']*len(self.error_names)

        # NOTE: sim_id values are taken unchanged from every input file.
        dataframes = OrderedDict()
        for fn in datafile_fns:
            datafile = PyposmatDataFile()
            datafile.read(fn)
            dataframes[fn] = datafile.df[names]

        df = pd.concat(dataframes).reset_index(drop=True)
        datafile = PyposmatDataFile()
        datafile.df = df
        datafile.parameter_names = self.parameter_names
        datafile.error_names = self.error_names
        datafile.qoi_names = self.qoi_names
        datafile.names = names
        datafile.types = types

        fn_out = os.path.join(
            _dir, 'pyposmat.results.{}.out'.format(i_iteration))
        try:
            datafile.write(filename=fn_out)
        except FileNotFoundError:
            if os.path.exists(self.data_directory):
                raise
            # The output directory was missing: create it and retry.  The
            # retry used to reference an undefined name (filename_fn_out).
            os.mkdir(self.data_directory)
            datafile.write(filename=fn_out)

Example #11

0

Show file

File: test__read.py Project: mastricker/pypospack

def dev__read():
    """Read the testing set's results file and print its ``sim_id`` column."""
    results_fn = get_testing_set()['results_data_fn']

    datafile = PyposmatDataFile()
    datafile.read(filename=results_fn)

    sim_ids = datafile.df['sim_id']
    print(sim_ids)

Example #12

0

Show file

File: test__PyposmatRugplots__MgO.py Project: mastricker/pypospack

def test__read_datafile():
    """The rugplot object must expose the data it read as a DataFrame."""
    from pypospack.pyposmat.data import PyposmatDataFile

    # Reading the file directly first shows it is loadable on its own.
    reference = PyposmatDataFile()
    reference.read(filename=datafile_fn)

    rugplot = PyposmatParetoRugplot()
    rugplot.read_datafile(filename=datafile_fn)

    import pandas as pd
    loaded = rugplot.data.df
    assert type(loaded) is pd.DataFrame

Example #13

0

Show file

def gmm_analysis(config_fn,
                 data_fn,
                 names,
                 output_directory='gmm_analysis',
                 max_components=20):
    """Fit Gaussian mixtures of increasing size and plot the AIC/BIC choice.

    One ``GaussianMixture`` is fitted for every component count in
    ``range(1, max_components)``.  The AIC/BIC curves and the model with the
    lowest BIC are plotted into ``output_directory``.

    Args:
        config_fn (str): path to an existing configuration file.
        data_fn (str): path to an existing data file.
        names (list): dataframe columns used as the fitting data.
        output_directory (str): created if missing; receives both plots.
        max_components (int): exclusive upper bound on the component count.
    """
    assert isinstance(config_fn, str)
    assert isinstance(data_fn, str)
    assert os.path.isfile(config_fn)
    assert os.path.isfile(data_fn)

    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)
    o_data.create_normalized_errors(normalize_type='by_qoi_target',
                                    qoi_targets=o_config.qoi_targets)
    o_data.df['score'] = o_data.df[o_config.normalized_error_names].abs().sum(
        axis=1)

    data = o_data.df[names]

    n_components = np.arange(1, max_components)
    models = [
        GaussianMixture(n_components=n, covariance_type='full',
                        random_state=0).fit(data) for n in n_components
    ]

    # Evaluate each criterion once; the index of the first minimum picks
    # the model (ties resolve to the smaller model, as before).
    # AIC analysis
    aic_criteria = [m.aic(data) for m in models]
    aic_idx = min(range(len(aic_criteria)), key=aic_criteria.__getitem__)
    aic_n_components = n_components[aic_idx]
    # BIC analysis
    bic_criteria = [m.bic(data) for m in models]
    bic_idx = min(range(len(bic_criteria)), key=bic_criteria.__getitem__)
    bic_n_components = n_components[bic_idx]

    #plot the criteria
    print('bic_n_components:{}'.format(bic_n_components))
    print('aic_n_components:{}'.format(aic_n_components))
    plot_fn = os.path.join(output_directory, 'aic_bic_plot.jpg')
    plot_gmm_aic_bic(filename=plot_fn,
                     n_components=n_components,
                     aic_criteria=aic_criteria,
                     bic_criteria=bic_criteria,
                     aic_n_components=aic_n_components,
                     bic_n_components=bic_n_components)

    # models[i] has i + 1 components, so index by position (bic_idx), not
    # by component count; the old index selected the next-larger model and
    # could run past the end of the list.  Write into output_directory
    # rather than a hard-coded folder.
    filename = os.path.join(output_directory, 'gmm_analysis.jpg')
    plot_gmm(models[bic_idx], data, filename=filename)

Example #14

0

Show file

File: kld_convergence_analysis.py Project: mastricker/pypospack

def covariance_analysis(data_fn,names):
    """Print the eigen-decomposition of the covariance of selected columns.

    Args:
        data_fn (str): path to the data file.
        names (list): dataframe columns entering the covariance matrix.
    """
    assert isinstance(data_fn,str)
    assert isinstance(names,list)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    # np.cov treats rows as variables, hence the transpose.
    samples = datafile.df[names].T
    eigenvalues, eigenvectors = linalg.eig(np.cov(samples))
    print("eigenvalues:\n",eigenvalues)
    print("eigenvectors:\n",eigenvectors)

Example #15

0

Show file

    def merge_pypospack_datafiles(datafile_fns):
        """Merge several data files into the object read from the first one.

        Rows are appended file by file; exact duplicates are dropped and the
        index is renumbered after each append.

        Args:
            datafile_fns (list): data file paths; the first one seeds the
                result.

        Returns:
            PyposmatDataFile: the first file's object, carrying the merged
            dataframe.
        """
        merged = PyposmatDataFile()
        merged.read(filename=datafile_fns[0])
        combined = merged.df

        for next_fn in datafile_fns[1:]:
            print("merging {}...".format(next_fn))
            current = PyposmatDataFile()
            current.read(filename=next_fn)

            stacked = pd.concat([combined, current.df])
            combined = stacked.drop_duplicates().reset_index(drop=True)

        merged.df = combined
        return merged

Example #16

0

Show file

class PyposmatPostProcessorTestHarness(object):
    """Hold a configuration file and a data file for post-processor tests.

    Args:
        configuration_fn (str or None): configuration file path; when None
            no configuration is read and ``configuration`` stays None.
        datafile_fn (str or None): data file path; when None no data is
            read and ``datafile`` stays None.
    """

    def __init__(self, configuration_fn, datafile_fn):
        self.configuration_fn = configuration_fn
        self.datafile_fn = datafile_fn

        # Always create both attributes, so skipping a file leaves a
        # well-defined None instead of a missing attribute.
        self.configuration = None
        self.datafile = None

        if configuration_fn is not None:
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(configuration_fn)
        if datafile_fn is not None:
            self.datafile = PyposmatDataFile()
            self.datafile.read(filename=datafile_fn)

    def get_parameter_names(self):
        """Return the ``parameter_names`` of the loaded configuration."""
        return self.configuration.parameter_names

Example #17

0

Show file

def show_qoi_targets(config_fn,
                     data_fn):
    """Print each QOI's target value next to its mean in the data file.

    Args:
        config_fn (str): path to the configuration file.
        data_fn (str): path to the data file.
    """
    configuration = PyposmatConfigurationFile()
    configuration.read(filename=config_fn)

    datafile = PyposmatDataFile()
    datafile.read(filename=data_fn)

    row_format = "{:20} {:10} {:10}"
    for qoi_name, qoi_target in configuration.qoi_targets.items():
        try:
            qoi_avg = datafile.df[qoi_name].mean()
        except KeyError:
            # The data file has no column for this QOI.
            qoi_avg = 'no value'
        print(row_format.format(qoi_name,qoi_target,qoi_avg))

Example #18

0

Show file

class BaseAnalysis(object):
    """Shared setup for analyses: configuration, data, and output folder.

    Args:
        configuration (str or PyposmatConfigurationFile): path or object.
        data (str or PyposmatDataFile): path or object; an object is
            deep-copied before use.
        output_path (str or None): output directory; an existing directory
            at this path is deleted and recreated.
    """

    def __init__(self, configuration, data, output_path=None):
        self.configuration = self.data = self.output_path = None

        self._initialize_configuration(configuration=configuration)
        self._initialize_data(data=data)
        self._initialize_output_path(path=output_path)

    def _initialize_configuration(self, configuration):
        """Set ``self.configuration`` from a file path or a loaded object."""
        if isinstance(configuration, str):
            assert os.path.isfile(configuration)
            self.configuration = PyposmatConfigurationFile()
            self.configuration.read(filename=configuration)
            return
        if isinstance(configuration, PyposmatConfigurationFile):
            self.configuration = configuration
            return
        type_name = str(type(configuration))
        raise TypeError('configuration cannot be type:{}'.format(type_name))

    def _initialize_data(self, data):
        """Set ``self.data`` and add normalized errors by QOI target."""
        if isinstance(data, str):
            assert os.path.isfile(data)
            self.data = PyposmatDataFile()
            self.data.read(filename=data)
        elif isinstance(data, PyposmatDataFile):
            # Work on a private copy; the normalized-error call below
            # operates on self.data.
            self.data = deepcopy(data)
        else:
            type_name = str(type(data))
            raise TypeError('data cannot be type:{}'.format(type_name))

        targets = self.configuration.qoi_targets
        self.data.create_normalized_errors(normalize_type='by_qoi_target',
                                           qoi_targets=targets)

    def _initialize_output_path(self, path):
        """Set ``self.output_path``, recreating the directory from scratch."""
        if path is None:
            self.output_path = None
            return
        if not isinstance(path, str):
            raise TypeError
        # Start from an empty directory: remove any previous contents.
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)
        self.output_path = path

Example #19

0

Show file

def get_parameter_variance(
        config_fn,data_fn,
        metric_name='d_metric',
        n=100,
        o_config=None,
        o_data=None):
    """Return each parameter's standard deviation over the n best rows.

    Args:
        config_fn (str): path to the configuration file.
        data_fn (str): path to the data file.
        metric_name (str): scoring metric; only ``'d_metric'`` is supported
            (default: d_metric).
        n (int): the number of best metric values to keep.
        o_config (pypospack.config.data.PyposmatConfigurationFile):
            accepted for interface compatibility; not used here.
        o_data (pypospack.config.data.PyposmatDataFile): accepted for
            interface compatibility; not used here.
    Returns:
        collections.OrderedDict: parameter name -> standard deviation.
    Raises:
        PyposmatUnsupportedPotentialScoringMetric: if ``metric_name`` is
            not ``'d_metric'``.
    """

    _analyzer = PyposmatDataAnalyzer()
    _analyzer.read_configuration_file(filename=config_fn)
    _analyzer.read_data_file(filename=data_fn)

    # calculate the scoring metric
    # Compare by value: ``is`` tests object identity, which only matches
    # string literals by accident of interning.
    if metric_name == 'd_metric':
        _df = _analyzer.calculate_d_metric(df=_analyzer.datafile.df)
    else:
        s = "The metric name {} is unsupported"
        s = s.format(metric_name)
        raise PyposmatUnsupportedPotentialScoringMetric(s)

    _data = PyposmatDataFile()
    _data.read(filename=data_fn)
    _data.df = _df
    _data.subselect_by_score(score_name='d_metric',n=n)

    # Convert once instead of rebuilding the dict for every parameter.
    _param_std = _data.sub_parameter_df.std(axis=0).to_dict()

    _parameter_std_dict = OrderedDict()
    for pn in _analyzer.parameter_names:
        _parameter_std_dict[pn] = _param_std[pn]

    return _parameter_std_dict

Example #20

0

Show file

def make_rug_plot(config_fn,
                  data_fn,
                  ax=None,
                  plot_fn='rugplot.png'):
    """Draw one rug of normalized errors per QOI and save the figure.

    Args:
        config_fn (str): path to the configuration file.
        data_fn (str): path to the data file.
        ax (matplotlib Axes or None): axes to draw on; a new figure with a
            single axes is created when None.
        plot_fn (str): filename the figure is saved to.
    """

    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)

    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    qoi_targets = o_config.qoi_targets
    error_names = o_data.error_names
    qoi_names = o_data.qoi_names

    # create normalized error
    df = copy.deepcopy(o_data.df[error_names])
    for qn in qoi_names:
        en = "{}.err".format(qn)
        nen = "{}.nerr".format(qn)
        q = qoi_targets[qn]
        # NOTE(review): this evaluates to (err / q) - q; confirm that this
        # is the intended normalization rather than err / q.
        df[nen]=o_data.df[en]/q-q

    _nrows = o_data.df.shape[0]

    if ax is None:
        fig, ax = plt.subplots(nrows=1,ncols=1)
    else:
        # A caller-supplied axes already belongs to a figure; use it so the
        # savefig call below has a figure to work on.
        fig = ax.get_figure()

    # One horizontal rug per QOI, drawn at y = 1, 2, ...
    for iq,qn in enumerate(qoi_names):
        ax.scatter(
            df["{}.nerr".format(qn)],
            _nrows*[iq+1],
            marker='|',
            s=100.,
            color='k'
        )

    plt.sca(ax)
    # Tick 0 gets an empty label; ticks 1..N carry the QOI names.
    plt.yticks(range(len(qoi_names)+1),['']+qoi_names)
    fig.savefig(plot_fn)

Example #21

0

Show file

File: manifold_analysis.py Project: mastricker/pypospack

    # Locate the Si Stillinger-Weber example data under the pypospack root.
    pypospack_root_dir = pypospack.utils.get_pypospack_root_directory()

    config_fn = os.path.join(
            pypospack_root_dir,
            'data','Si__sw__data','pareto_optimization_unconstrained',
            'pyposmat.config.in')
    data_fn = os.path.join(
            pypospack_root_dir,
            'data','Si__sw__data','pareto_optimization_unconstrained',
            'pyposmat.kde.20.out')

    # Load the configuration and the data file.
    o_config = PyposmatConfigurationFile()
    o_config.read(filename=config_fn)
    
    o_data = PyposmatDataFile()
    o_data.read(filename=data_fn)

    # Settings for the manifold-learning step.
    manifold_learn_config = OrderedDict()
    manifold_learn_config['manifold_type'] = 'tsne'
    manifold_learn_config['pypospack_config_fn'] = config_fn 
    manifold_learn_config['pypospack_data_fn'] =  data_fn

    # One row of three axes.
    fig,ax = plt.subplots(1,3)


    # This branch is not taken while 'manifold_type' is set to 'tsne' above.
    # NOTE(review): `manifold` is not defined in the visible code; this may
    # have been meant to be `manifold_learn_config` - confirm before use.
    if manifold_learn_config['manifold_type'] == 'mds':
        manifold['config'] = OrderedDict()
        manifold['config']['n_components'] = 2
        manifold['config']['max_iter'] = 1000
        manifold['config']['n_init'] = 1

Example #22

0

Show file

    _n_potentials = 30
    # The backslashes are escaped explicitly: "\p" is not a valid escape
    # sequence and newer Python versions warn about it.  The resulting
    # strings are unchanged.
    # NOTE(review): a backslash separator only works on Windows; consider
    # os.path.join if this has to run elsewhere.
    _data_fn = "data__Ni__eam__born_exp_bjs_01\\pyposmat.results.0.out"
    #_data_fn = "results.temp.out"
    _config_fn = "data__Ni__eam__born_exp_bjs_01\\pyposmat.config.in"
    _plot_fn = "rugplot.png"

    make_rugplots = False

    print(80*'-')
    print("reading the configuration file {}...".format(_config_fn))
    config=PyposmatConfigurationFile()
    config.read(filename=_config_fn)
    qoi_targets=get_qoi_targets(config)
    print("reading the data file {}...".format(_data_fn))
    datafile=PyposmatDataFile()
    datafile.read(filename=_data_fn)

    from pypospack.pareto import pareto

    # Work on a copy, so the columns added below do not touch datafile.df.
    df = copy.deepcopy(datafile.df)
    nr,nc = df.shape
    _nsimulations = OrderedDict()
    _nsimulations['start'] = nr
    # Add one "<qoi>.abserr" column per QOI holding |<qoi>.err|.
    abs_error_names = ["{}.abserr".format(q) for q in datafile.qoi_names]
    for q in datafile.qoi_names:
        qe = "{}.err".format(q)
        qne = "{}.abserr".format(q)
        df[qne] = df[qe].abs()
    names = list(df.columns.values)
    abs_err_idx = [names.index(n) for n in abs_error_names]
    # The absolute errors are handed to pareto() as a list of rows.
    pareto_idx = pareto(df[abs_error_names].values.tolist())

Example #23

0

Show file

File: parallelcoordinates.py Project: eragasa/pypospack

class PyposmatParallelCoordinates(object):
    """Hold the configuration and data for a parallel-coordinates plot.

    Both can be supplied as a file path or as an already-loaded object.
    The ``configuration`` and ``data`` properties assert the type of
    whatever is assigned to them.
    """

    def __init__(self):
        self._configuration = PyposmatConfigurationFile()
        self._data = PyposmatDataFile()

    def set_configuration(self, configuration):
        """Set the configuration from a path (str) or a loaded object.

        Raises:
            TypeError: if ``configuration`` is neither a ``str`` nor a
                ``PyposmatConfigurationFile``.
        """
        if isinstance(configuration, str):
            self.set_configuration_by_path(path=configuration)
        elif isinstance(configuration, PyposmatConfigurationFile):
            # The method is named ..._by_obj; the old call used a
            # nonexistent ..._by_object.
            self.set_configuration_by_obj(config_obj=configuration)
        else:
            raise TypeError(
                'configuration cannot be type:{}'.format(type(configuration)))

    def set_data(self, data):
        """Set the data from a path (str) or a loaded object.

        Raises:
            TypeError: if ``data`` is neither a ``str`` nor a
                ``PyposmatDataFile``.
        """
        if isinstance(data, str):
            self.set_data_by_path(path=data)
        elif isinstance(data, PyposmatDataFile):
            self.set_data_by_obj(data_obj=data)
        else:
            raise TypeError('data cannot be type:{}'.format(type(data)))

    def set_configuration_by_path(self, path):
        """Read the configuration file at ``path``."""
        assert isinstance(path, str)
        self.configuration = PyposmatConfigurationFile()
        self.configuration.read(path)

    def set_configuration_by_obj(self, config_obj):
        """Use ``config_obj`` as the configuration."""
        assert isinstance(config_obj, PyposmatConfigurationFile)
        self.configuration = config_obj

    def set_data_by_path(self, path):
        """Read the data file at ``path``."""
        assert isinstance(path, str)
        self.data = PyposmatDataFile()
        self.data.read(path)

    def set_data_by_obj(self, data_obj):
        """Use ``data_obj`` as the data."""
        assert isinstance(data_obj, PyposmatDataFile)
        self.data = data_obj

    @property
    def configuration(self):
        return self._configuration

    @configuration.setter
    def configuration(self, configuration):
        assert isinstance(configuration, PyposmatConfigurationFile)
        self._configuration = configuration

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        # Check the assigned value; the old check referenced an undefined
        # name (configuration).
        assert isinstance(data, PyposmatDataFile)
        self._data = data

    def plot(self, path):
        """Validate the output path; no drawing code is visible here."""
        assert isinstance(path, str)

Example #24

0

Show file

File: rugplots_MgO_pareto.py Project: mastricker/pypospack

        config.read(filename=config_fn)
    except FileNotFoundError as e:
        msg = "Cannot find pyposmat configuration file:{}".format(config_fn)
        message_out(msg)

        if pyposmat_config_script is not None:
            # run configuration script
            pass
        else:
            msg = "cannot find pyposmat configuration script because pyposmat_config_script variable was not set"
            message_out(msg)
            raise

    # read the data file
    datafile = PyposmatDataFile()
    datafile.read(filename=datafile_fn)
    (nrows, ncols) = datafile.df.shape

    # Report which file was read and the size of its table.
    msg = "reading data file....\n"
    msg += "\t{}\n".format(datafile_fn)
    msg += "the data file has...\n"
    msg += "\t{} nrows\n".format(nrows)
    msg += "\t{} ncols\n".format(ncols)
    message_out(msg)

    # set the plot filename
    plot_fn = "rugplots_MgO_buck.png"

    # check to see if we have excluded names
    # The exclusion list is empty here, so every configured QOI name is kept.
    qoi_excluded_names = []
    qoi_names = [q for q in config.qoi_names if q not in qoi_excluded_names]

Example #25

0

Show file

import numpy as np
import pandas as pd
from pypospack.pyposmat.data import PyposmatDataFile

data_fn = "pyposmat.results.0.out"
# Pandas data can be turned into a NumPy array (in MATLAB this is a pain).
# TODO: figure out how to make a PCA plot; look for examples in the
# scikit-learn Python package (in a different file).
data = PyposmatDataFile()
data.read(filename=data_fn)
# TODO: look up how to do a kernel density estimate; heat maps are a nice
# way to show one.
# wiki.materialsexmachina.com/index.php/Kernel_Density_Estimate
# PCA notes: the axes chosen are linear combinations of the parameters.
# The first and second axes are orthogonal to each other.  The first axis
# is the direction of greatest variance; the second describes the greatest
# amount of variance in a direction orthogonal to the first PCA vector.
# data.df is a pandas DataFrame; this is the data structure.  Open
# question: how do I get the smallest values?  Then you can search with
# that criterion.

# Data information

print('Data structure =')
print(type(data.df))
print('Shape =')
print(data.df.shape)
print('Columns = ')
print(list(data.df.columns.values))
print('Parameter names =')
print(data.parameter_names)
print('QOI names =')

Example #26

0

Show file

    def run_file_sampling(self, filename_in):
        """Evaluate the parameter sets stored in an existing pyposmat data file.

        Every row of the data file is one candidate parameterization.  Rows
        are distributed round-robin over the MPI ranks: this rank evaluates
        row ``i`` only when ``i % self.mpi_size == self.mpi_rank``.

        For each row handled by this rank the constrained parameters that are
        defined as an ``equilibrium_density`` are recomputed, the inequality
        constraints in ``self.parameter_constraints`` are checked, and the
        parameter set is evaluated.  Successful evaluations are written to
        ``self.pyposmat_datafile_out``; failures are written to
        ``self.pyposmat_badparameters`` and counted.  A progress line is
        printed after every tenth row processed by this rank.

        Args:
            filename_in(str): path to the pyposmat data file to read the
                parameter sets from.  It must provide a ``sim_id`` column and
                one column per name in ``self.parameter_names``.
        """
        _datafile_in = PyposmatDataFile(filename=filename_in)
        _datafile_in.read()

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()

        _n_errors = 0
        # number of rows this rank has finished (successfully or not); kept
        # separate from the row index so that the rank assignment below is
        # not disturbed by the progress bookkeeping
        n_samples_completed = 0
        for i_row, (_, row) in enumerate(_datafile_in.df.iterrows()):
            # round-robin assignment of rows to MPI ranks
            if self.mpi_rank != i_row % self.mpi_size:
                continue

            _parameters = OrderedDict([(p, row[p])
                                       for p in self.parameter_names])
            _sim_id = row['sim_id']

            # recompute parameters which are defined by an equality
            # constraint of the form ['equilibrium_density', a0, latt]
            for p in self.constrained_parameter_names:
                definition = self.parameter_distribution_definition[p]
                if definition[0] != 'equals':
                    continue
                if not isinstance(definition[1], list):
                    continue
                if definition[1][0] == 'equilibrium_density':
                    a0 = definition[1][1]
                    latt = definition[1][2]
                    _parameters[p] = self.calculate_equilibrium_density(
                        a0, latt, _parameters)

            try:
                # check the inequality constraints: parameter names in the
                # constraint string are replaced by their values and the
                # resulting expression is evaluated
                for constraint in self.parameter_constraints.values():
                    _eval_str = constraint
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))
                    # NOTE(security): eval() executes arbitrary code; the
                    # constraint strings must come from a trusted
                    # configuration only.
                    if eval(_eval_str) is False:
                        raise PyposmatBadParameterError()

                _results = self.evaluate_parameter_set(parameters=_parameters)
            except (PyposmatBadParameterError,
                    LammpsSimulationError,
                    PypospackTaskManagerError,
                    PypospackBadEamEosError) as e:
                # record the failed parameter set under the sim_id read from
                # the input file
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=_sim_id, exception=e)
                _n_errors += 1
            else:
                # a numeric sim_id column is read back as floats
                if isinstance(_sim_id, float):
                    _sim_id = int(_sim_id)
                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=_sim_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                n_samples_completed += 1
                if n_samples_completed % 10 == 0:
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = '{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        n_samples_completed, time_total, avg_time, _n_errors)
                    print('rank{}:'.format(self.mpi_rank) + _str_msg)

Example #27

0

Show file

    def run_kde_sampling(self,
                         n_samples,
                         filename_in,
                         cluster_id=None,
                         kde_bw_type='chiu1999'):
        """Sample parameter sets from a Gaussian KDE and evaluate each of them.

        A ``scipy.stats.gaussian_kde`` is built from the free-parameter
        columns of the data file.  For every sample the free parameters are
        drawn from the KDE, the constrained parameters are derived from
        them, the inequality constraints are checked and the parameter set
        is evaluated.  Results are written to ``self.pyposmat_datafile_out``
        and failures to ``self.pyposmat_badparameters``.

        Args:
            n_samples(int): the number of samples to draw from the KDE distribution
            filename_in(str): the path to the datafile from which the parameters will be drawn from
            cluster_id(int): if we need to use a specific cluster_id, we specify it here.
                otherwise, it will be drawn from all parameters contained within the set.
            kde_bw_type(str): the method of estimating the optimal bandwidth

        Raises:
            ValueError: if an 'equals' constraint that is not a lattice type
                is defined by a list, see the first constrained-parameter
                loop below.
        """
        _datafile_in = PyposmatDataFile()
        _datafile_in.read(filename_in)

        # _X holds the free-parameter samples, transposed so that each row is
        # one parameter and each column one observation
        if cluster_id is None:
            _free_parameter_names = [str(v) for v in self.free_parameter_names]
            _X = _datafile_in.df[_free_parameter_names].values.T
        else:
            # subselect the dataframe by the cluster_id of interest
            _datafile_in.df = _datafile_in.df.loc[_datafile_in.df['cluster_id']
                                                  == cluster_id]
            # NOTE(review): the frame was already filtered above, so this
            # second cluster_id selection repeats the same filter
            _X = _datafile_in.df[self.free_parameter_names].loc[
                _datafile_in.df['cluster_id'] == cluster_id].values.T
            # self.log.write("cluster_id {c} _X.shape={x}".format(c=cluster_id, x=_X.shape))

        kde_bw = self.determine_kde_bandwidth(X=_X, kde_bw_type=kde_bw_type)

        _rv_generator = scipy.stats.gaussian_kde(_X, kde_bw)

        self.write_data_out_header()
        self.write_badparameters_header()

        time_start_iteration = time.time()
        _n_errors = 0

        for i_sample in range(n_samples):
            # determine sim_id
            sim_id = self.get_sim_id(i=i_sample)

            # new OrderedDict to hold in parameter values
            _parameters = OrderedDict([(p, None)
                                       for p in self.parameter_names])

            # generate free parameters for ordered dictionary; one sample is
            # drawn and read as column 0, one row per free parameter
            _free_parameters = _rv_generator.resample(1)
            for i, v in enumerate(self.free_parameter_names):
                _parameters[v] = float(_free_parameters[i, 0])

            # determine parameters determined from equality constraints
            for p in self.constrained_parameter_names:
                _constraint_type = self.parameter_distribution_definition[p][0]
                if _constraint_type == 'equals':

                    # this condition is for fitting EoS for EAM function which
                    # requires a reference ground state crystal structure
                    if p.endswith('latticetype'):
                        _v = self.parameter_distribution_definition[p][1]
                        _parameters[p] = _v

                    # process evaluation strings
                    elif type(self.parameter_distribution_definition[p]
                              [1]) is not list:
                        _str_eval = str(
                            self.parameter_distribution_definition[p][1])

                        # replace string values with numerical values
                        for fp in self.free_parameter_names:
                            if fp in _str_eval:
                                _str_eval = _str_eval.replace(
                                    fp, str(_parameters[fp]))

                        # evaluate the string into a float
                        # NOTE(security): eval() executes arbitrary code; the
                        # definition strings must come from a trusted source
                        _parameters[p] = eval(_str_eval)
                    else:
                        # NOTE(review): list-valued definitions reach this
                        # branch and raise, so the 'equilibrium_density' loop
                        # below looks unreachable for them - confirm
                        raise ValueError("oops")

            for p in self.constrained_parameter_names:
                if self.parameter_distribution_definition[p][0] == 'equals':
                    # some EAM potentials have a normalizing equilibrium density
                    # which have to be determined based upon the parameterization of
                    # the electron density function
                    if type(self.parameter_distribution_definition[p]
                            [1]) is list:
                        if self.parameter_distribution_definition[p][1][
                                0] == 'equilibrium_density':
                            a0 = self.parameter_distribution_definition[p][1][
                                1]
                            latt = self.parameter_distribution_definition[p][
                                1][2]
                            _parameters[
                                p] = self.calculate_equilibrium_density(
                                    a0, latt, _parameters)

            try:
                # now we check parameter inequality constraints: parameter
                # names in the constraint string are replaced by their values
                # and the resulting expression is evaluated
                for k, v in self.parameter_constraints.items():
                    _eval_str = v
                    for pn, pv in _parameters.items():
                        _eval_str = _eval_str.replace(pn, str(pv))

                    if eval(_eval_str) is False:
                        s = 'parameter constraint failed, {}'.format(k)
                        raise PyposmatBadParameterError(s,
                                                        parameters=_parameters)
                _results = self.evaluate_parameter_set(parameters=_parameters)
            except PyposmatBadParameterError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except LammpsSimulationError as e:
                # NOTE(review): these asserts look like leftover debugging
                # checks; they are skipped when Python runs with -O
                assert isinstance(self.pyposmat_badparameters,
                                  PyposmatBadParametersFile)
                assert isinstance(self.pyposmat_badparameters.parameter_names,
                                  list)
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackTaskManagerError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            except PypospackBadEamEosError as e:
                self.pyposmat_badparameters.write_simulation_exception(
                    sim_id=sim_id, exception=e)
                _n_errors += 1
            else:

                # determine sim_id
                # NOTE(review): _sim_id is assigned but never used, and the
                # results are written with sim_id=i_sample while failures use
                # the sim_id from get_sim_id() - confirm which is intended
                _sim_id = int(i_sample)

                self.pyposmat_datafile_out.write_simulation_results(
                    filename=self.pyposmat_data_out_filename,
                    sim_id=i_sample,
                    cluster_id=cluster_id,
                    results=_results)
            finally:
                # print out summaries every 10 solutions
                if (i_sample + 1) % 10 == 0:
                    n_samples_completed = i_sample + 1
                    time_end = time.time()
                    time_total = time_end - time_start_iteration
                    avg_time = time_total / n_samples_completed
                    _str_msg = 'R{}:{} samples completed in {:.4f}s. Avg_time = {:.4f}. n_errors = {}'.format(
                        self.mpi_rank, n_samples_completed, time_total,
                        avg_time, _n_errors)
                    self.log(_str_msg)

        # summary of the bandwidth used for this run
        # NOTE(review): this reads self.kde_bw_type / self.kde_bw rather than
        # the local kde_bw_type / kde_bw; presumably determine_kde_bandwidth()
        # sets them - confirm.  How `d` is used is not visible in this excerpt.
        d = OrderedDict()
        d['kde_bandwidth'] = OrderedDict()
        d['kde_bandwidth']['type'] = self.kde_bw_type
        d['kde_bandwidth']['h'] = self.kde_bw

Example #28

0

Show file

File: pipeline.py Project: mastricker/pypospack

class PyposmatPipeline(object):
    """Run a configured sequence of analysis segments over one dataset.

    The configuration maps a step index to a dictionary with the keys
    'segment_type' and 'function_calls'.  For each step a segment object is
    created, handed the current state of the pipeline, driven through its
    function calls, and its state is copied back into the pipeline.
    """

    # state that is handed to every segment and read back after it has run
    _SHARED_STATE = ('df',
                     'parameter_names', 'error_names', 'qoi_names',
                     'n_parameter_names', 'n_error_names', 'n_qoi_names',
                     'pca_names', 'manifold_names')

    def __init__(self,
                 o_logger=None,
                 configuration_fn=None,
                 data_fn=None,
                 df=None):
        """Store the arguments; nothing is read from disk here.

        Args:
            o_logger: object with a write() method used by log(); messages
                are printed when it is None
            configuration_fn: stored as given
            data_fn: stored as given
            df: initial dataframe
        """
        self.o_logger = o_logger
        self.configuration_fn, self.configuration = configuration_fn, None
        self.data_fn, self.data = data_fn, None
        self.df = df

        # every name list ('df' excluded) starts out unset; the n_* lists are
        # the normalized variants
        for attribute in self._SHARED_STATE[1:]:
            setattr(self, attribute, None)

    def read_configuration(self, filename):
        """Load the pipeline configuration from the YAML file `filename`."""
        with open(filename, 'r') as stream:
            self.configuration = yaml.load(stream, OrderedDictYAMLLoader)

    def write_configuration(self, filename, d):
        """Dump the configuration `d` as block-style YAML to `filename`."""
        with open(filename, 'w') as stream:
            yaml.dump(d, stream, default_flow_style=False)

    def read_data(self, filename):
        """Read a pyposmat data file and adopt its dataframe and name lists."""
        datafile = PyposmatDataFile()
        self.data = datafile
        datafile.read(filename)
        self.df = datafile.df
        for attribute in ('parameter_names', 'error_names', 'qoi_names'):
            setattr(self, attribute, getattr(datafile, attribute))

    def log(self, msg):
        """Write `msg` to the logger, or print it when there is no logger."""
        logger = self.o_logger
        if logger is not None:
            logger.write(msg)
            return
        print(msg)

    def reset(self, o_segment):
        """Copy the shared state of a finished segment back into the pipeline."""
        assert isinstance(o_segment, BasePipeSegment)
        for attribute in self._SHARED_STATE:
            setattr(self, attribute, getattr(o_segment, attribute))

    def _create_segment(self, segment_type):
        """Instantiate the segment class registered for `segment_type`.

        The imports are done lazily so that only the requested analysis
        module is loaded.  Raises ValueError for an unknown segment type.
        """
        if segment_type == 'preprocess':
            from pypospack.pyposmat.data.preprocess import PyposmatPreprocessor
            return PyposmatPreprocessor()
        if segment_type == 'pca':
            from pypospack.pyposmat.data.pca_analysis import PyposmatPcaAnalysis
            return PyposmatPcaAnalysis()
        if segment_type == 'cluster':
            from pypospack.pyposmat.data.cluster_analysis import SeatonClusterAnalysis
            return SeatonClusterAnalysis()
        if segment_type == 'manifold':
            from pypospack.pyposmat.data.manifold_analysis import PyposmatManifoldAnalysis
            return PyposmatManifoldAnalysis()
        if segment_type == 'plot':
            from pypospack.pyposmat.data.plotting import PyposmatPlotter
            return PyposmatPlotter()
        raise ValueError("unknown segment type")

    def spawn_pipeline_segment(self, segment_type):
        """Create a segment of the given type, primed with the pipeline state.

        Args:
            segment_type(str): one of 'preprocess', 'pca', 'cluster',
                'manifold' or 'plot'

        Returns:
            the new segment object

        Raises:
            ValueError: if `segment_type` is not one of the known types
        """
        segment = self._create_segment(segment_type)

        segment.o_logger = self.o_logger
        for attribute in self._SHARED_STATE:
            setattr(segment, attribute, getattr(self, attribute))

        return segment

    def make_function_calls(self, o_segment, calls):
        """Call the configured methods of `o_segment`, in iteration order.

        Each entry of `calls` provides the method name under 'function' and
        its keyword arguments under 'args'.
        """
        for key in calls:
            call = calls[key]
            self.log("calling function {}".format(call['function']))
            method = getattr(o_segment, call['function'])
            method(**call['args'])

    def run(self):
        """Execute every configured step and log the time each one took."""
        pipeline_started = time.time()
        for index in self.configuration:
            # +1 so that the steps are counted from one in the log
            self.log("starting step {} of {}".format(
                index + 1, len(self.configuration)))
            step_started = time.time()

            step = self.configuration[index]
            segment = self.spawn_pipeline_segment(step['segment_type'])
            self.make_function_calls(o_segment=segment,
                                     calls=step['function_calls'])
            self.reset(segment)

            step_seconds = round(time.time() - step_started, 4)
            self.log("step {} complete in {} seconds".format(
                index + 1, step_seconds))

        pipeline_seconds = round(time.time() - pipeline_started, 4)
        self.log("pipeline complete in {} seconds\n".format(pipeline_seconds))

Example #29

0

Show file

File: test__init.py Project: mastricker/pypospack

def dev__read():
    """Read the file named by the module-level `configuration_filename`
    into a fresh PyposmatDataFile; nothing is returned."""
    from pypospack.pyposmat.data import PyposmatDataFile
    datafile = PyposmatDataFile()
    datafile.read(filename=configuration_filename)

Example #30

0

Show file

class PyposmatBokehVisualizer(object):
    def __init__(self):
        bokeh_tools = ['box_select', 'reset', 'box_zoom', 'pan']
        self.bokeh_tools = ', '.join(bokeh_tools)

    def read_configuration(self, filename):
        """Load the pyposmat configuration file `filename` into
        self.configuration."""
        configuration = PyposmatConfigurationFile()
        self.configuration = configuration
        configuration.read(filename=filename)

    def read_data(self, filename):
        """Read a pyposmat data file and prepare the dataframes to plot.

        The parameter, qoi and error names of the file are copied into
        lists (under both the long and the abbreviated attribute names) and
        printed.  The matching columns are deep-copied into param_df,
        qoi_df and err_df, which are concatenated column-wise into total_df.
        """
        self.datafile = PyposmatDataFile()
        self.datafile.read(filename=filename)
        datafile = self.datafile

        self.parameter_names = list(datafile.parameter_names)
        self.error_names = list(datafile.error_names)

        self.param_names = list(datafile.parameter_names)
        self.qoi_names = list(datafile.qoi_names)
        self.err_names = list(datafile.error_names)

        # report each group of names, one indexed name per line
        name_groups = (("parameter names", self.param_names),
                       ("qoi names", self.qoi_names),
                       ("error_names", self.err_names))
        for heading, names in name_groups:
            print(heading)
            print(type(names))
            for position, name in enumerate(names):
                print("{:3} {:<20}".format(position, name))

        # generate pandas dataframes
        frame = datafile.df
        self.param_df = copy.deepcopy(frame[self.param_names])
        self.qoi_df = copy.deepcopy(frame[self.qoi_names])
        self.err_df = copy.deepcopy(frame[self.err_names])
        self.total_df = pd.concat(
            [self.param_df, self.qoi_df, self.err_df], axis=1)

    def update_data(self, param_x, param_y, err_x, err_y):
        self.total_df['param_x'] = self.total_df[param_x]
        self.total_df['param_y'] = self.total_df[param_y]
        self.total_df['err_x'] = self.total_df[err_x]
        self.total_df['err_y'] = self.total_df[err_y]
        self.source.data = dict(param_x=self.total_df['param_x'],
                                param_y=self.total_df['param_y'],
                                err_x=self.total_df['err_x'],
                                err_y=self.total_df['err_y'])

    def nix(self, val, lst):
        return [x for x in lst if x != val]

    def setup_bokeh_frame(self, doc):
        """Build the parameter and error scatter plots and add them to `doc`.

        Two graphs are created side by side, one for the parameters and one
        for the errors.  Each has two Select widgets to choose the plotted
        columns and four TextInput widgets to set the axis limits.  Both
        graphs draw from the shared ColumnDataSource ``self.source``, and
        the rows selected in it are written to 'selected_points.txt'.

        Requires read_data() to have been called, and at least two
        parameter names and two error names.

        Args:
            doc: the Bokeh document the layout is added to
        """
        self.source = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))
        # created alongside self.source; not used elsewhere in this method
        self.source_static = ColumnDataSource(
            data=dict(param_x=[], param_y=[], err_x=[], err_y=[]))

        self.param_graph = self._build_graph(self.param_names,
                                             'param_x', 'param_y')
        self.err_graph = self._build_graph(self.err_names,
                                           'err_x', 'err_y')

        param_pane = self._build_pane(self.param_graph)
        err_pane = self._build_pane(self.err_graph)
        layout = bokeh.layouts.row(param_pane, err_pane)
        doc.add_root(layout)

        # fill the data source with the columns that are selected initially
        self.update_data(self.param_graph['obj_x_select'].value,
                         self.param_graph['obj_y_select'].value,
                         self.err_graph['obj_x_select'].value,
                         self.err_graph['obj_y_select'].value)

        # callback functions
        self._connect_axis_selects(self.param_graph, 'param_x', 'param_y')
        self._connect_axis_selects(self.err_graph, 'err_x', 'err_y')

        def source_callback(attrname, old, new):
            self._write_selected_points(list(new['1d']['indices']))

        self.source.on_change('selected', source_callback)

        self._connect_range_entries(self.param_graph)
        self._connect_range_entries(self.err_graph)

    def _build_graph(self, names, x_field, y_field):
        """Create the widgets, figure and glyph of one scatter graph.

        Args:
            names(list): the selectable column names; the first two are the
                initial x and y selections
            x_field(str): column of self.source plotted on the x axis
            y_field(str): column of self.source plotted on the y axis

        Returns:
            dict: the widgets and settings of the graph, keyed by role
        """
        graph = {}
        # each axis offers every name except the other axis' initial choice
        graph['obj_x_select'] = Select(value=names[0],
                                       options=self.nix(names[1], names))
        graph['obj_y_select'] = Select(value=names[1],
                                       options=self.nix(names[0], names))

        entries = (('x_min_entry', 'Min X Value'),
                   ('x_max_entry', 'Max X Value'),
                   ('y_min_entry', 'Min Y Value'),
                   ('y_max_entry', 'Max Y Value'))
        for key, placeholder in entries:
            graph[key] = TextInput(placeholder=placeholder, value='')

        graph['plot_width'] = 610
        graph['plot_height'] = 400
        graph['tools'] = self.bokeh_tools

        x_name = graph['obj_x_select'].value
        y_name = graph['obj_y_select'].value
        graph['obj_figure'] = figure(plot_width=graph['plot_width'],
                                     plot_height=graph['plot_height'],
                                     tools=graph['tools'],
                                     title=x_name + ' vs. ' + y_name)
        graph['obj_figure'].xaxis.axis_label = x_name
        graph['obj_figure'].yaxis.axis_label = y_name

        graph['obj_glyph'] = Circle(x=x_field,
                                    y=y_field,
                                    size=1,
                                    fill_color='#5F77D5',
                                    line_color='#5F77D5')
        graph['obj_figure'].add_glyph(self.source, graph['obj_glyph'])
        return graph

    def _build_pane(self, graph):
        """Stack the selects, the figure and the range entries of a graph."""
        selects = bokeh.layouts.row(graph['obj_x_select'],
                                    graph['obj_y_select'])
        x_entries = bokeh.layouts.row(graph['x_min_entry'],
                                      graph['x_max_entry'])
        y_entries = bokeh.layouts.row(graph['y_min_entry'],
                                      graph['y_max_entry'])
        return bokeh.layouts.column(selects, graph['obj_figure'],
                                    x_entries, y_entries)

    def _connect_axis_selects(self, graph, x_field, y_field):
        """Replot, retitle and relabel a graph when an axis select changes."""

        def x_select_change(attrname, old, new):
            self.source.data[x_field] = self.total_df[new]
            graph['obj_figure'].title.text = (
                new + ' vs. ' + graph['obj_y_select'].value)
            graph['obj_figure'].xaxis.axis_label = new

        def y_select_change(attrname, old, new):
            self.source.data[y_field] = self.total_df[new]
            graph['obj_figure'].title.text = (
                graph['obj_x_select'].value + ' vs. ' + new)
            graph['obj_figure'].yaxis.axis_label = new

        graph['obj_x_select'].on_change('value', x_select_change)
        graph['obj_y_select'].on_change('value', y_select_change)

    def _connect_range_entries(self, graph):
        """Let the four text entries of a graph set its axis limits.

        The entered text is converted with float(), so text that is not a
        number raises ValueError inside the callback.
        """

        def connect(entry_key, range_name, bound):
            # a factory, so that each callback keeps its own range and bound
            def range_callback(attrname, old, new):
                axis_range = getattr(graph['obj_figure'], range_name)
                setattr(axis_range, bound, float(new))

            graph[entry_key].on_change('value', range_callback)

        connect('x_min_entry', 'x_range', 'start')
        connect('x_max_entry', 'x_range', 'end')
        connect('y_min_entry', 'y_range', 'start')
        connect('y_max_entry', 'y_range', 'end')

    def _write_selected_points(self, indices):
        """Write the selected rows of total_df to 'selected_points.txt'.

        The last four columns of total_df are the plotting copies added by
        update_data() and are left out.  The first line of the file holds
        the names of the written columns; every following line holds the
        comma-separated values of one selected row.

        Args:
            indices(list): positions of the selected rows
        """
        # the selection indices are positions in the data source, which was
        # filled from total_df, hence the positional lookup
        columns = self.total_df.columns[:-4]
        rows = [self.total_df.iloc[i][:-4].tolist() for i in indices]

        with open('selected_points.txt', 'w') as f:
            f.write(' '.join(str(column) for column in columns) + '\n')
            for row in rows:
                # same text as str(row) without the enclosing brackets
                f.write(', '.join(repr(value) for value in row) + '\n')

    def start_bokeh_server(self):
        """Serve setup_bokeh_frame as a Bokeh application at '/' and start
        the server's IO loop."""
        handler = FunctionHandler(self.setup_bokeh_frame)
        self.bokeh_app = Application(handler)
        server = Server({'/': self.bokeh_app}, num_procs=1)
        self.bokeh_server = server
        server.start()

        # start io loop for bokeh_server; the page is shown once it runs
        io_loop = server.io_loop
        io_loop.add_callback(server.show, '/')
        io_loop.start()