def __init__(self, num_process: int, design_netlist: PathLike, root_dir: PathLike = None) -> None:
    if root_dir is None:
        self.root_dir: Path = Path(NgSpiceWrapper.BASE_TMP_DIR).resolve()
    else:
        self.root_dir: Path = Path(root_dir).resolve()

    self.num_process: int = num_process
    self.base_design_name: str = Path(design_netlist).stem
    self.gen_dir: Path = self.root_dir / f'designs_{self.base_design_name}'

    self.gen_dir.mkdir(parents=True, exist_ok=True)

    with open(design_netlist, 'r') as raw_file:
        self.content = raw_file.read()

    # get/create cache file
    self.cache_path = self.gen_dir / 'cache.yaml'
    self.cache: Dict[str, Tuple[int, str]]
    self.updated_cache = False

    if self.cache_path.exists():
        self.cache = read_yaml(self.cache_path) or {}
        self.last_cache_mtime = os.stat(str(self.cache_path)).st_mtime
    else:
        self.cache = {}
        self.last_cache_mtime = 0

    # atexit takes care of saving the current cache content in case of an error
    atexit.register(self._write_cache)
def _write_cache(self):
    if self.updated_cache:
        # re-read the yaml if the cache file already exists and has been modified since the last visit
        if self.cache_path.exists():
            if self.last_cache_mtime < os.stat(str(self.cache_path)).st_mtime:
                current_cache = read_yaml(self.cache_path)
            else:
                current_cache = {}
        else:
            current_cache = {}

        current_cache.update(self.cache)
        # print(f'Saving cache for {self.base_design_name} ....')
        write_yaml(self.cache_path, current_cache)
        # update the last mtime stamp after updating the cache file
        self.last_cache_mtime = os.stat(str(self.cache_path)).st_mtime
        self.updated_cache = False
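# Minimal usage sketch (not part of the class above): how caller code might record a new
# result so that _write_cache persists it at exit. The helper name, the key format, and the
# meaning of the (state, path) tuple are assumptions for illustration only; the real keys are
# whatever the simulator wrapper uses to identify a design.
def _hypothetical_add_to_cache(wrapper: 'NgSpiceWrapper', design_id: str, state: int, netlist_path: str) -> None:
    wrapper.cache[design_id] = (state, netlist_path)
    wrapper.updated_cache = True  # _write_cache only touches disk when this flag is set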
def __init__(self, spec_file: str = '', spec_dict: Optional[Mapping[str, Any]] = None, **kwargs) -> None:
    LoggingBase.__init__(self)

    if spec_file:
        specs = read_yaml(spec_file)
    else:
        specs = spec_dict

    self.specs = specs
    params = specs['params']

    try:
        self.work_dir = params['work_dir']
    except KeyError:
        unique_name = time.strftime('%Y%m%d%H%M%S')
        self.work_dir = Path(specs['root_dir']) / f'random_{unique_name}'

    write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    self.ndim = params['ndim']
    self.goal = params['goal_value']
    self.mode = params['mode']
    self.input_scale = params['input_scale']

    eval_fn = params['eval_fn']
    try:
        self.fn = registered_functions[eval_fn]
    except KeyError:
        raise ValueError(f'{eval_fn} is not a valid benchmark function')

    # hacky version of passing input vectors around
    self.input_vectors_norm = [np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
                               for _ in range(self.ndim)]
    self.input_vectors = [self.input_scale * vec for vec in self.input_vectors_norm]
    # TODO: remove this hacky way of keeping track of delta
    self.delta = self.input_vectors_norm[0][-1] - self.input_vectors_norm[0][-2]
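# For reference: with num=100 points on [-1, 1], the normalized grid spacing tracked in
# self.delta works out to 2/99 (~0.0202). A quick standalone sanity check:
import numpy as np

grid = np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
assert np.isclose(grid[-1] - grid[-2], 2.0 / 99)  # spacing between adjacent grid points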
import argparse

from utils.file import read_yaml
from utils.pdb import register_pdb_hook

register_pdb_hook()

import logging
logging.basicConfig(level=logging.INFO)

from bbbo.explorer.bo import BOExplorer


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('spec', type=str, help='input yaml file.')
    _args = parser.parse_args()
    return _args


if __name__ == '__main__':
    _args = _parse_args()
    spec = read_yaml(_args.spec)
    explorer = BOExplorer(spec)
    res = explorer.start()
exper_path = Path(sys.argv[1])
seed_paths = [x for x in exper_path.iterdir() if x.is_dir()]

for seed_path in seed_paths:
    seed_name = seed_path.name
    output_path = seed_path / 'pmap_output'
    output_path.mkdir(exist_ok=True)

    writer_path = output_path / 'writer'
    if writer_path.exists():
        shutil.rmtree(writer_path)
    writer = SummaryWriter(log_dir=str(writer_path))

    res = read_pickle(seed_path / 'res.pkl')
    yaml_specs = read_yaml(seed_path / 'spec.yaml')
    env = import_bb_env(yaml_specs['env'])

    Xs = np.array(res['X'])
    costs = np.array(res['y'])
    vmax = 3
    values = -np.log(costs + 10 ** (-vmax))

    nsamples, xdim = Xs.shape
    time_cnt = np.arange(nsamples)
    params_names = list(env.params_vec.keys())
    prefix = f'{seed_path}/{seed_name}'

    for i in range(xdim):
        xmin = env.input_bounds[0][i] * 0.9
def __init__(self, spec_file: str = '', spec_dict: Optional[Mapping[str, Any]] = None,
             load: bool = False, use_time_stamp: bool = True, **kwargs) -> None:
    """
    Parameters
    ----------
    spec_file: str
    spec_dict: Dict[str, Any]
        some non-obvious fields
        elite_criteria: str
            'optim': from sorted x1, ..., xn choose the p-quantile
            'csp': constraint satisfaction is enough; from x1, ..., xn choose the p-quantile
            if it is worse than the constraint, else choose all that are better than the constraint
        allow_repeated: bool
            True to allow repeated samples to be added to the buffer, else all samples in the
            buffer will have equal likelihood when drawn from it.
        on_policy: bool
            True to allow on-policy sample usage, meaning that we won't use samples from
            previous policies to train the current policy (samples are not drawn from CacheBuffer)
    load: bool
    kwargs: Dict[str, Any]
    """
    LoggingBase.__init__(self)

    if spec_file:
        specs = read_yaml(spec_file)
    else:
        specs = spec_dict

    self.specs = specs
    params = specs['params']

    if load:
        self.work_dir = Path(spec_file).parent
    else:
        suffix = params.get('suffix', '')
        prefix = params.get('prefix', '')
        if use_time_stamp:
            unique_name = time.strftime('%Y%m%d%H%M%S')
            unique_name = get_full_name(unique_name, prefix, suffix)
        else:
            unique_name = f'{prefix}' if prefix else ''
            if suffix:
                unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'
        self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
        write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    self.load = load
    self.seed = params['seed']
    self.ndim = params['ndim']
    self.nsamples = params['nsamples']
    self.n_init_samples = params['n_init_samples']
    self.niter = params['niter']
    self.cut_off = params['cut_off']
    self.input_scale = params['input_scale']
    # goal has to always be positive; if not, we'll change mode and negate self.goal
    self.goal = params['goal_value']
    self.mode = params['mode']

    self.allow_repeated = params.get('allow_repeated', False)
    self.elite_criteria = params.get('elite_criteria', 'optim')
    self.on_policy = params.get('on_policy', False)

    if self.elite_criteria not in ['csp', 'optim']:
        raise ValueError('invalid elite criteria: optim | csp')

    # allow_repeated does not make sense when sampling is on-policy (on-policy: T -> repeat: T)
    self.allow_repeated = self.on_policy or self.allow_repeated

    eval_fn = params['fn']
    try:
        fn = registered_functions[eval_fn]
        self.fn = fn
    except KeyError:
        raise ValueError(f'{eval_fn} is not a valid benchmark function')

    if self.goal < 0:
        self.mode = 'le' if self.mode == 'ge' else 'ge'
        self.fn = lambda x: -fn(x)

    # hacky version of passing input vectors around
    self.input_vectors_norm = [np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
                               for _ in range(self.ndim)]
    self.input_vectors = [self.input_scale * vec for vec in self.input_vectors_norm]

    self.cem = CEM(self.input_vectors, dist_type=params['base_fn'],
                   average_coeff=params.get('average_coeff', 1),
                   gauss_sigma=params.get('gauss_sigma', None))
    self.buffer = CacheBuffer(self.mode, self.goal, self.cut_off,
                              with_frequencies=self.allow_repeated)
    self.buffer_temp = {}
    self.fvals = SortedList()
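# Illustrative sketch of the two elite_criteria options described in the docstring above,
# written for a minimization-style ('le') goal. This is a hypothetical helper, not the
# class's actual selection code; the name, signature, and tie handling are assumptions.
import numpy as np

def select_elites_sketch(samples: np.ndarray, fvals: np.ndarray, quantile: float,
                         goal: float, criteria: str = 'optim') -> np.ndarray:
    cutoff = np.quantile(fvals, quantile)          # p-quantile of the current batch
    if criteria == 'optim':
        mask = fvals <= cutoff                     # always keep the best p-fraction
    elif criteria == 'csp':
        # constraint satisfaction is enough: if the p-quantile already beats the goal,
        # keep everything that satisfies the constraint instead of only the top p-fraction
        mask = fvals <= (cutoff if cutoff > goal else goal)
    else:
        raise ValueError('invalid elite criteria: optim | csp')
    return samples[mask]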
def main(specs, force_replot=False):
    nsamples = specs['nsamples']
    root_dir = Path(specs.get('root_dir', ''))
    prefix = specs.get('prefix', '')
    method = specs.get('method', 'pca')
    seed = specs.get('seed', 10)
    solution_only = specs.get('solution_only', False)

    samples_list, labels_list = [], []
    init_pop_list, pop_labels_list = [], []
    label_map = {}

    work_dir = root_dir / 'model_comparison'
    datasets_path = work_dir / 'datasets'
    datasets_path.parent.mkdir(exist_ok=True, parents=True)

    sol_all = 'sol' if solution_only else 'all'
    dataset_suf = f'n{nsamples}_' + sol_all
    fig_name = get_full_name('comparison', prefix, f'{method}_{sol_all}_s{seed}')

    # try reading the cache set
    try:
        cache = read_pickle(work_dir / 'cache.pickle')
    except FileNotFoundError:
        cache = set()

    # find a unique fname based on the content of the spec file
    spec_immutable = to_immutable(specs)
    for index in itertools.count():
        fig_path = work_dir / f'{fig_name}_{index}.png'
        # increment index if fig_path exists and the spec is new
        if not fig_path.exists() or force_replot:
            break
        else:
            if spec_immutable in cache:
                print('nothing is new')
                exit()
    cache.add(spec_immutable)

    # noinspection PyUnboundLocalVariable
    fig_title = str(fig_path.stem)

    for label, (label_str, model_str) in enumerate(specs['models'].items()):
        data_path = datasets_path / f'{model_str}_{dataset_suf}.pickle'
        if data_path.exists():
            print(f'loading dataset {label}: {label_str}')
            content = read_pickle(data_path)
            samples = content['samples']
        else:
            print(f'sampling model {label} : {label_str}')
            model_path = root_dir / model_str / 'params.yaml'
            model_specs = read_yaml(model_path)
            alg_cls_str = model_specs.pop('alg_class')
            alg_cls = cast(Type[LoggingBase], import_class(alg_cls_str))
            alg = alg_cls(model_path, load=True)
            # noinspection PyUnresolvedReferences
            samples = alg.load_and_sample(nsamples, only_positive=solution_only)
            print(f'saving into {str(data_path)}')
            write_pickle(data_path, dict(samples=samples))

        labels = np.ones(shape=samples.shape[0]) * label
        label_map[label] = label_str

        # content = read_pickle(root_dir / model_str / 'init_buffer.pickle')
        # init_pop = list(map(lambda x: x.item, content['init_buffer'].db_set.keys()))
        # init_pop_list += init_pop
        # pop_labels_list.append(np.ones(shape=len(init_pop)) * label)

        # noinspection PyUnresolvedReferences
        samples_list.append(samples)
        labels_list.append(labels)

    samples = np.concatenate(samples_list, axis=0)
    labels = np.concatenate(labels_list, axis=0)
    # pops = np.stack(init_pop_list, axis=0)
    # pop_labels = np.concatenate(pop_labels_list, axis=0)

    if method == 'pca':
        pca_scatter2d(samples, labels, label_map, fpath=fig_path, alpha=0.5,
                      title=fig_title, edgecolors='none', s=10)
    elif method == 'tsne':
        # import matplotlib.pyplot as plt
        # plt.close()
        # _, axes = plt.subplots(2, 1)
        # tsne_scatter2d(samples, labels, label_map, seed=seed, ax=axes[0], alpha=0.5,
        #                title=fig_title, edgecolors='none', s=10)
        tsne_scatter2d(samples, labels, label_map, seed=seed, fpath=fig_path, alpha=0.5,
                       title=fig_title, edgecolors='none', s=10)
        # tsne_scatter2d(pops, pop_labels, label_map, seed=seed, ax=axes[1], alpha=0.5,
        #                title=fig_title, edgecolors='none', s=10)
        # plt.tight_layout()
        # plt.savefig(fig_path)
    else:
        raise ValueError('invalid dimensionality reduction, valid options are {"pca" | "tsne"}')

    # update cache
    write_pickle(work_dir / 'cache.pickle', cache)
"""A script that maps old modules to new modules for pickle compatible imports and re-saves the data""" import pickle import sys import torch from pathlib import Path from importlib import import_module from utils.pdb import register_pdb_hook from utils.file import read_yaml from utils.importlib import import_class register_pdb_hook() config_file = Path(__file__).parent / 'compatibilty.yaml' module_specs = read_yaml(config_file) import_as = module_specs['import_as'] from_import = module_specs['from_import'] for key, val in import_as.items(): sys.modules[key] = import_module(val) for key, val in from_import.items(): sys.modules[key] = import_class(val) device = torch.device('cuda') if torch.cuda.is_available() else torch.device( 'cpu') for work_dir in Path('data').iterdir(): if work_dir.is_dir(): for file in work_dir.iterdir(): try:
import argparse
import pdb

from utils.pdb import register_pdb_hook
from utils.importlib import import_class
from utils.file import read_yaml

register_pdb_hook()


def parse_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser('Test black box environments')
    parser.add_argument('spec_file', type=str, help='the spec file for the black box environment')
    return parser.parse_args()


if __name__ == '__main__':
    _args = parse_arguments()
    specs = read_yaml(_args.spec_file)
    engine = import_class(specs['bb_engine'])(specs=specs['bb_engine_params'])

    # just generates the designs
    designs_empty = engine.generate_rand_designs(n=2, evaluate=False)
    # evaluates the designs
    designs_populated = engine.evaluate(designs_empty, do_interpet=True)
    # generates the designs and also evaluates them
    designs = engine.generate_rand_designs(n=2, evaluate=True)

    pdb.set_trace()
if __name__ == '__main__':
    _args = parse_arguments()
    # noinspection PyUnresolvedReferences
    fpath = _args.spec_fpath
    force_replot = _args.force
    specs = read_yaml(fpath)
    main(specs, force_replot)
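# Hypothetical spec for the comparison script above, shown as the dict that read_yaml would
# return. The field names are the ones main() reads; the root_dir value and the run-folder
# names under 'models' are made up for illustration.
example_spec = dict(
    root_dir='data/synth_runs',          # assumed experiment root containing one folder per run
    prefix='cmp',
    method='tsne',                       # 'pca' or 'tsne'
    seed=10,
    nsamples=1000,
    solution_only=False,
    models={
        'cem': 'cem_20200101000000',     # label -> run folder (each must contain params.yaml)
        'autoreg': 'autoreg_20200101000000',
    },
)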
                        'If True loads the results from root_dir in spec_file and continues '
                        'training')
    parsed_args = parser.parse_args()
    return parsed_args


if __name__ == '__main__':
    # weird issue with matplotlib backend https://github.com/matplotlib/matplotlib/issues/8795
    multiprocessing.set_start_method('spawn')

    _args = parse_arguments()
    # noinspection PyUnresolvedReferences
    fpath = _args.spec_fpath
    specs = read_yaml(fpath)

    alg_cls_str = specs['alg_class']
    alg_cls = cast(Type[AlgBase], import_class(alg_cls_str))

    params = specs['params']
    root_dir = Path('data', f'{params["prefix"]}_{params["suffix"]}')
    specs['root_dir'] = str(root_dir)

    processes = []
    for seed_iter in range(_args.nseeds):
        spec_seed = deepcopy(specs)
        seed = (seed_iter + 1) * 10
        spec_seed['params']['seed'] = seed
        spec_seed['params']['prefix'] = f's{seed}'
        spec_seed['params']['suffix'] = ''
def import_bb_env(env_yaml_str: Union[str, Path]) -> EvaluationEngineBase:
    specs = read_yaml(env_yaml_str)
    env = import_class(specs['bb_engine'])(specs=specs['bb_engine_params'])
    return env
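# Usage sketch, assuming a spec file with the 'bb_engine' / 'bb_engine_params' keys read above;
# the path and the sample count are hypothetical.
env = import_bb_env('specs/opamp_env.yaml')
designs = env.generate_rand_designs(n=2, evaluate=True)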
    parsed_args = parser.parse_args()
    return parsed_args


if __name__ == '__main__':
    _args = parse_arguments()

    root_dir = Path(_args.save_path).absolute()
    root_dir.mkdir(parents=True, exist_ok=True)

    perf_list, df_list = [], []
    keys = [Path(path).stem for path in _args.folders]
    for key, path_str in zip(keys, _args.folders):
        path = Path(path_str)
        perf_list.append(read_yaml(path / 'avg_performance.yaml'))
        df_tmp = pd.read_hdf(path / 'df.hdf5', 'df')
        df_tmp['name'] = key
        df_list.append(df_tmp)

    perf_df = pd.DataFrame(perf_list, index=keys)
    perf_df.to_excel(root_dir / 'perf_cmp.xlsx')
    print(perf_df)

    df = pd.concat(df_list, ignore_index=True, sort=False)
    plot_exp(df, 'sample_cnt', 'n_sols_in_buffer', 'name',
             root_dir / 'nsols_sample_cnt.png', _args.legends)
    plot_with_goal(df, 'sample_cnt', 'top_20', 'name',
def __init__(self, spec_file: str = '', spec_dict: Optional[Mapping[str, Any]] = None,
             load: bool = False, use_time_stamp: bool = True, init_buffer_path=None,
             **kwargs) -> None:
    LoggingBase.__init__(self)

    if spec_file:
        specs = read_yaml(spec_file)
    else:
        specs = spec_dict

    self.specs = specs
    params = specs['params']

    if load:
        self.work_dir = Path(spec_file).parent
    else:
        suffix = params.get('suffix', '')
        prefix = params.get('prefix', '')
        if use_time_stamp:
            unique_name = time.strftime('%Y%m%d%H%M%S')
            unique_name = get_full_name(unique_name, prefix, suffix)
        else:
            unique_name = f'{prefix}' if prefix else ''
            if suffix:
                unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'
        self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
        write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    self.load = load
    self.seed = params.get('seed', 10)
    self.ndim = params['ndim']
    self.bsize = params['batch_size']
    self.hiddens = params['hidden_list']
    self.niter = params['niter']
    self.goal = params['goal_value']
    self.mode = params['mode']
    self.viz_rate = self.niter // 10
    self.lr = params['lr']
    self.nepochs = params['nepochs']
    self.nsamples = params['nsamples']
    self.n_init_samples = params['n_init_samples']
    self.init_nepochs = params['init_nepochs']
    self.cut_off = params['cut_off']
    self.beta = params['beta']
    self.nr_mix = params['nr_mix']
    self.base_fn = params['base_fn']
    self.only_pos = params['only_positive']
    # whether to run 1000 epochs of training for the later rounds of iteration
    self.full_training = params['full_training_last']
    self.input_scale = params['input_scale']
    self.fixed_sigma = params.get('fixed_sigma', None)
    self.on_policy = params.get('on_policy', False)
    self.problem_type = params.get('problem_type', 'csp')

    self.allow_repeated = params.get('allow_repeated', False)
    self.allow_repeated = self.on_policy or self.allow_repeated

    self.important_sampling = params.get('important_sampling', False)
    self.visited_dist: Optional[nn.Module] = None
    self.visited_fixed_sigma = params.get('visited_fixed_sigma', None)
    self.visited_nr_mix = params.get('visited_nr_mix', None)
    self.explore_coeff = params.get('explore_coeff', None)
    self.nepoch_visited = params.get('nepoch_visited', -1)

    self.normalize_weight = params.get('normalize_weight', True)
    self.add_ent_before_norm = params.get('add_entropy_before_normalization', False)
    self.weight_type = params.get('weight_type', 'ind')

    self.model_visited = self.explore_coeff is not None or self.important_sampling

    if self.model_visited and self.nepoch_visited == -1:
        raise ValueError('nepoch_visited should be specified when a model is '
                         'learning visited states')

    self.init_buffer_paths = init_buffer_path

    eval_fn = params['eval_fn']
    try:
        self.fn = registered_functions[eval_fn]
    except KeyError:
        raise ValueError(f'{eval_fn} is not a valid benchmark function')

    self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f'device: {self.device}')
    self.cpu = torch.device('cpu')
    self.model: Optional[nn.Module] = None
    self.buffer = None
    self.opt = None

    # hacky version of passing input vectors around
    self.input_vectors_norm = [np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
                               for _ in range(self.ndim)]
    self.input_vectors = [self.input_scale * vec for vec in self.input_vectors_norm]
    # TODO: remove this hacky way of keeping track of delta
    self.delta = self.input_vectors_norm[0][-1] - self.input_vectors_norm[0][-2]

    # keep track of lo and hi for indices
    self.params_min = np.array([0] * self.ndim)
    self.params_max = np.array([len(x) - 1 for x in self.input_vectors])

    self.fvals = SortedList()
def _read_cache(self) -> Dict[ImmutableSortedDict, str]:
    if self._cache_fname.exists():
        return read_yaml(self._cache_fname)
    return {}