Example #1
    def _run_alg(self):
        iter_cnt, tr_losses, avg_cost = self.setup_model_state()

        while iter_cnt < self.niter:
            self.collect_samples(self.nsamples)
            avg_cost.append(self.buffer.mean)

            if iter_cnt == self.niter - 1 and self.full_training:
                tr_loss, tr_loss_list = self.train(iter_cnt + 1,
                                                   self.nepochs * 10)
            else:
                tr_loss, tr_loss_list = self.train(iter_cnt + 1, self.nepochs)

            tr_losses.append(tr_loss_list)
            if (iter_cnt + 1) % 10 == 0 and self.ndim == 2:
                _, xdata_ind = self.model.sample_model(1000, self.bsize,
                                                       self.input_vectors_norm)
                fpath = self.work_dir / get_full_name(
                    name='dist',
                    prefix='training',
                    suffix=f'{iter_cnt+1}_after')
                data_ind = xdata_ind.to(
                    torch.device('cpu')).data.numpy().astype('int')
                data = index_to_xval(self.input_vectors, data_ind)
                s = self.input_scale
                _range = np.array([[-s, s], [-s, s]])
                plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')

            iter_cnt += 1
            self.save_checkpoint(iter_cnt, tr_losses, avg_cost)

        plot_learning_with_epochs(fpath=self.work_dir / 'learning_curve.png',
                                  training=tr_losses)
        plot_cost(avg_cost, fpath=self.work_dir / 'cost.png')
Example #2
    def setup_state(self):
        if self.load:
            ckpt_dict = self.load_checkpoint(self.work_dir /
                                             'checkpoint.pickle')
            iter_cnt = ckpt_dict['iter_cnt']
            avg_cost = ckpt_dict['avg_cost']
            sim_cnt_list = ckpt_dict['sim_cnt']
            n_sols_in_buffer = ckpt_dict['n_sols_in_buffer']
            sample_cnt_list = ckpt_dict['sample_cnt']
            top_means = dict(top_20=ckpt_dict['top_20'],
                             top_40=ckpt_dict['top_40'],
                             top_60=ckpt_dict['top_60'])
        else:
            iter_cnt = 0
            avg_cost, sim_cnt_list, sample_cnt_list, n_sols_in_buffer = [], [], [], []
            top_means = dict(top_20=[], top_40=[], top_60=[])
            samples, sample_fvals = self.collect_samples(self.n_init_samples,
                                                         uniform=True)
            top_samples = self.get_top_samples(0, samples, sample_fvals)
            self.cem.fit(top_samples)
            if self.ndim == 2:
                xdata_ind = self.cem.sample(1000)
                fpath = self.work_dir / get_full_name(
                    name='dist', prefix='training', suffix='0_after')
                s = self.input_scale
                _range = np.array([[-s, s], [-s, s]])
                plt_hist2D(index_to_xval(self.input_vectors, xdata_ind),
                           fpath=fpath,
                           range=_range,
                           cmap='binary')

        return iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list, n_sols_in_buffer, top_means
Example #3
    def train(self, iter_cnt: int, nepochs: int, split=1.0):
        # treat the sampled data as a static data set and take some gradient steps on it
        xtr, xte, wtr, wte = self.buffer.draw_tr_te_ds(split=split)
        if self.ndim == 2:
            fpath = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{iter_cnt}_before')
            samples = index_to_xval(self.input_vectors,
                                    xtr[:, 1, :].astype('int'))
            s = self.input_scale
            _range = np.array([[-s, s], [-s, s]])
            plt_hist2D(samples, fpath=fpath, range=_range, cmap='binary')

        # per epoch
        print('-' * 50)
        tr_loss = 0
        te_loss = 0
        tr_loss_list = []
        for epoch_id in range(nepochs):
            tr_nll = self.run_epoch(xtr, wtr, mode='train')
            tr_loss_list.append(tr_nll)
            tr_loss += tr_nll / self.nepochs

            # self.writer.add_scalar('loss', tr_nll, epoch_id)
            print(f'[train_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')

            if split < 1:
                te_nll = self.run_epoch(xte, wte, mode='test')
                te_loss += te_nll / self.nepochs
                print(f'[test_{iter_cnt}] epoch {epoch_id} loss = {te_nll}')

        if split < 1:
            return tr_loss, te_loss

        return tr_loss, tr_loss_list
Example #4
    def setup_model_state(self):
        # load the model or proceed without loading checkpoints
        if self.load:
            items = self.load_checkpoint(self.work_dir / 'checkpoint.tar')
        else:
            # collect samples using the random initial model (probably a bad initialization)
            self.model.eval()
            self.collect_samples(self.n_init_samples)
            # train the init model
            self.model.train()
            self.train(0, self.n_init_samples)

            if self.ndim == 2:
                _, xdata_ind = self.model.sample_model(1000, self.bsize,
                                                       self.input_vectors_norm)
                fpath = self.work_dir / get_full_name(
                    name='dist', prefix='training', suffix='0_after')
                data_ind = xdata_ind.to(
                    torch.device('cpu')).data.numpy().astype('int')
                data = index_to_xval(self.input_vectors, data_ind)
                s = self.input_scale
                _range = np.array([[-s, s], [-s, s]])
                plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')

            items = (0, [], [])
            self.save_checkpoint(*items)
        return items
Example #5
    def _plot_dist(self, data_indices: torch.Tensor, name, prefix, suffix):
        fpath = self.work_dir / get_full_name(name, prefix, suffix)
        data_ind = data_indices.to(self.cpu).data.numpy().astype('int')
        data = index_to_xval(self.input_vectors, data_ind)
        s = self.input_scale
        _range = np.array([[-s, s], [-s, s]])
        plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')
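Several of these examples convert integer grid indices back to input values with index_to_xval(self.input_vectors, data_ind). Its implementation is not shown here; a minimal sketch consistent with how it is called (hypothetical name index_to_xval_sketch, toy values) could look like this:

import numpy as np

def index_to_xval_sketch(input_vectors, data_ind):
    # input_vectors: list of ndim 1-D grids; data_ind: (N, ndim) integer index array
    # pick, for each dimension d, the grid value at the sampled index
    return np.stack([vec[data_ind[:, d]] for d, vec in enumerate(input_vectors)], axis=-1)

# usage sketch mirroring the examples: a 2-D grid scaled by input_scale
input_scale = 5.0
input_vectors = [input_scale * np.linspace(-1.0, 1.0, num=100, dtype='float32')
                 for _ in range(2)]
data_ind = np.array([[0, 99], [50, 50]])
print(index_to_xval_sketch(input_vectors, data_ind))  # [[-5., 5.], [~0.05, ~0.05]]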
Example #6
    def report_accuracy(self, ntimes, nsamples):
        accuracy_list, times, div_list = [], [], []

        if self.ndim == 2:
            xsamples, _, _ = self._sample_model_for_eval(nsamples)
            s = self.input_scale
            _range = np.array([[-s, s], [-s, s]])
            plt_hist2D(xsamples, range=_range,
                       fpath=self.work_dir / get_full_name('trained_policy'), cmap='binary')

        for iter_id in range(ntimes):
            s = time.time()
            xsample, sample_ids, fval = self._sample_model_for_eval(nsamples)
            if self.mode == 'le':
                acc = (fval <= self.goal).sum(-1) / nsamples
                pos_samples = xsample[fval <= self.goal]
            else:
                acc = (fval >= self.goal).sum(-1) / nsamples
                pos_samples = xsample[fval >= self.goal]

            if len(pos_samples) >= self.ndim:
                div = get_diversity_fom(self.ndim, pos_samples)
                div_list.append(div)

            times.append(time.time() - s)
            accuracy_list.append(acc)

        print(f'gen_time / sample = {1e3 * np.mean(times).astype("float") / nsamples:.3f} ms')
        print(f'accuracy_avg = {100 * np.mean(accuracy_list).astype("float"):.6f}, '
              f'accuracy_std = {100 * np.std(accuracy_list).astype("float"):.6f}, '
              f'solution diversity = {np.mean(div_list).astype("float"):.6f}')
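The accuracy reported above is just the fraction of generated samples that meet the goal ((fval <= goal) in 'le' mode, (fval >= goal) in 'ge' mode). A quick standalone check of that expression with toy numbers (not taken from the source):

import numpy as np

goal = 1.0
fval = np.array([0.2, 1.5, 0.7, 3.0, 0.9])      # toy objective values
nsamples = len(fval)
acc = (fval <= goal).sum(-1) / nsamples          # 'le' mode: 3 of 5 samples satisfy the goal
print(acc)                                       # 0.6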
Example #7
    def check_solutions(self, ntimes: int, nsamples: int) -> None:
        accuracy_rnd_list = []
        total_var_list, pos_var_list = [], []
        diversity_fom_list = []

        if self.ndim == 2:
            rnd_samples, _ = self.sample_data(self.ndim, self.input_vectors, nsamples)
            s = self.input_scale
            _range = np.array([[-s, s], [-s, s]])
            plt_hist2D(rnd_samples, fpath=self.work_dir / get_full_name('random_policy'),
                       range=_range, cmap='binary')
            x, y = self.input_vectors
            plot_fn2d(x, y, self.fn, fpath=str(self.work_dir / 'fn2D.png'), cmap='viridis')
            show_solution_region(x, y, self.fn, self.goal, mode=self.mode,
                                 fpath=str(self.work_dir / 'dist2D.png'), cmap='binary')

        vector_mat = np.stack(self.input_vectors, axis=0)
        for iter_id in range(ntimes):
            _, rnd_ids = self.sample_data(self.ndim, self.input_vectors_norm, nsamples)
            rnd_samples = vector_mat[np.arange(self.ndim), rnd_ids]
            total_var = compute_emprical_variation(rnd_samples)
            rnd_fval: np.ndarray = self.fn(rnd_samples)
            if self.mode == 'le':
                pos_samples = rnd_samples[rnd_fval <= self.goal]
                if len(pos_samples) != 0:
                    pos_var = compute_emprical_variation(pos_samples)
                else:
                    pos_var = np.nan
                accuracy_rnd_list.append((rnd_fval <= self.goal).sum(-1) / nsamples)
            else:
                pos_samples = rnd_samples[rnd_fval >= self.goal]
                if len(pos_samples) != 0:
                    pos_var = compute_emprical_variation(pos_samples)
                else:
                    pos_var = np.nan
                accuracy_rnd_list.append((rnd_fval >= self.goal).sum(-1) / nsamples)

            pos_var_list.append(pos_var)
            total_var_list.append(total_var)

            if len(pos_samples) >= self.ndim:
                div = get_diversity_fom(self.ndim, pos_samples)
                diversity_fom_list.append(div)

        accuracy_rnd = np.array(accuracy_rnd_list, dtype='float32')
        print(f'accuracy_rnd_avg = {100 * np.mean(accuracy_rnd).astype("float"):.6f}, '
              f'accuracy_rnd_std = {100 * np.std(accuracy_rnd).astype("float"):.6f}')
        print(f'random policy total variation / dim = '
              f'{np.mean(total_var_list).astype("float"):.6f}')

        pos_var_arr = np.array(pos_var_list)
        if len(pos_var_arr[~np.isnan(pos_var_arr)]) == 0:
            print('No positive solution was found with random policy')
        else:
            print(f'pos solution variation / dim ='
                  f' {np.mean(pos_var_arr[~np.isnan(pos_var_arr)]):.6f}')
            print(f'random policy solution diversity FOM: '
                  f'{np.mean(diversity_fom_list).astype("float"):.6f}')
Example #8
    def _save_2d_samples(self, model, iter_cnt, nsamples, name='dist'):
        _, xdata_ind = model.sample_model(nsamples, self.bsize, self.input_vectors_norm)
        fpath = self.work_dir / get_full_name(name=name, prefix='training',
                                              suffix=f'{iter_cnt+1}_after')
        data_ind = xdata_ind.to(torch.device('cpu')).data.numpy().astype('int')
        data = index_to_xval(self.input_vectors, data_ind)
        s = self.input_scale
        _range = np.array([[-s, s], [-s, s]])
        plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')
Example #9
    def setup_model_state(self):
        # load the model or proceed without loading checkpoints
        if self.load:
            ckpt_dict = self.load_checkpoint(self.work_dir / 'checkpoint.tar')
            tr_losses = ckpt_dict['tr_losses']
            iter_cnt = ckpt_dict['iter_cnt']
            avg_cost = ckpt_dict['avg_cost']
            sim_cnt_list = ckpt_dict['sim_cnt']
            n_sols_in_buffer = ckpt_dict['n_sols_in_buffer']
            sample_cnt_list = ckpt_dict['sample_cnt']
            top_means = dict(top_20=ckpt_dict['top_20'],
                             top_40=ckpt_dict['top_40'],
                             top_60=ckpt_dict['top_60'])
        else:
            # collect samples using the random initial model (probably a bad initialization)
            iter_cnt = 0
            tr_losses, avg_cost, \
            sim_cnt_list, sample_cnt_list, n_sols_in_buffer = [], [], [], [], []
            top_means = dict(top_20=[], top_40=[], top_60=[])
            self.model.eval()
            self.collect_samples(self.n_init_samples, uniform=True)
            write_pickle(self.work_dir / 'init_buffer.pickle',
                         dict(init_buffer=self.buffer))
            # train the init model
            self.model.train()
            self.train(0, self.init_nepochs)

            if self.ndim == 2:
                _, xdata_ind = self.sample_model(1000, model=self.model)
                fpath = self.work_dir / get_full_name(
                    name='dist', prefix='training', suffix='0_after')
                data_ind = xdata_ind.to(self.cpu).data.numpy().astype('int')
                data = index_to_xval(self.input_vectors, data_ind)
                s = self.input_scale
                _range = np.array([[-s, s], [-s, s]])
                plt_hist2D(data, fpath=fpath, range=_range, cmap='binary')

            saved_data = dict(
                iter_cnt=iter_cnt,
                tr_losses=tr_losses,
                avg_cost=avg_cost,
                sim_cnt=sim_cnt_list,
                n_sols_in_buffer=n_sols_in_buffer,
                sample_cnt=sample_cnt_list,
                **top_means,
            )
            self.save_checkpoint(saved_data)

        return iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list, n_sols_in_buffer, \
               top_means
Example #10
    def get_top_samples(self, iter_cnt, samples, sample_fvals):

        if self.on_policy:
            nsamples = len(samples)
            sample_ids = range(nsamples)
            sorted_sample_ids = sorted(sample_ids,
                                       key=lambda i: sample_fvals[i],
                                       reverse=self.mode == 'ge')
            sorted_samples = samples[sorted_sample_ids]

            # find the last index which satisfies the constraint
            cond = sample_fvals <= self.goal if self.mode == 'le' else sample_fvals >= self.goal
            top_index = cond.sum(-1).astype('int')

        else:
            data, _, weights, _ = self.buffer.draw_tr_te_ds(
                split=1, normalize_weight=False)
            samples = data[:, 1].astype('int')
            nsamples = len(samples)
            weights_iter = iter(weights)
            sorted_samples = np.stack(
                sorted(samples, key=lambda x: next(weights_iter),
                       reverse=True),
                axis=0,
            )
            top_index = (weights == 1).sum(-1).astype('int')

        if self.elite_criteria == 'optim':
            top_index = self.cut_off
        elif self.elite_criteria == 'csp':
            top_index = max(top_index, min(self.cut_off, nsamples))

        top_samples = sorted_samples[:top_index]

        # plot exploration
        if self.ndim == 2:
            fpath = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{iter_cnt}_before')
            s = self.input_scale
            _range = np.array([[-s, s], [-s, s]])
            plt_hist2D(index_to_xval(self.input_vectors, samples),
                       fpath=fpath,
                       range=_range,
                       cmap='binary')

        return top_samples
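In the on-policy branch above, the elite set is formed by sorting the samples by objective value and counting how many satisfy the goal. A small self-contained sketch of that selection in 'le' mode (toy arrays, not from the source):

import numpy as np

goal = 1.0
samples = np.array([[0, 1], [2, 3], [4, 5], [6, 7]])
sample_fvals = np.array([1.5, 0.2, 3.0, 0.7])

# sort ascending for 'le' mode (reverse=True would be used for 'ge')
sorted_ids = sorted(range(len(samples)), key=lambda i: sample_fvals[i])
sorted_samples = samples[sorted_ids]

# the number of samples satisfying the constraint determines the elite cut
top_index = int((sample_fvals <= goal).sum(-1))  # 2
top_samples = sorted_samples[:top_index]         # the two samples with fvals 0.2 and 0.7
print(top_samples)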
Example #11
    def _run_alg(self):
        ret = self.setup_state()
        iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list, n_sols_in_buffer, top_means = ret
        while iter_cnt < self.niter:
            print(f'iter {iter_cnt}')
            # ---- update plotting variables
            # sim_cnt_list.append(self.buffer.size)
            # n_sols_in_buffer.append(self.buffer.n_sols)
            # sample_cnt_list.append(self.buffer.tot_freq)
            # top_means['top_20'].append(self.buffer.topn_mean(20))
            # top_means['top_40'].append(self.buffer.topn_mean(40))
            # top_means['top_60'].append(self.buffer.topn_mean(60))
            sim_cnt_list.append((iter_cnt + 1) * self.nsamples +
                                self.n_init_samples)
            n_sols_in_buffer.append(len(self.buffer_temp))
            sample_cnt_list.append((iter_cnt + 1) * self.nsamples +
                                   self.n_init_samples)
            top_means['top_20'].append(np.mean(self.fvals[:20]))
            top_means['top_40'].append(np.mean(self.fvals[:40]))
            top_means['top_60'].append(np.mean(self.fvals[:60]))

            samples, sample_fvals = self.collect_samples(self.nsamples)
            avg_cost.append(
                sample_fvals.mean() if self.on_policy else self.buffer.mean)
            top_samples = self.get_top_samples(iter_cnt + 1, samples,
                                               sample_fvals)
            self.cem.fit(top_samples)

            if (iter_cnt + 1) % 10 == 0 and self.ndim == 2:
                xdata_ind = self.sample_model(1000)
                fpath = self.work_dir / get_full_name(
                    name='dist',
                    prefix='training',
                    suffix=f'{iter_cnt+1}_after')
                s = self.input_scale
                _range = np.array([[-s, s], [-s, s]])
                plt_hist2D(index_to_xval(self.input_vectors, xdata_ind),
                           fpath=fpath,
                           range=_range,
                           cmap='binary')

            iter_cnt += 1

            saved_data = dict(
                iter_cnt=iter_cnt,
                avg_cost=avg_cost,
                sim_cnt=sim_cnt_list,
                n_sols_in_buffer=n_sols_in_buffer,
                sample_cnt=sample_cnt_list,
                **top_means,
            )
            self.save_checkpoint(saved_data)

        plot_cost(avg_cost, fpath=self.work_dir / 'cost.png')
        plot_x_y(
            sample_cnt_list,
            n_sols_in_buffer,
            # annotate=sim_cnt_list, marker='s', fillstyle='none'
            fpath=self.work_dir / 'n_sols.png',
            xlabel='n_freq',
            ylabel='n_sols')
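The top-k means appended in the loop above rely on self.fvals being a SortedList, so a plain slice already yields the k best (smallest) values. A quick illustration, assuming SortedList comes from the sortedcontainers package:

import numpy as np
from sortedcontainers import SortedList

fvals = SortedList()
for v in [3.0, 0.5, 2.0, 1.5, 0.1]:
    fvals.add(v)

# the list stays sorted ascending, so a slice gives the k smallest costs
print(list(fvals[:3]))      # [0.1, 0.5, 1.5]
print(np.mean(fvals[:3]))   # ~0.7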
Example #12
    def __init__(self,
                 spec_file: str = '',
                 spec_dict: Optional[Mapping[str, Any]] = None,
                 load: bool = False,
                 use_time_stamp: bool = True,
                 **kwargs) -> None:
        """
        Parameters
        ----------
        spec_file: str
        spec_dict: Dict[str, Any]
        some non-obvious fields
            elite_criteria: str
                'optim': from the sorted x1, ..., xn choose the p-quantile
                'csp': constraint satisfaction is enough; from x1, ..., xn choose the
                p-quantile if it is worse than the constraint, else choose all samples
                that are better than the constraint
            allow_repeated: bool
                True to allow repeated samples to be added to the buffer; otherwise all
                samples in the buffer will have equal likelihood when drawn from it.
            on_policy: bool
                True to use on-policy samples only, meaning that samples from previous
                policies are not used to train the current policy (samples are not drawn
                from CacheBuffer)
        load: bool
        kwargs: Dict[str, Any]
        """
        LoggingBase.__init__(self)

        if spec_file:
            specs = read_yaml(spec_file)
        else:
            specs = spec_dict

        self.specs = specs
        params = specs['params']

        if load:
            self.work_dir = Path(spec_file).parent
        else:
            suffix = params.get('suffix', '')
            prefix = params.get('prefix', '')
            if use_time_stamp:
                unique_name = time.strftime('%Y%m%d%H%M%S')
                unique_name = get_full_name(unique_name, prefix, suffix)
            else:
                unique_name = f'{prefix}' if prefix else ''
                if suffix:
                    unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'

            self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
            write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

        self.load = load
        self.seed = params['seed']
        self.ndim = params['ndim']
        self.nsamples = params['nsamples']
        self.n_init_samples = params['n_init_samples']
        self.niter = params['niter']
        self.cut_off = params['cut_off']
        self.input_scale = params['input_scale']
        # the goal should always be positive; if it is not, we flip the mode and negate the function
        self.goal = params['goal_value']
        self.mode = params['mode']

        self.allow_repeated = params.get('allow_repeated', False)
        self.elite_criteria = params.get('elite_criteria', 'optim')
        self.on_policy = params.get('on_policy', False)

        if self.elite_criteria not in ['csp', 'optim']:
            raise ValueError('invalid elite criteria: optim | csp')

        # allow repeated does not make sense when sampling is on-policy (on-policy: T -> repeat: T)
        self.allow_repeated = self.on_policy or self.allow_repeated

        eval_fn = params['fn']
        try:
            fn = registered_functions[eval_fn]
            self.fn = fn
        except KeyError:
            raise ValueError(f'{eval_fn} is not a valid benchmark function')

        if self.goal < 0:
            self.mode = 'le' if self.mode == 'ge' else 'ge'
            self.fn = lambda x: -fn(x)

        # hacky version of passing input vectors around
        self.input_vectors_norm = [
            np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
            for _ in range(self.ndim)
        ]
        self.input_vectors = [
            self.input_scale * vec for vec in self.input_vectors_norm
        ]

        self.cem = CEM(self.input_vectors,
                       dist_type=params['base_fn'],
                       average_coeff=params.get('average_coeff', 1),
                       gauss_sigma=params.get('gauss_sigma', None))
        self.buffer = CacheBuffer(self.mode,
                                  self.goal,
                                  self.cut_off,
                                  with_frequencies=self.allow_repeated)

        self.buffer_temp = {}
        self.fvals = SortedList()
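For reference, a minimal spec_dict consistent with the parameters this constructor reads; the keys follow the code above, while every value is a placeholder chosen for illustration (in particular 'fn' must name an entry of registered_functions and 'base_fn' is forwarded to CEM as dist_type):

spec_dict = dict(
    root_dir='runs/cem_example',        # placeholder output directory
    params=dict(
        seed=10,
        ndim=2,
        nsamples=50,
        n_init_samples=100,
        niter=100,
        cut_off=20,
        input_scale=5.0,
        goal_value=0.0,
        mode='le',                      # 'le' or 'ge'
        fn='some_registered_fn',        # must be a key of registered_functions
        base_fn='gauss',                # placeholder dist_type for CEM
        # optional fields with the defaults used above
        allow_repeated=False,
        elite_criteria='optim',         # 'optim' | 'csp'
        on_policy=False,
    ),
)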
Example #13
def main(specs, force_replot=False):
    nsamples = specs['nsamples']
    root_dir = Path(specs.get('root_dir', ''))

    prefix = specs.get('prefix', '')
    method = specs.get('method', 'pca')
    seed = specs.get('seed', 10)
    solution_only = specs.get('solution_only', False)

    samples_list, labels_list = [], []
    init_pop_list, pop_labels_list = [], []

    label_map = {}

    work_dir = root_dir / 'model_comparison'
    datasets_path = work_dir / 'datasets'
    datasets_path.parent.mkdir(exist_ok=True, parents=True)

    sol_all = 'sol' if solution_only else 'all'
    dataset_suf = f'n{nsamples}_' + sol_all
    fig_name = get_full_name('comparison', prefix,
                             f'{method}_{sol_all}_s{seed}')

    # try reading the cache set
    try:
        cache = read_pickle(work_dir / 'cache.pickle')
    except FileNotFoundError:
        cache = set()

    # find a unique fname based on the content of spec file
    spec_immutable = to_immutable(specs)

    for index in itertools.count():
        fig_path = work_dir / f'{fig_name}_{index}.png'

        # increment index if fig_path exists and spec is new
        if not fig_path.exists() or force_replot:
            break
        else:
            if spec_immutable in cache:
                print('nothing is new')
                exit()

    cache.add(spec_immutable)

    # noinspection PyUnboundLocalVariable
    fig_title = str(fig_path.stem)

    for label, (label_str, model_str) in enumerate(specs['models'].items()):
        data_path = datasets_path / f'{model_str}_{dataset_suf}.pickle'

        if data_path.exists():
            print(f'loading dataset {label}: {label_str}')
            content = read_pickle(data_path)
            samples = content['samples']
        else:
            print(f'sampling model {label} : {label_str}')
            model_path = root_dir / model_str / 'params.yaml'
            model_specs = read_yaml(model_path)
            alg_cls_str = model_specs.pop('alg_class')
            alg_cls = cast(Type[LoggingBase], import_class(alg_cls_str))
            alg = alg_cls(model_path, load=True)

            # noinspection PyUnresolvedReferences
            samples = alg.load_and_sample(nsamples,
                                          only_positive=solution_only)
            print(f'saving into {str(data_path)}')
            write_pickle(data_path, dict(samples=samples))

        labels = np.ones(shape=samples.shape[0]) * label
        label_map[label] = label_str

        # content = read_pickle(root_dir / model_str / 'init_buffer.pickle')
        # init_pop = list(map(lambda x: x.item, content['init_buffer'].db_set.keys()))
        # init_pop_list += init_pop
        # pop_labels_list.append(np.ones(shape=len(init_pop)) * label)

        # noinspection PyUnresolvedReferences
        samples_list.append(samples)
        labels_list.append(labels)

    samples = np.concatenate(samples_list, axis=0)
    labels = np.concatenate(labels_list, axis=0)
    # pops = np.stack(init_pop_list, axis=0)
    # pop_labels = np.concatenate(pop_labels_list, axis=0)

    if method == 'pca':
        pca_scatter2d(samples,
                      labels,
                      label_map,
                      fpath=fig_path,
                      alpha=0.5,
                      title=fig_title,
                      edgecolors='none',
                      s=10)
    elif method == 'tsne':
        # import matplotlib.pyplot as plt
        # plt.close()
        # _, axes = plt.subplots(2, 1)
        # tsne_scatter2d(samples, labels, label_map, seed=seed, ax=axes[0], alpha=0.5,
        #                title=fig_title, edgecolors='none', s=10)
        tsne_scatter2d(samples,
                       labels,
                       label_map,
                       seed=seed,
                       fpath=fig_path,
                       alpha=0.5,
                       title=fig_title,
                       edgecolors='none',
                       s=10)
        # tsne_scatter2d(pops, pop_labels, label_map, seed=seed, ax=axes[1], alpha=0.5,
        #                title=fig_title, edgecolors='none', s=10)
        # plt.tight_layout()
        # plt.savefig(fig_path)
    else:
        raise ValueError(
            'invalid dimensionality reduction, valid options are {"pca"| "tsne"}'
        )

    # update cache
    write_pickle(work_dir / 'cache.pickle', cache)
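The loop near the top of main picks the first unused figure index and bails out when the spec has already been plotted. The same pattern in isolation (function name and the seen_before flag are illustrative, not from the source):

import itertools
from pathlib import Path

def next_free_fig_path(work_dir: Path, fig_name: str, seen_before: bool,
                       force_replot: bool = False) -> Path:
    # walk indices until we find a figure path that does not exist yet
    for index in itertools.count():
        fig_path = work_dir / f'{fig_name}_{index}.png'
        if not fig_path.exists() or force_replot:
            return fig_path
        # an existing figure plus an already-seen spec means there is nothing new to plot
        if seen_before:
            raise SystemExit('nothing is new')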
Example #14
    def train(self, iter_cnt: int, nepochs: int, split=1.0):
        # treat the sampled data as a static data set and take some gradient steps on it
        print('-' * 50)
        if self.on_policy and iter_cnt != 0:
            # TODO: this is a stupid implementation, but ok for now
            xtr, wtr = self._sample_model_with_weights(self.nsamples)
        else:
            xtr, xte, wtr, wte = self.buffer.draw_tr_te_ds(
                split=split, normalize_weight=False)

        if self.model_visited:
            print('Training buffer model:')
            nepochs = self.init_nepochs if iter_cnt == 0 else self.nepoch_visited
            for epoch_id in range(nepochs):
                tr_nll = self.run_epoch(xtr,
                                        wtr,
                                        self.visited_dist,
                                        mode='train',
                                        debug=False)
                print(f'[visit_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')
            print('Finished training buffer model')

            if iter_cnt % 10 == 0 and self.ndim == 2:
                _, xvisited_ind = self.sample_model(1000,
                                                    model=self.visited_dist)
                self._plot_dist(xvisited_ind, 'dist', 'visited',
                                f'{iter_cnt+1}')

        update_w = self.update_weight(xtr[:, 0, :], wtr)
        # debug
        if iter_cnt < -1:
            values = index_to_xval(self.input_vectors,
                                   xtr[:, 1, :].astype('int'))
            fvals = self.fn(values)
            wtr_norm = (wtr - wtr.mean()) / (wtr.std() + 1e-15)
            fref = sorted(fvals)[self.cut_off - 1]
            print(f'fref = {fref}')
            cond = np.logical_and(fvals >= 20, fvals <= fref)
            for index, wp, wn, wnorm in zip(xtr[:, 1, :][cond], wtr[cond],
                                            update_w[cond], wtr_norm[cond]):
                print(f'index = {index}, weight_before_update = {wp:.4f}, '
                      f'weights_norm = {wnorm:.4f}, '
                      f'weight_after_update = {wn:.4f}')
            pdb.set_trace()

        wtr = update_w

        if self.ndim == 2:
            fpath = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{iter_cnt}_before')
            samples = index_to_xval(self.input_vectors,
                                    xtr[:, 1, :].astype('int'))
            s = self.input_scale
            plt_hist2D(samples,
                       fpath=fpath,
                       range=np.array([[-s, s], [-s, s]]),
                       cmap='binary')

        # per epoch
        tr_loss = 0
        te_loss = 0
        tr_loss_list = []
        print(f'Training model: fref = {self.buffer.zavg}')
        for epoch_id in range(nepochs):
            tr_nll = self.run_epoch(xtr,
                                    wtr,
                                    self.model,
                                    mode='train',
                                    debug=False)
            tr_loss_list.append(tr_nll)
            tr_loss += tr_nll / self.nepochs

            # self.writer.add_scalar('loss', tr_nll, epoch_id)
            print(f'[train_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')

            if split < 1:
                te_nll = self.run_epoch(xte, wte, self.model, mode='test')
                te_loss += te_nll / self.nepochs
                print(f'[test_{iter_cnt}] epoch {epoch_id} loss = {te_nll}')

        print('Finished training model.')
        if split < 1:
            return tr_loss, te_loss

        return tr_loss, tr_loss_list
Example #15
    def __init__(self,
                 spec_file: str = '',
                 spec_dict: Optional[Mapping[str, Any]] = None,
                 load: bool = False,
                 use_time_stamp: bool = True,
                 init_buffer_path=None,
                 **kwargs) -> None:
        LoggingBase.__init__(self)

        if spec_file:
            specs = read_yaml(spec_file)
        else:
            specs = spec_dict

        self.specs = specs
        params = specs['params']

        if load:
            self.work_dir = Path(spec_file).parent
        else:
            suffix = params.get('suffix', '')
            prefix = params.get('prefix', '')
            if use_time_stamp:
                unique_name = time.strftime('%Y%m%d%H%M%S')
                unique_name = get_full_name(unique_name, prefix, suffix)
            else:
                unique_name = f'{prefix}' if prefix else ''
                if suffix:
                    unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'

            self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
            write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

        self.load = load
        self.seed = params.get('seed', 10)
        self.ndim = params['ndim']
        self.bsize = params['batch_size']
        self.hiddens = params['hidden_list']
        self.niter = params['niter']
        self.goal = params['goal_value']
        self.mode = params['mode']
        self.viz_rate = self.niter // 10
        self.lr = params['lr']
        self.nepochs = params['nepochs']
        self.nsamples = params['nsamples']
        self.n_init_samples = params['n_init_samples']
        self.init_nepochs = params['init_nepochs']
        self.cut_off = params['cut_off']
        self.beta = params['beta']
        self.nr_mix = params['nr_mix']
        self.base_fn = params['base_fn']
        self.only_pos = params['only_positive']
        # whether to run 1000 epochs of training for the last iteration
        self.full_training = params['full_training_last']
        self.input_scale = params['input_scale']
        self.fixed_sigma = params.get('fixed_sigma', None)
        self.on_policy = params.get('on_policy', False)
        self.problem_type = params.get('problem_type', 'csp')

        self.allow_repeated = params.get('allow_repeated', False)
        self.allow_repeated = self.on_policy or self.allow_repeated

        self.important_sampling = params.get('important_sampling', False)
        self.visited_dist: Optional[nn.Module] = None
        self.visited_fixed_sigma = params.get('visited_fixed_sigma', None)
        self.visited_nr_mix = params.get('visited_nr_mix', None)

        self.explore_coeff = params.get('explore_coeff', None)
        self.nepoch_visited = params.get('nepoch_visited', -1)

        self.normalize_weight = params.get('normalize_weight', True)
        self.add_ent_before_norm = params.get(
            'add_entropy_before_normalization', False)
        self.weight_type = params.get('weight_type', 'ind')

        self.model_visited = self.explore_coeff is not None or self.important_sampling

        if self.model_visited and self.nepoch_visited == -1:
            raise ValueError(
                'nepoch_visited should be specified when a model is '
                'learning visited states')

        self.init_buffer_paths = init_buffer_path

        eval_fn = params['eval_fn']
        try:
            self.fn = registered_functions[eval_fn]
        except KeyError:
            raise ValueError(f'{eval_fn} is not a valid benchmark function')

        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        print(f'device: {self.device}')
        self.cpu = torch.device('cpu')
        self.model: Optional[nn.Module] = None
        self.buffer = None
        self.opt = None

        # hacky version of passing input vectors around
        self.input_vectors_norm = [
            np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
            for _ in range(self.ndim)
        ]
        self.input_vectors = [
            self.input_scale * vec for vec in self.input_vectors_norm
        ]
        # TODO: remove this hacky way of keeping track of delta
        self.delta = self.input_vectors_norm[0][-1] - self.input_vectors_norm[
            0][-2]

        # keep track of lo and hi for indices
        self.params_min = np.array([0] * self.ndim)
        self.params_max = np.array([len(x) - 1 for x in self.input_vectors])

        self.fvals = SortedList()
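The delta tracked at the end of the constructor is simply the spacing of the normalized input grid; for np.linspace(-1.0, 1.0, num=100) that is 2/99 ≈ 0.0202. A quick standalone check:

import numpy as np

input_vectors_norm = [np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
                      for _ in range(2)]
delta = input_vectors_norm[0][-1] - input_vectors_norm[0][-2]
print(delta)                        # ~0.020202
print(np.isclose(delta, 2 / 99))    # True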