Exemple #1
0
def _get_nl_leduc_tree(env_args=None):
    """
    Build, fill, evaluate and export a public tree for DiscretizedNLLeduc.

    Args:
        env_args: Environment arguments used for both the env builder and the
            tree's stack sizes. When ``None``, a two-seat configuration with
            starting stacks of 1000 each and a single bet size of 1.0 (as a
            fraction of the pot) is created.

    Returns:
        The ``PublicTree`` after it was built, filled with the uniform random
        strategy, had its EVs computed and was exported to file.
    """
    if env_args is None:
        env_args = DiscretizedNLLeduc.ARGS_CLS(
            n_seats=2,
            starting_stack_sizes_list=[1000, 1000],
            bet_sizes_list_as_frac_of_pot=[1.0],
        )

    builder = HistoryEnvBuilder(env_cls=DiscretizedNLLeduc, env_args=env_args)
    tree = PublicTree(env_bldr=builder,
                      stack_size=env_args.starting_stack_sizes_list,
                      stop_at_street=None)
    tree.build_tree()

    # NOTE(review): fill_uniform_random() receives no seat argument but is
    # invoked once per seat; the call count is kept as in the original --
    # confirm whether a single call would be sufficient.
    for _ in range(builder.N_SEATS):
        tree.fill_uniform_random()

    tree.compute_ev()
    tree.export_to_file()

    # Report the tree size, then the mean root exploitability scaled by the
    # environment's EV normalizer.
    print("Tree with stack size", tree.stack_size,
          "has", tree.n_nodes,
          "nodes out of which", tree.n_nonterm,
          "are non-terminal.")
    scaled_expl = np.mean(tree.root.exploitability) * builder.env_cls.EV_NORMALIZER
    print(scaled_expl)

    return tree
Exemple #2
0
    def _evaluate_avg_strats(self):
        """
        Evaluate the average strategy held by every training tree.

        For each tree index a fresh ``PublicTree`` is built, the
        ``"avg_strat"`` array of every acting (non-chance, non-terminal) node
        of the training tree is copied into it, reach probabilities and EVs
        are recomputed, the tree is exported to file and the seat-averaged,
        normalized root exploitability is logged through the chief handle.

        Returns:
            The mean of the per-tree exploitability values. It is also logged
            under the metric name of the last tree that was processed.

        Raises:
            ZeroDivisionError: If there are no trees to evaluate.
        """

        def _copy_avg_strat(dst_node, src_node, chance_id):
            # Only nodes where a player acts carry a strategy.
            if dst_node.p_id_acting_next != chance_id:
                if not dst_node.is_terminal:
                    dst_node.strategy = np.copy(src_node.data["avg_strat"])
                    row_sums = np.sum(dst_node.strategy, axis=1)
                    assert np.allclose(row_sums, 1, atol=0.0001)

            # Walk both trees in lockstep.
            for dst_child, src_child in zip(dst_node.children,
                                            src_node.children):
                _copy_avg_strat(dst_child, src_child, chance_id)

        per_tree_expl = []
        for t_idx in range(len(self._trees)):
            env_bldr = self._env_bldrs[t_idx]
            # Intentionally survives the loop: the aggregate scalar below is
            # logged under the metric name of the last tree.
            metric_tag = env_bldr.env_cls.WIN_METRIC

            eval_tree = PublicTree(
                env_bldr=env_bldr,
                stack_size=self._env_args[t_idx].starting_stack_sizes_list,
                stop_at_street=None,
                is_debugging=False,
            )
            eval_tree.build_tree()

            # Sets up some stuff; the strategy is overwritten right after.
            eval_tree.fill_uniform_random()

            # Overwrite with the average strategy of the training tree.
            _copy_avg_strat(eval_tree.root, self._trees[t_idx].root,
                            eval_tree.CHANCE_ID)
            eval_tree.update_reach_probs()

            # Compute EVs
            eval_tree.compute_ev()

            export_name = self._name + "_Avg_" + str(self._iter_counter)
            eval_tree.export_to_file(name=export_name)

            # Log the seat-averaged exploitability of this tree.
            normalizer = env_bldr.env_cls.EV_NORMALIZER
            seat_expl = []
            for seat in range(eval_tree.n_seats):
                seat_expl.append(
                    float(eval_tree.root.exploitability[seat]) * normalizer)
            tree_expl = sum(seat_expl) / eval_tree.n_seats
            per_tree_expl.append(tree_expl)

            self._chief_handle.add_scalar(
                self._exps_avg_total[t_idx],
                "Evaluation/" + metric_tag,
                self._iter_counter,
                tree_expl,
            )

        expl_total_averaged = sum(per_tree_expl) / float(len(per_tree_expl))
        self._chief_handle.add_scalar(
            self._exp_all_averaged_avg_total,
            "Evaluation/" + metric_tag,
            self._iter_counter,
            expl_total_averaged,
        )
        return expl_total_averaged