Example #1
def benchmark():
    log = Logger("data/local_analyses/benchmarks.log", "Benchmarks")
    tt = TickTock()
    cube_bench = CubeBench(log, tt)

    # Cube config variables
    cn = int(1e7)
    multi_op_size = int(1e4)  # Number of states used in multi operations

    store_repr()
    for repr_ in [True, False]:
        set_is2024(repr_)
        log.section(
            f"Benchmarking cube enviroment with {_repstr()} representation")
        tt.profile(f"Benchmarking cube environment, {_repstr()}")
        cube_bench.rotate(cn)
        cube_bench.multi_rotate(int(cn / multi_op_size), multi_op_size)
        cube_bench.onehot(cn)
        cube_bench.multi_onehot(int(cn / multi_op_size), multi_op_size)
        cube_bench.check_solution(cn)
        cube_bench.check_multi_solution(int(cn / multi_op_size), multi_op_size)
        tt.end_profile(f"Benchmarking cube environment, {_repstr()}")

    restore_repr()

    log.section("Benchmark runtime distribution")
    log(tt)
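The benchmark above times both per-state calls (rotate, onehot, check_solution) and batched "multi" variants that act on multi_op_size states at once. Below is a minimal, self-contained sketch of that single-versus-batched timing pattern using only numpy and time; the array shapes and the np.roll stand-in for a cube operation are illustrative assumptions, and the project's TickTock/CubeBench classes are not used.

import time

import numpy as np


def time_it(fn, repeats: int) -> float:
    # Total wall-clock time for `repeats` calls of fn
    start = time.perf_counter()
    for _ in range(repeats):
        fn()
    return time.perf_counter() - start


states = np.random.randint(0, 24, size=(10_000, 20))  # hypothetical batch of 10,000 states
single_state = states[0]

t_single = time_it(lambda: np.roll(single_state, 1), 10_000)  # one state per call
t_multi = time_it(lambda: np.roll(states, 1, axis=1), 1)      # all states in one vectorized call
print(f"10,000 single-state calls: {t_single:.4f} s, one batched call: {t_multi:.4f} s")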
Example #2
	def test_statcomp(self):
		A, B = np.random.randint(0, 100, 100), np.random.randint(5, 150, 100)
		p1, p2 = np.random.randint(100), np.random.randint(100)
		A[np.arange(100)[p1]] = -1
		B[np.arange(100)[p2]] = -1

		s = StatisticalComparison(None, Logger('local_tests/a', ''))

		_, p_exp = stats.ttest_ind(A[A!=-1], B[B!=-1], equal_var=False)
		p_gotten, _ = s.length_ttest([A,B], 0.05)
		assert np.isclose(p_exp, p_gotten)

		p_gotten, _ = s.solve_proptest([A,B], 0.05)
		assert 0 <= p_gotten <= 1
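The solve-proportion test is only checked here for returning a valid p value. Example #3 below describes it as a chi-squared test of solve proportions; the following is a generic scipy sketch of such a test (not necessarily what StatisticalComparison.solve_proptest does internally), reusing the -1 sentinel for unsolved games from the test above.

import numpy as np
from scipy import stats

A = np.random.randint(0, 100, 100)
B = np.random.randint(5, 150, 100)
A[3], B[7] = -1, -1  # one unsolved game per agent, marked with the -1 sentinel

# 2x2 contingency table of solved/unsolved counts per agent
table = [[int(np.sum(A != -1)), int(np.sum(A == -1))],
         [int(np.sum(B != -1)), int(np.sum(B == -1))]]
chi2, p, dof, expected = stats.chi2_contingency(table)
print(f"chi2 = {chi2:.3f}, p = {p:.3f}")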
Example #3
def statscompare():
    """
	Main way to run statistical comparison by running `python librubiks/analysis/statscompare.py --help`.
	Does not support config arguments.
	"""
    parser = argparse.ArgumentParser(
        description=
        'Compare two agents by doing a t-test of solution lengths and a chi-squared test of solve proportions'
    )
    parser.add_argument(
        '--location',
        help=
        "Folder containing evaluation results. If exactly two different agents are contained herein,"
        "these will be compared.\nOtherwise, the user will be prompted",
        type=str)
    parser.add_argument('--alpha',
                        help="Significane level used",
                        type=float,
                        default=0.01)
    parser.add_argument(
        '--compare_all',
        help=
        "If true, all comparisons in folder is run, using p value cprrection",
        type=literal_eval,
        default=True,
        choices=[True, False])

    args = parser.parse_args()

    comp = StatisticalComparison(args.location,
                                 Logger(
                                     os.path.join(args.location, "stats.log"),
                                     "Statistical comparison"),
                                 compare_all=args.compare_all)
    comp.dataload()
    comp.run_comparisons(alpha=args.alpha)
    comp.normality_plot()
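The --compare_all flag above is parsed with literal_eval rather than bool. A short, standalone illustration of why (plain Python, independent of this script):

from ast import literal_eval

print(bool("False"))          # True  -- any non-empty string is truthy, so type=bool would misparse it
print(literal_eval("False"))  # False -- the string is parsed as a Python literal, giving a real bool
print(literal_eval("True"))   # True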
Example #4
        if not won: log(f"Game {i+1} was not won")
        else:
            for action_num in agent.action_queue:
                action_tup = cube.action_space[action_num]
                action_name = cube.action_names[action_tup[0]]
                actions_taken.append(action_name.lower() if action_tup[1] else action_name)
            log(f'Actions taken: {actions_taken}')
            sequences.append(actions_taken)
    return sequences


if __name__ == "__main__":
    ### Hyper parameters ###
    net_path, use_best = '../rubiks-models/main', True
    max_time = 5
    lambda_, N = 0.16, 700

    output_path = '../rubiks-models/main/patterns.log'
    games = 1000
    support = 0.3

    ########################
    log = Logger(output_path, "Pattern mining")
    agent = AStar.from_saved(net_path, use_best, lambda_, N)
    log(f"Loaded agent {agent} with network {net_path}")

    log(f"Playing {games} games")
    actions = generate_actions(agent, games, max_time)
    log("Found patterns:")
    log(find_generalized_patterns(actions, support))
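find_generalized_patterns is not shown here, but support = 0.3 is the usual frequent-pattern-mining threshold: a pattern must occur in at least 30 % of the winning games. A generic, hypothetical sketch of support-based counting over action sequences (a stand-in for illustration, not the project's implementation):

from collections import Counter


def frequent_ngrams(sequences, n, support):
    # Count contiguous length-n action patterns and keep those occurring in at
    # least a `support` fraction of the sequences (generic sketch only)
    counts = Counter()
    for seq in sequences:
        counts.update({tuple(seq[i:i + n]) for i in range(len(seq) - n + 1)})
    threshold = support * len(sequences)
    return {pattern: c for pattern, c in counts.items() if c >= threshold}


example = [["f", "r", "u"], ["f", "r", "d"], ["r", "u", "f"]]
print(frequent_ngrams(example, 2, 0.5))  # ('f', 'r') and ('r', 'u') each occur in 2 of 3 sequences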
Example #5
    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runtrain
            location: str,
            rollouts: int,
            rollout_games: int,
            rollout_depth: int,
            batch_size: int,
            alpha_update: float,
            lr: float,
            gamma: float,
            tau: float,
            update_interval: int,
            optim_fn: str,
            evaluation_interval: int,
            nn_init: str,
            is2024: bool,
            arch: str,
            analysis: bool,
            reward_method: str,

            # Currently not set by argparser/configparser
            agent=PolicySearch(net=None),
            scrambling_depths: tuple = (10, ),
            verbose: bool = True,
    ):

        self.name = name
        assert isinstance(self.name, str)

        self.rollouts = rollouts
        assert self.rollouts > 0
        self.rollout_games = rollout_games
        assert self.rollout_games > 0
        self.rollout_depth = rollout_depth
        assert rollout_depth > 0
        self.batch_size = batch_size
        assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

        self.alpha_update = alpha_update
        assert 0 <= alpha_update <= 1
        self.lr = lr
        assert float(lr) and lr <= 1
        self.gamma = gamma
        assert 0 < gamma <= 1
        self.tau = tau
        assert 0 < tau <= 1
        self.update_interval = update_interval
        assert isinstance(self.update_interval,
                          int) and 0 <= self.update_interval
        self.optim_fn = getattr(torch.optim, optim_fn)
        assert issubclass(self.optim_fn, torch.optim.Optimizer)

        self.location = location
        self.logger = Logger(
            f"{self.location}/train.log", name, verbose
        )  #Already creates logger at init to test whether path works
        self.logger.log(f"Initialized {self.name}")

        self.evaluator = Evaluator(n_games=self.eval_games,
                                   max_time=self.max_time,
                                   scrambling_depths=scrambling_depths,
                                   logger=self.logger)
        self.evaluation_interval = evaluation_interval
        assert isinstance(self.evaluation_interval,
                          int) and 0 <= self.evaluation_interval
        self.agent = agent
        assert isinstance(self.agent, DeepAgent)
        self.is2024 = is2024

        assert nn_init in ["glorot", "he"] or ( float(nn_init) or True ),\
          f"Initialization must be glorot, he or a number, but was {nn_init}"
        self.model_cfg = ModelConfig(architecture=arch,
                                     is2024=is2024,
                                     init=nn_init)

        self.analysis = analysis
        assert isinstance(self.analysis, bool)

        self.reward_method = reward_method
        assert self.reward_method in [
            "paper", "lapanfix", "schultzfix", "reward0"
        ]

        assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
        if arch == "conv": assert not self.is2024
        assert isinstance(self.model_cfg, ModelConfig)
Example #6
class TrainJob:
    eval_games = 200  # Not given as arguments to __init__, as they should be accessible in runtime_estim
    max_time = 0.05
    is2024: bool

    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runtrain
            location: str,
            rollouts: int,
            rollout_games: int,
            rollout_depth: int,
            batch_size: int,
            alpha_update: float,
            lr: float,
            gamma: float,
            tau: float,
            update_interval: int,
            optim_fn: str,
            evaluation_interval: int,
            nn_init: str,
            is2024: bool,
            arch: str,
            analysis: bool,
            reward_method: str,

            # Currently not set by argparser/configparser
            agent=PolicySearch(net=None),
            scrambling_depths: tuple = (10, ),
            verbose: bool = True,
    ):

        self.name = name
        assert isinstance(self.name, str)

        self.rollouts = rollouts
        assert self.rollouts > 0
        self.rollout_games = rollout_games
        assert self.rollout_games > 0
        self.rollout_depth = rollout_depth
        assert rollout_depth > 0
        self.batch_size = batch_size
        assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

        self.alpha_update = alpha_update
        assert 0 <= alpha_update <= 1
        self.lr = lr
        assert float(lr) and lr <= 1
        self.gamma = gamma
        assert 0 < gamma <= 1
        self.tau = tau
        assert 0 < tau <= 1
        self.update_interval = update_interval
        assert isinstance(self.update_interval,
                          int) and 0 <= self.update_interval
        self.optim_fn = getattr(torch.optim, optim_fn)
        assert issubclass(self.optim_fn, torch.optim.Optimizer)

        self.location = location
        self.logger = Logger(
            f"{self.location}/train.log", name, verbose
        )  #Already creates logger at init to test whether path works
        self.logger.log(f"Initialized {self.name}")

        self.evaluator = Evaluator(n_games=self.eval_games,
                                   max_time=self.max_time,
                                   scrambling_depths=scrambling_depths,
                                   logger=self.logger)
        self.evaluation_interval = evaluation_interval
        assert isinstance(self.evaluation_interval,
                          int) and 0 <= self.evaluation_interval
        self.agent = agent
        assert isinstance(self.agent, DeepAgent)
        self.is2024 = is2024

        assert nn_init in ["glorot", "he"] or ( float(nn_init) or True ),\
          f"Initialization must be glorot, he or a number, but was {nn_init}"
        self.model_cfg = ModelConfig(architecture=arch,
                                     is2024=is2024,
                                     init=nn_init)

        self.analysis = analysis
        assert isinstance(self.analysis, bool)

        self.reward_method = reward_method
        assert self.reward_method in [
            "paper", "lapanfix", "schultzfix", "reward0"
        ]

        assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
        if arch == "conv": assert not self.is2024
        assert isinstance(self.model_cfg, ModelConfig)

    @with_used_repr
    def execute(self):

        # Sets representation
        self.logger.section(
            f"Starting job:\n{self.name} with {'20x24' if get_is2024() else '6x8x6'} representation\nLocation {self.location}\nCommit: {get_commit()}"
        )

        train = Train(
            self.rollouts,
            batch_size=self.batch_size,
            rollout_games=self.rollout_games,
            rollout_depth=self.rollout_depth,
            optim_fn=self.optim_fn,
            alpha_update=self.alpha_update,
            lr=self.lr,
            gamma=self.gamma,
            tau=self.tau,
            reward_method=self.reward_method,
            update_interval=self.update_interval,
            agent=self.agent,
            logger=self.logger,
            evaluation_interval=self.evaluation_interval,
            evaluator=self.evaluator,
            with_analysis=self.analysis,
        )
        self.logger(
            f"Rough upper bound on total evaluation time during training: {len(train.evaluation_rollouts)*self.evaluator.approximate_time()/60:.2f} min"
        )

        net = Model.create(self.model_cfg, self.logger)
        net, min_net = train.train(net)
        net.save(self.location)
        if self.evaluation_interval:
            min_net.save(self.location, True)

        train.plot_training(self.location, name=self.name)
        analysispath = os.path.join(self.location, "analysis")
        datapath = os.path.join(self.location, "train-data")
        os.mkdir(datapath)
        os.mkdir(analysispath)

        if self.analysis:
            train.analysis.plot_substate_distributions(analysispath)
            train.analysis.plot_value_targets(analysispath)
            train.analysis.plot_net_changes(analysispath)
            train.analysis.visualize_first_states(analysispath)
            np.save(f"{datapath}/avg_target_values.npy",
                    train.analysis.avg_value_targets)
            np.save(f"{datapath}/policy_entropies.npy",
                    train.analysis.policy_entropies)
            np.save(f"{datapath}/substate_val_stds.npy",
                    train.analysis.substate_val_stds)

        np.save(f"{datapath}/rollouts.npy", train.train_rollouts)
        np.save(f"{datapath}/policy_losses.npy", train.policy_losses)
        np.save(f"{datapath}/value_losses.npy", train.value_losses)
        np.save(f"{datapath}/losses.npy", train.train_losses)
        np.save(f"{datapath}/evaluation_rollouts.npy",
                train.evaluation_rollouts)
        np.save(f"{datapath}/evaluations.npy", train.sol_percents)

        return train.train_rollouts, train.train_losses

    @staticmethod
    def clean_dir(loc: str):
        """
		Cleans a training directory except for train_config.ini, the content of which is also returned
		"""
        tcpath = f"{loc}/train_config.ini"
        with open(tcpath, encoding="utf-8") as f:
            content = f.read()
        rmtree(loc)
        os.mkdir(loc)
        with open(f"{loc}/train_config.ini", "w", encoding="utf-8") as f:
            f.write(content)
        return content
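A hypothetical construction of a TrainJob, only to show how the arguments above fit together. Every value is illustrative rather than a project default, a writable location directory is assumed, and the keyword names simply follow the __init__ signature shown above.

job = TrainJob(
    name="demo-run",
    location="data/local_train/demo-run",  # train.log, the model and plots end up here
    rollouts=500,
    rollout_games=100,
    rollout_depth=20,
    batch_size=400,          # must not exceed rollout_games * rollout_depth
    alpha_update=0.0,
    lr=1e-5,
    gamma=1.0,
    tau=1.0,
    update_interval=50,
    optim_fn="RMSprop",      # looked up on torch.optim by name
    evaluation_interval=100,
    nn_init="glorot",
    is2024=True,
    arch="fc_small",
    analysis=False,
    reward_method="lapanfix",
)
# rollouts, losses = job.execute()  # would start training with the settings above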
Example #7
    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runeval
            location: str,
            use_best: bool,
            agent: str,
            games: int,
            max_time: float,
            max_states: int,
            scrambling: str,
            optimized_params: bool,
            mcts_c: float,
            mcts_graph_search: bool,
            policy_sample: bool,
            astar_lambda: float,
            astar_expansions: int,
            egvm_epsilon: float,
            egvm_workers: int,
            egvm_depth: int,

            # Currently not set by parser
            verbose: bool = True,
            in_subfolder: bool = False,  # Should be true if there are multiple experiments
    ):

        self.name = name
        self.location = location

        assert isinstance(games, int) and games
        assert max_time >= 0
        assert max_states >= 0
        assert max_time or max_states
        scrambling = range(*scrambling)
        assert isinstance(optimized_params, bool)

        #Create evaluator
        self.logger = Logger(
            f"{self.location}/{self.name}.log", name, verbose
        )  # Already creates logger at init to test whether path works
        self.evaluator = Evaluator(n_games=games,
                                   max_time=max_time,
                                   max_states=max_states,
                                   scrambling_depths=scrambling,
                                   logger=self.logger)

        #Create agents
        agent_string = agent
        agent = getattr(agents, agent_string)
        assert issubclass(agent, agents.Agent)

        if issubclass(agent, agents.DeepAgent):
            self.agents, self.reps, agents_args = {}, {}, {}

            #DeepAgents need specific arguments
            if agent == agents.MCTS:
                assert mcts_c >= 0, f"Exploration parameter c must be 0 or larger, not {mcts_c}"
                agents_args = {'c': mcts_c, 'search_graph': mcts_graph_search}
            elif agent == agents.PolicySearch:
                assert isinstance(policy_sample, bool)
                agents_args = {'sample_policy': policy_sample}
            elif agent == agents.AStar:
                assert isinstance(
                    astar_lambda, float
                ) and 0 <= astar_lambda <= 1, "AStar lambda must be float in [0, 1]"
                assert isinstance(
                    astar_expansions, int) and astar_expansions >= 1 and (
                        not max_states or astar_expansions < max_states
                    ), "Expansions must be int < max states"
                agents_args = {
                    'lambda_': astar_lambda,
                    'expansions': astar_expansions
                }
            elif agent == agents.EGVM:
                assert isinstance(
                    egvm_epsilon, float
                ) and 0 <= egvm_epsilon <= 1, "EGVM epsilon must be float in [0, 1]"
                assert isinstance(
                    egvm_workers, int
                ) and egvm_workers >= 1, "Number of EGWM workers must a natural number"
                assert isinstance(
                    egvm_depth, int
                ) and egvm_depth >= 1, "EGWM depth must be a natural number"
                agents_args = {
                    'epsilon': egvm_epsilon,
                    'workers': egvm_workers,
                    'depth': egvm_depth
                }
            else:  # Non-parametric methods go brrrr
                agents_args = {}

            search_location = os.path.dirname(
                os.path.abspath(self.location)
            ) if in_subfolder else self.location  # Use parent folder, if parser has generated multiple folders
            # DeepAgent might have to test multiple NN's
            for folder in glob(f"{search_location}/*/") + [search_location]:
                if not os.path.isfile(os.path.join(folder, 'model.pt')):
                    continue
                store_repr()
                with open(f"{folder}/config.json") as f:
                    cfg = json.load(f)
                if optimized_params and agent in [agents.MCTS, agents.AStar]:
                    parampath = os.path.join(folder,
                                             f'{agent_string}_params.json')
                    if os.path.isfile(parampath):
                        with open(parampath, 'r') as paramfile:
                            agents_args = json.load(paramfile)
                            if agent == agents.MCTS:
                                agents_args['search_graph'] = mcts_graph_search
                    else:
                        self.logger.log(
                            f"Optimized params was set to true, but no file {parampath} was found, proceding with arguments for this {agent_string}."
                        )

                set_is2024(cfg["is2024"])
                agent = agent.from_saved(folder,
                                         use_best=use_best,
                                         **agents_args)
                key = f'{agent}{"" if folder == search_location else " " + os.path.basename(folder.rstrip(os.sep))}'

                self.reps[key] = cfg["is2024"]
                self.agents[key] = agent
                restore_repr()

            if not self.agents:
                raise FileNotFoundError(
                    f"No model.pt found in folder or subfolder of {self.location}"
                )
            self.logger.log(f"Loaded model from {search_location}")

        else:
            agent = agent()
            self.agents = {str(agent): agent}
            self.reps = {str(agent): True}

        self.agent_results = {}
        self.logger.log(
            f"Initialized {self.name} with agents {', '.join(str(s) for s in self.agents)}"
        )
        self.logger.log(
            f"TIME ESTIMATE: {len(self.agents) * self.evaluator.approximate_time() / 60:.2f} min.\t(Rough upper bound)"
        )
Example #8
class EvalJob:
    is2024: bool

    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runeval
            location: str,
            use_best: bool,
            agent: str,
            games: int,
            max_time: float,
            max_states: int,
            scrambling: str,
            optimized_params: bool,
            mcts_c: float,
            mcts_graph_search: bool,
            policy_sample: bool,
            astar_lambda: float,
            astar_expansions: int,
            egvm_epsilon: float,
            egvm_workers: int,
            egvm_depth: int,

            # Currently not set by parser
            verbose: bool = True,
            in_subfolder: bool = False,  # Should be true if there are multiple experiments
    ):

        self.name = name
        self.location = location

        assert isinstance(games, int) and games
        assert max_time >= 0
        assert max_states >= 0
        assert max_time or max_states
        scrambling = range(*scrambling)
        assert isinstance(optimized_params, bool)

        #Create evaluator
        self.logger = Logger(
            f"{self.location}/{self.name}.log", name, verbose
        )  # Already creates logger at init to test whether path works
        self.evaluator = Evaluator(n_games=games,
                                   max_time=max_time,
                                   max_states=max_states,
                                   scrambling_depths=scrambling,
                                   logger=self.logger)

        #Create agents
        agent_string = agent
        agent = getattr(agents, agent_string)
        assert issubclass(agent, agents.Agent)

        if issubclass(agent, agents.DeepAgent):
            self.agents, self.reps, agents_args = {}, {}, {}

            #DeepAgents need specific arguments
            if agent == agents.MCTS:
                assert mcts_c >= 0, f"Exploration parameter c must be 0 or larger, not {mcts_c}"
                agents_args = {'c': mcts_c, 'search_graph': mcts_graph_search}
            elif agent == agents.PolicySearch:
                assert isinstance(policy_sample, bool)
                agents_args = {'sample_policy': policy_sample}
            elif agent == agents.AStar:
                assert isinstance(
                    astar_lambda, float
                ) and 0 <= astar_lambda <= 1, "AStar lambda must be float in [0, 1]"
                assert isinstance(
                    astar_expansions, int) and astar_expansions >= 1 and (
                        not max_states or astar_expansions < max_states
                    ), "Expansions must be int < max states"
                agents_args = {
                    'lambda_': astar_lambda,
                    'expansions': astar_expansions
                }
            elif agent == agents.EGVM:
                assert isinstance(
                    egvm_epsilon, float
                ) and 0 <= egvm_epsilon <= 1, "EGVM epsilon must be float in [0, 1]"
                assert isinstance(
                    egvm_workers, int
                ) and egvm_workers >= 1, "Number of EGWM workers must a natural number"
                assert isinstance(
                    egvm_depth, int
                ) and egvm_depth >= 1, "EGWM depth must be a natural number"
                agents_args = {
                    'epsilon': egvm_epsilon,
                    'workers': egvm_workers,
                    'depth': egvm_depth
                }
            else:  # Non-parametric methods go brrrr
                agents_args = {}

            search_location = os.path.dirname(
                os.path.abspath(self.location)
            ) if in_subfolder else self.location  # Use parent folder, if parser has generated multiple folders
            # DeepAgent might have to test multiple NN's
            for folder in glob(f"{search_location}/*/") + [search_location]:
                if not os.path.isfile(os.path.join(folder, 'model.pt')):
                    continue
                store_repr()
                with open(f"{folder}/config.json") as f:
                    cfg = json.load(f)
                if optimized_params and agent in [agents.MCTS, agents.AStar]:
                    parampath = os.path.join(folder,
                                             f'{agent_string}_params.json')
                    if os.path.isfile(parampath):
                        with open(parampath, 'r') as paramfile:
                            agents_args = json.load(paramfile)
                            if agent == agents.MCTS:
                                agents_args['search_graph'] = mcts_graph_search
                    else:
                        self.logger.log(
                            f"Optimized params was set to true, but no file {parampath} was found, proceding with arguments for this {agent_string}."
                        )

                set_is2024(cfg["is2024"])
                agent = agent.from_saved(folder,
                                         use_best=use_best,
                                         **agents_args)
                key = f'{agent}{"" if folder == search_location else " " + os.path.basename(folder.rstrip(os.sep))}'

                self.reps[key] = cfg["is2024"]
                self.agents[key] = agent
                restore_repr()

            if not self.agents:
                raise FileNotFoundError(
                    f"No model.pt found in folder or subfolder of {self.location}"
                )
            self.logger.log(f"Loaded model from {search_location}")

        else:
            agent = agent()
            self.agents = {str(agent): agent}
            self.reps = {str(agent): True}

        self.agent_results = {}
        self.logger.log(
            f"Initialized {self.name} with agents {', '.join(str(s) for s in self.agents)}"
        )
        self.logger.log(
            f"TIME ESTIMATE: {len(self.agents) * self.evaluator.approximate_time() / 60:.2f} min.\t(Rough upper bound)"
        )

    def execute(self):
        self.logger.log(
            f"Beginning evaluator {self.name}\nLocation {self.location}\nCommit: {get_commit()}"
        )
        for (name, agent), representation in zip(self.agents.items(),
                                                 self.reps.values()):
            self.is2024 = representation
            self.agent_results[name] = self._single_exec(name, agent)

    @with_used_repr
    def _single_exec(self, name: str, agent: Agent):
        self.logger.section(f'Evaluating agent {name}')
        res, states, times = self.evaluator.eval(agent)
        subfolder = os.path.join(self.location, "evaluation_results")
        os.makedirs(subfolder, exist_ok=True)
        paths = [
            os.path.join(subfolder, f"{name}_results.npy"),
            os.path.join(subfolder, f"{name}_states_seen.npy"),
            os.path.join(subfolder, f"{name}_playtimes.npy")
        ]
        np.save(paths[0], res)
        np.save(paths[1], states)
        np.save(paths[2], times)
        self.logger.log("Saved evaluation results to\n" + "\n".join(paths))
        return res, states, times

    @staticmethod
    def plot_all_jobs(jobs: list, save_location: str):
        results, states, times, settings = dict(), dict(), dict(), dict()
        export_settings = dict()
        for job in jobs:
            for agent, (result, states_, times_) in job.agent_results.items():
                key = agent if len(jobs) == 1 else f"{job.name} - {agent}"
                results[key] = result
                states[key] = states_
                times[key] = times_
                settings[key] = {
                    "n_games": job.evaluator.n_games,
                    "max_time": job.evaluator.max_time,
                    "max_states": job.evaluator.max_states,
                    "scrambling_depths": job.evaluator.scrambling_depths,
                }
                export_settings[key] = {
                    **settings[key], "scrambling_depths":
                    job.evaluator.scrambling_depths.tolist()
                }
        eval_settings_path = os.path.join(save_location, "eval_settings.json")
        with open(eval_settings_path, "w", encoding="utf-8") as f:
            json.dump(export_settings, f, indent=4)
        savepaths = Evaluator.plot_evaluators(results, states, times, settings,
                                              save_location)
        joinedpaths = "\n".join(savepaths)
        job.logger(
            f"Saved settings to {eval_settings_path} and plots to\n{joinedpaths}"
        )
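A hypothetical EvalJob setup, again only to show how the arguments above fit together. The values are illustrative, the location (or a subfolder of it) is assumed to contain model.pt and config.json from a training run, and scrambling is the iterable that __init__ unpacks into range(*scrambling).

job = EvalJob(
    name="astar-eval",
    location="data/evals/astar-eval",  # must hold model.pt and config.json, or a subfolder that does
    use_best=True,
    agent="AStar",
    games=100,
    max_time=1.0,
    max_states=0,
    scrambling=[10, 25],               # becomes range(10, 25) in __init__
    optimized_params=False,
    mcts_c=1.0,
    mcts_graph_search=True,
    policy_sample=False,
    astar_lambda=0.1,
    astar_expansions=50,
    egvm_epsilon=0.1,
    egvm_workers=10,
    egvm_depth=100,
)
job.execute()                               # evaluates every loaded agent
EvalJob.plot_all_jobs([job], job.location)  # requires execute() to have filled agent_results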
Example #9
    tt = TickTock()
    job_settings = parser.parse(False)
    for settings in job_settings:
        job_rollouts = settings["rollouts"]
        job_evaluation_interval = settings["evaluation_interval"]
        settings[
            "rollouts"] = 5  # Five rollouts should be good enough to give a decent estimate
        settings["evaluation_interval"] = 0
        # Estimates training time
        tt.tick()
        train = TrainJob(**settings)
        train.execute()
        estimated_runtime += tt.tock() * job_rollouts / settings["rollouts"]
        # Estimates evaluation time
        evaluations = job_rollouts / job_evaluation_interval if job_evaluation_interval else 0
        estimated_runtime += np.ceil(
            evaluations) * TrainJob.eval_games * TrainJob.max_time

        # Cleans up
        shutil.rmtree(settings["location"])

    log_loc = job_settings[0]["location"]\
     if len(job_settings) == 1\
     else os.path.abspath(os.path.join(job_settings[0]["location"], ".."))
    log_loc += "/runtime_estimation.txt"
    log = Logger(log_loc, "Training time estimation")
    log("\n".join([
        f"Expected training time for the {len(job_settings)} given jobs: {timedelta(seconds=int(estimated_runtime))}",
        f"With 20 % buffer: {timedelta(seconds=int(estimated_runtime*1.2))}"
    ]))
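In isolation, the arithmetic above scales the measured time of the 5-rollout sample job up to the requested number of rollouts and adds an upper bound on evaluation time. A small sketch with illustrative numbers; eval_games = 200 and max_time = 0.05 are the TrainJob class attributes shown earlier.

import numpy as np

sample_rollouts, job_rollouts = 5, 1000  # sample job vs. requested job (illustrative)
measured_seconds = 42.0                  # hypothetical wall time of the 5-rollout sample
train_estimate = measured_seconds * job_rollouts / sample_rollouts  # 8400 s
evaluations = job_rollouts / 100         # assuming evaluation_interval = 100
eval_estimate = np.ceil(evaluations) * 200 * 0.05  # eval_games * max_time = 100 s
print(f"Estimated runtime: {train_estimate + eval_estimate:.0f} s")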
Example #10
def agent_optimize():
	"""
	Main way to run optimization. Hard coded to run optimization at 1 sec per game, but other behaviour can be set with CLI arguments seen by
	running `python librubiks/solving/hyper_optim.py --help`.
	Does not support config arguments.
	NB: The path here is different from the one in runeval and runtrain:
	it needs to be the folder containing model.pt! It doesn't work with the parent folder.

	Can work with runeval through
	```
	python librubiks/solving/hyper_optim.py --location example/net1/
	python runeval.py --location example/ --optimized_params True
	```
	"""
	set_seeds()

	# A lot of overhead just for default-argument niceness: default to the most recently trained model
	from runeval import train_folders

	model_path = ''
	if train_folders:
		for folder in [train_folders[-1]] + glob(f"{train_folders[-1]}/*/"):
			if os.path.isfile(os.path.join(folder, 'model.pt')):
				model_path = os.path.join(folder)
				break

	parser = argparse.ArgumentParser(description='Optimize Monte Carlo Tree Search for one model')
	parser.add_argument('--location', help='Folder which includes model.pt. Results will also be saved here',
		type=str, default=model_path)
	parser.add_argument('--iterations', help='Number of iterations of Bayesian Optimization',
		type=int, default=125)
	parser.add_argument('--agent', help='Name of agent corresponding to agent class in librubiks.solving.agents',
		type=str, default='AStar', choices = ['AStar', 'MCTS', 'EGVM'])
	parser.add_argument('--depth', help='Single number corresponding to the depth at which to test. If 0: run the deep evaluation',
		type=int, default=0)
	parser.add_argument('--eval_games', help='Number of games to evaluate at depth',
			type=int, default=100)
	parser.add_argument('--save_optimal', help='If True, saves a JSON of optimal hyperparameters usable for runeval',
			type=literal_eval, default=True, choices = [True, False])
	parser.add_argument('--use_best', help="Set to True to use model-best.pt instead of model.pt.", type=literal_eval, default=True,
			choices = [True, False])
	parser.add_argument('--optim_lengths', help="Set to True to optimize sol percentage / solution length. Else, simply use sol %", type=literal_eval,
			default=True, choices = [True, False])
	parser.add_argument('--optimizer', help="Either BO or grid", type=str, default="grid", choices = ("grid", "BO"))

	args = parser.parse_args()

	agent_name = args.agent
	if agent_name == 'MCTS':
		params = {
			'c': (0.1, 100),
		}
		def prepper(params): return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
			'search_graph': True,
		}
	elif agent_name == 'AStar':
		params = {
			'lambda_':    (0, 0.4),
			'expansions': (1, 1000),
		}
		def prepper(params):
			params['expansions'] = int(params['expansions'])
			return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
		}
	elif agent_name == 'EGVM':
		params = {
				'epsilon': (0, 0.5),
				'workers': (1, 500),
				'depth':   (1, 250),
			}

		def prepper(params):
			params['workers'] = int(params['workers'])
			params['depth'] = int(params['depth'])
			return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
		}
	else:
		raise NameError(f"{agent_name} does not correspond to a known agent, please pick either AStar, MCTS or EGVM")

	logger = Logger(os.path.join(args.location, f'{agent_name}_optimization.log'), 'Optimization')

	logger.log(f"{agent_name} optimization. Using network from {model_path}.")
	logger.log(f"Received arguments: {vars(args)}")

	agent = getattr(agents, agent_name)

	evaluator = Evaluator(n_games=args.eval_games, max_time=5, scrambling_depths=range(0) if args.depth == 0 else [args.depth])
	assert args.optimizer in ["BO", "grid"], f"Optimizer should be 'BO' or 'grid', not '{args.optimizer}'"
	if args.optimizer == "BO":
		optimizer = BayesianOptimizer(target_function=None, parameters=params, logger=logger)
	else:
		optimizer = GridSearch(target_function=None, parameters=params, logger=logger)
	optimizer.objective_from_evaluator(evaluator, agent, persistent_params, param_prepper=prepper, optim_lengths=args.optim_lengths)
	optimizer.optimize(args.iterations)

	if args.save_optimal:
		with open(os.path.join(args.location, f'{agent_name}_params.json'), 'w') as outfile:
			json.dump(prepper(copy(optimizer.optimal)), outfile)
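The preppers above exist only to cast the optimizer's continuous suggestions into valid agent arguments before evaluation. A tiny illustration for the AStar case, with the prepper repeated so the snippet is self-contained and a made-up suggestion dict:

def prepper(params):
    params['expansions'] = int(params['expansions'])
    return params

suggestion = {'lambda_': 0.12, 'expansions': 347.8}  # hypothetical raw suggestion from the optimizer
print(prepper(suggestion))                           # {'lambda_': 0.12, 'expansions': 347}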
Example #11
import matplotlib.pyplot as plt
import numpy as np
import torch

from librubiks import gpu, no_grad
from librubiks import cube
from librubiks.model import Model
from librubiks.utils import TickTock, Logger

tt = TickTock()
log = Logger("data/local_analyses/net.log", "Analyzing MCTS")
net = Model.load("data/local_method_comparison/asgerfix").eval().to(gpu)


def _get_adi_ff_slices(b, n):
	slice_size = n // b + 1
	# The final slice may overflow, but the excess is simply ignored when indexing
	slices = [slice(i * slice_size, (i + 1) * slice_size) for i in range(b)]
	return slices
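# Illustrative example of the slicing above: _get_adi_ff_slices(3, 10) gives
# slice_size = 10 // 3 + 1 = 4 and returns [slice(0, 4), slice(4, 8), slice(8, 12)];
# the last slice runs past the 10 available rows, but indexing simply truncates it.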

def _ff(oh_states, value=True, policy=True):
	batches = 1
	while True:
		try:
			value_parts = [net(oh_states[slice_], policy=policy, value=value).squeeze() for slice_ in
						   _get_adi_ff_slices(batches, len(oh_states))]
			values = torch.cat(value_parts).cpu()
			break
		except RuntimeError as e:  # Usually caused by running out of VRAM; if so, the batch size is reduced, otherwise the error is re-raised
			if "alloc" not in str(e):
				raise e