def benchmark():
    """
    Benchmarks the cube environment operations under both state representations.

    For each value passed to `set_is2024` (first True, then False) the single-state
    and multi-state variants of rotation, one-hot encoding and solution checking are
    run through `CubeBench`, each pass being profiled with `TickTock`. The
    representation that was active on entry is stored first and is restored on exit,
    also when one of the benchmarks raises. The collected timings are logged at the end.
    """
    log = Logger("data/local_analyses/benchmarks.log", "Benchmarks")
    tt = TickTock()

    cube_bench = CubeBench(log, tt)

    # Cube config variables
    cn = int(1e7)
    multi_op_size = int(1e4)  # Number of states used in multi operations
    multi_cn = int(cn / multi_op_size)  # Number of multi operations matching cn single operations

    store_repr()
    try:
        for repr_ in [True, False]:
            set_is2024(repr_)
            log.section(
                f"Benchmarking cube enviroment with {_repstr()} representation")
            tt.profile(f"Benchmarking cube environment, {_repstr()}")
            cube_bench.rotate(cn)
            cube_bench.multi_rotate(multi_cn, multi_op_size)
            cube_bench.onehot(cn)
            cube_bench.multi_onehot(multi_cn, multi_op_size)
            cube_bench.check_solution(cn)
            cube_bench.check_multi_solution(multi_cn, multi_op_size)
            tt.end_profile(f"Benchmarking cube environment, {_repstr()}")
    finally:
        # Always undo set_is2024, so a failing benchmark does not leave the
        # representation switched for whatever runs afterwards
        restore_repr()

    log.section("Benchmark runtime distribution")
    log(tt)
def test_statcomp(self):
    """
    Checks `StatisticalComparison.length_ttest` against scipy's Welch t test and
    that `solve_proptest` returns a p value in [0, 1].
    """
    # Two random samples, each with one randomly chosen entry overwritten by -1
    lengths_a = np.random.randint(0, 100, 100)
    lengths_b = np.random.randint(5, 150, 100)
    idx_a = np.random.randint(100)
    idx_b = np.random.randint(100)
    lengths_a[idx_a] = -1
    lengths_b[idx_b] = -1
    samples = [lengths_a, lengths_b]

    comparison = StatisticalComparison(None, Logger('local_tests/a', ''))

    # Reference p value: -1 entries are filtered out before the unequal-variance t test
    valid_a = lengths_a[lengths_a != -1]
    valid_b = lengths_b[lengths_b != -1]
    _, expected_p = stats.ttest_ind(valid_a, valid_b, equal_var=False)

    ttest_p, _ = comparison.length_ttest(samples, 0.05)
    assert np.isclose(expected_p, ttest_p)

    proptest_p, _ = comparison.solve_proptest(samples, 0.05)
    assert 0 <= proptest_p <= 1
def statscompare():
    """
    Main way to run statistical comparison by running `python librubiks/analysis/statscompare.py --help`.
    Does not support config arguments.

    Parses `--location`, `--alpha` and `--compare_all` from the command line, builds a
    `StatisticalComparison` that logs to `stats.log` inside the given location, and then
    loads the data, runs the comparisons and produces the normality plot.
    """
    parser = argparse.ArgumentParser(
        description='Compare two agents by doing t test of solution lengths and Chi-squared test of solve proportions'
    )
    # The location is needed to build the log path below, so argparse is made to
    # reject a missing value instead of letting os.path.join fail on None
    parser.add_argument(
        '--location',
        help="Folder containing evaluation results. If exactly two different agents are contained herein, "
             "these will be compared.\nOtherwise, the user will be prompted",
        type=str,
        required=True,
    )
    parser.add_argument(
        '--alpha',
        help="Significance level used",
        type=float,
        default=0.01,
    )
    parser.add_argument(
        '--compare_all',
        help="If true, all comparisons in folder is run, using p value correction",
        type=literal_eval,
        default=True,
        choices=[True, False],
    )

    args = parser.parse_args()

    comp = StatisticalComparison(
        args.location,
        Logger(os.path.join(args.location, "stats.log"), "Statistical comparison"),
        compare_all=args.compare_all,
    )
    comp.dataload()
    comp.run_comparisons(alpha=args.alpha)
    comp.normality_plot()
if not won: log(f"Game {i+1} was not won") else: for action_num in agent.action_queue: action_tup = cube.action_space[action_num] actions_taken.append(cube.action_names[action_tup[0]].lower( ) if action_tup[1] else cube.action_names[action_tup[0]]) log(f'Actions taken: {actions_taken}') sequences.append(actions_taken) return sequences if __name__ == "__main__": ### Hyper parameters ### net_path, use_best = '../rubiks-models/main', True max_time = 5 lambda_, N = 0.16, 700 output_path = '../rubiks-models/main/patterns.log' games = 1000 support = 0.3 ######################## log = Logger(output_path, "Pattern mining") agent = AStar.from_saved(net_path, use_best, lambda_, N) log(f"Loaded agent {agent} with network {net_path}") log(f"Playing {games} games") actions = generate_actions(agent, games, max_time) log("Found patterns:") log(find_generalized_patterns(actions, support))
def __init__(
        self,
        name: str,
        # Set by parser, should correspond to options in runtrain
        location: str,
        rollouts: int,
        rollout_games: int,
        rollout_depth: int,
        batch_size: int,
        alpha_update: float,
        lr: float,
        gamma: float,
        tau: float,
        update_interval: int,
        optim_fn: str,
        evaluation_interval: int,
        nn_init: str,
        is2024: bool,
        arch: str,
        analysis: bool,
        reward_method: str,

        # Currently not set by argparser/configparser
        agent=None,
        scrambling_depths: tuple = (10, ),
        verbose: bool = True,
):
    """
    Validates and stores the settings of a single training job.

    Most arguments are stored unchanged on the instance after a sanity assertion.
    `optim_fn` is the name of a class in `torch.optim` and is resolved to that class.
    A logger writing to `<location>/train.log` and an evaluator (using the class level
    `eval_games` and `max_time`) are created immediately.

    `agent` is the agent handed to the evaluator-driven training; it must be a
    `DeepAgent`. When it is None, a new `PolicySearch(net=None)` is created for this
    job, so that jobs never share one agent instance through the default argument.
    """
    self.name = name
    assert isinstance(self.name, str)

    self.rollouts = rollouts
    assert self.rollouts > 0
    self.rollout_games = rollout_games
    assert self.rollout_games > 0
    self.rollout_depth = rollout_depth
    assert rollout_depth > 0
    self.batch_size = batch_size
    assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

    self.alpha_update = alpha_update
    assert 0 <= alpha_update <= 1
    self.lr = lr
    assert float(lr) and lr <= 1
    self.gamma = gamma
    assert 0 < gamma <= 1
    self.tau = tau
    assert 0 < tau <= 1
    self.update_interval = update_interval
    assert isinstance(self.update_interval, int) and 0 <= self.update_interval
    self.optim_fn = getattr(torch.optim, optim_fn)
    assert issubclass(self.optim_fn, torch.optim.Optimizer)

    self.location = location
    # Already creates logger at init to test whether path works
    self.logger = Logger(f"{self.location}/train.log", name, verbose)
    self.logger.log(f"Initialized {self.name}")

    self.evaluator = Evaluator(n_games=self.eval_games,
                               max_time=self.max_time,
                               scrambling_depths=scrambling_depths,
                               logger=self.logger)
    self.evaluation_interval = evaluation_interval
    assert isinstance(self.evaluation_interval, int) and 0 <= self.evaluation_interval

    # A default written in the signature would be built once and shared by all jobs
    self.agent = PolicySearch(net=None) if agent is None else agent
    assert isinstance(self.agent, DeepAgent)

    self.is2024 = is2024
    # NOTE(review): for a non-numeric string float() raises ValueError before the
    # assertion message can be shown
    assert nn_init in ["glorot", "he"] or ( float(nn_init) or True ),\
        f"Initialization must be glorot, he or a number, but was {nn_init}"
    self.model_cfg = ModelConfig(architecture=arch, is2024=is2024, init=nn_init)

    self.analysis = analysis
    assert isinstance(self.analysis, bool)

    self.reward_method = reward_method
    assert self.reward_method in [
        "paper", "lapanfix", "schultzfix", "reward0"
    ]

    assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
    if arch == "conv":
        # The conv architecture is only accepted together with is2024 being False
        assert not self.is2024
    assert isinstance(self.model_cfg, ModelConfig)
class TrainJob:
    """
    A single training run: validated settings (`__init__`), the run itself including
    saving of models, plots and training data (`execute`), and a helper for resetting
    a training folder (`clean_dir`).
    """

    eval_games = 200  # Not given as arguments to __init__, as they should be accessible in runtime_estim
    max_time = 0.05
    is2024: bool

    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runtrain
            location: str,
            rollouts: int,
            rollout_games: int,
            rollout_depth: int,
            batch_size: int,
            alpha_update: float,
            lr: float,
            gamma: float,
            tau: float,
            update_interval: int,
            optim_fn: str,
            evaluation_interval: int,
            nn_init: str,
            is2024: bool,
            arch: str,
            analysis: bool,
            reward_method: str,

            # Currently not set by argparser/configparser
            agent=None,
            scrambling_depths: tuple = (10, ),
            verbose: bool = True,
    ):
        """
        Validates and stores the settings of the job.

        `optim_fn` is the name of a class in `torch.optim` and is resolved to that class.
        A logger writing to `<location>/train.log` and an evaluator (using the class
        level `eval_games` and `max_time`) are created immediately.

        `agent` must be a `DeepAgent`. When it is None, a new `PolicySearch(net=None)`
        is created for this job, so that jobs never share one agent instance through
        the default argument.
        """
        self.name = name
        assert isinstance(self.name, str)

        self.rollouts = rollouts
        assert self.rollouts > 0
        self.rollout_games = rollout_games
        assert self.rollout_games > 0
        self.rollout_depth = rollout_depth
        assert rollout_depth > 0
        self.batch_size = batch_size
        assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

        self.alpha_update = alpha_update
        assert 0 <= alpha_update <= 1
        self.lr = lr
        assert float(lr) and lr <= 1
        self.gamma = gamma
        assert 0 < gamma <= 1
        self.tau = tau
        assert 0 < tau <= 1
        self.update_interval = update_interval
        assert isinstance(self.update_interval, int) and 0 <= self.update_interval
        self.optim_fn = getattr(torch.optim, optim_fn)
        assert issubclass(self.optim_fn, torch.optim.Optimizer)

        self.location = location
        # Already creates logger at init to test whether path works
        self.logger = Logger(f"{self.location}/train.log", name, verbose)
        self.logger.log(f"Initialized {self.name}")

        self.evaluator = Evaluator(n_games=self.eval_games,
                                   max_time=self.max_time,
                                   scrambling_depths=scrambling_depths,
                                   logger=self.logger)
        self.evaluation_interval = evaluation_interval
        assert isinstance(self.evaluation_interval, int) and 0 <= self.evaluation_interval

        # A default written in the signature would be built once and shared by all jobs
        self.agent = PolicySearch(net=None) if agent is None else agent
        assert isinstance(self.agent, DeepAgent)

        self.is2024 = is2024
        # NOTE(review): for a non-numeric string float() raises ValueError before the
        # assertion message can be shown
        assert nn_init in ["glorot", "he"] or ( float(nn_init) or True ),\
            f"Initialization must be glorot, he or a number, but was {nn_init}"
        self.model_cfg = ModelConfig(architecture=arch, is2024=is2024, init=nn_init)

        self.analysis = analysis
        assert isinstance(self.analysis, bool)

        self.reward_method = reward_method
        assert self.reward_method in [
            "paper", "lapanfix", "schultzfix", "reward0"
        ]

        assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
        if arch == "conv":
            # The conv architecture is only accepted together with is2024 being False
            assert not self.is2024
        assert isinstance(self.model_cfg, ModelConfig)

    @with_used_repr
    def execute(self):
        """
        Runs the training job and saves everything it produces under `self.location`.

        Saves the trained network (and the second network returned by training when
        `evaluation_interval` is non-zero), the training plot, analysis plots and data
        when `self.analysis` is set, and the training curves as .npy files in `train-data`.
        Returns the tuple `(train.train_rollouts, train.train_losses)`.
        """
        # Sets representation
        self.logger.section(
            f"Starting job:\n{self.name} with {'20x24' if get_is2024() else '6x8x6'} representation\nLocation {self.location}\nCommit: {get_commit()}"
        )

        train = Train(
            self.rollouts,
            batch_size=self.batch_size,
            rollout_games=self.rollout_games,
            rollout_depth=self.rollout_depth,
            optim_fn=self.optim_fn,
            alpha_update=self.alpha_update,
            lr=self.lr,
            gamma=self.gamma,
            tau=self.tau,
            reward_method=self.reward_method,
            update_interval=self.update_interval,
            agent=self.agent,
            logger=self.logger,
            evaluation_interval=self.evaluation_interval,
            evaluator=self.evaluator,
            with_analysis=self.analysis,
        )
        self.logger(
            f"Rough upper bound on total evaluation time during training: {len(train.evaluation_rollouts)*self.evaluator.approximate_time()/60:.2f} min"
        )

        net = Model.create(self.model_cfg, self.logger)
        net, min_net = train.train(net)
        net.save(self.location)
        if self.evaluation_interval:
            min_net.save(self.location, True)

        train.plot_training(self.location, name=self.name)
        analysispath = os.path.join(self.location, "analysis")
        datapath = os.path.join(self.location, "train-data")
        # exist_ok: a finished training must not fail at the very end just because
        # the output folders are left over from an earlier run in the same location
        os.makedirs(datapath, exist_ok=True)
        os.makedirs(analysispath, exist_ok=True)

        if self.analysis:
            train.analysis.plot_substate_distributions(analysispath)
            train.analysis.plot_value_targets(analysispath)
            train.analysis.plot_net_changes(analysispath)
            train.analysis.visualize_first_states(analysispath)
            np.save(f"{datapath}/avg_target_values.npy", train.analysis.avg_value_targets)
            np.save(f"{datapath}/policy_entropies.npy", train.analysis.policy_entropies)
            np.save(f"{datapath}/substate_val_stds.npy", train.analysis.substate_val_stds)

        np.save(f"{datapath}/rollouts.npy", train.train_rollouts)
        np.save(f"{datapath}/policy_losses.npy", train.policy_losses)
        np.save(f"{datapath}/value_losses.npy", train.value_losses)
        np.save(f"{datapath}/losses.npy", train.train_losses)
        np.save(f"{datapath}/evaluation_rollouts.npy", train.evaluation_rollouts)
        np.save(f"{datapath}/evaluations.npy", train.sol_percents)

        return train.train_rollouts, train.train_losses

    @staticmethod
    def clean_dir(loc: str):
        """
        Cleans a training directory except for train_config.ini, the content of which is also returned
        """
        tcpath = f"{loc}/train_config.ini"
        # The config is read into memory first, as the whole folder is removed below
        with open(tcpath, encoding="utf-8") as f:
            content = f.read()
        rmtree(loc)
        os.mkdir(loc)
        with open(tcpath, "w", encoding="utf-8") as f:
            f.write(content)
        return content
def __init__(
        self,
        name: str,
        # Set by parser, should correspond to options in runeval
        location: str,
        use_best: bool,
        agent: str,
        games: int,
        max_time: float,
        max_states: int,
        scrambling: str,
        optimized_params: bool,
        mcts_c: float,
        mcts_graph_search: bool,
        policy_sample: bool,
        astar_lambda: float,
        astar_expansions: int,
        egvm_epsilon: float,
        egvm_workers: int,
        egvm_depth: int,

        # Currently not set by parser
        verbose: bool = True,
        in_subfolder: bool = False,  # Should be true if there are multiple experiments
):
    """
    Sets up an evaluation job: logger, evaluator and the agent(s) to evaluate.

    `agent` is the name of an agent class in `agents`. For `DeepAgent` subclasses one
    agent is loaded (through `from_saved`) for every folder among the search location
    and its direct subfolders that contains a `model.pt`, using the representation
    given by `is2024` in that folder's `config.json`. If `optimized_params` is set and
    the agent is MCTS or AStar, the arguments in `<folder>/<agent>_params.json` replace
    the given agent arguments for that folder only. Other agents are instantiated
    without arguments.

    `scrambling` is unpacked into `range(*scrambling)`.
    Raises FileNotFoundError if a DeepAgent is requested but no `model.pt` is found.
    """
    self.name = name
    self.location = location

    assert isinstance(games, int) and games
    assert max_time >= 0
    assert max_states >= 0
    assert max_time or max_states
    scrambling = range(*scrambling)
    assert isinstance(optimized_params, bool)

    # Create evaluator
    # Already creates logger at init to test whether path works
    self.logger = Logger(f"{self.location}/{self.name}.log", name, verbose)
    self.evaluator = Evaluator(n_games=games,
                               max_time=max_time,
                               max_states=max_states,
                               scrambling_depths=scrambling,
                               logger=self.logger)

    # Create agents
    agent_string = agent
    # The class is kept under its own name: it is needed again for every folder below
    agent_cls = getattr(agents, agent_string)
    assert issubclass(agent_cls, agents.Agent)

    if issubclass(agent_cls, agents.DeepAgent):
        self.agents, self.reps, agents_args = {}, {}, {}

        # DeepAgents need specific arguments
        if agent_cls == agents.MCTS:
            assert mcts_c >= 0, f"Exploration parameter c must be 0 or larger, not {mcts_c}"
            agents_args = {'c': mcts_c, 'search_graph': mcts_graph_search}
        elif agent_cls == agents.PolicySearch:
            assert isinstance(policy_sample, bool)
            agents_args = {'sample_policy': policy_sample}
        elif agent_cls == agents.AStar:
            assert isinstance(astar_lambda, float) and 0 <= astar_lambda <= 1,\
                "AStar lambda must be float in [0, 1]"
            assert isinstance(astar_expansions, int) and astar_expansions >= 1\
                and (not max_states or astar_expansions < max_states),\
                "Expansions must be int < max states"
            agents_args = {
                'lambda_': astar_lambda,
                'expansions': astar_expansions
            }
        elif agent_cls == agents.EGVM:
            assert isinstance(egvm_epsilon, float) and 0 <= egvm_epsilon <= 1,\
                "EGVM epsilon must be float in [0, 1]"
            assert isinstance(egvm_workers, int) and egvm_workers >= 1,\
                "Number of EGWM workers must a natural number"
            assert isinstance(egvm_depth, int) and egvm_depth >= 1,\
                "EGWM depth must be a natural number"
            agents_args = {
                'epsilon': egvm_epsilon,
                'workers': egvm_workers,
                'depth': egvm_depth
            }
        else:  # Non-parametric methods go brrrr
            agents_args = {}

        # Use parent folder, if parser has generated multiple folders
        search_location = os.path.dirname(os.path.abspath(self.location))\
            if in_subfolder else self.location

        # DeepAgent might have to test multiple NN's
        for folder in glob(f"{search_location}/*/") + [search_location]:
            if not os.path.isfile(os.path.join(folder, 'model.pt')):
                continue
            store_repr()
            try:
                with open(f"{folder}/config.json") as f:
                    cfg = json.load(f)

                # Every folder starts from the given arguments, so optimized
                # parameters found for one network are not reused for the next
                folder_args = dict(agents_args)
                if optimized_params and agent_cls in [agents.MCTS, agents.AStar]:
                    parampath = os.path.join(folder, f'{agent_string}_params.json')
                    if os.path.isfile(parampath):
                        with open(parampath, 'r') as paramfile:
                            folder_args = json.load(paramfile)
                        if agent_cls == agents.MCTS:
                            folder_args['search_graph'] = mcts_graph_search
                    else:
                        self.logger.log(
                            f"Optimized params was set to true, but no file {parampath} was found, proceding with arguments for this {agent_string}."
                        )

                set_is2024(cfg["is2024"])
                loaded_agent = agent_cls.from_saved(folder, use_best=use_best, **folder_args)
                key = f'{loaded_agent}{"" if folder == search_location else " " + os.path.basename(folder.rstrip(os.sep))}'

                self.reps[key] = cfg["is2024"]
                self.agents[key] = loaded_agent
            finally:
                # Undo set_is2024 also when loading a network fails
                restore_repr()

        if not self.agents:
            raise FileNotFoundError(
                f"No model.pt found in folder or subfolder of {self.location}"
            )
        self.logger.log(f"Loaded model from {search_location}")

    else:
        simple_agent = agent_cls()
        self.agents = {str(simple_agent): simple_agent}
        self.reps = {str(simple_agent): True}

    self.agent_results = {}
    self.logger.log(
        f"Initialized {self.name} with agents {', '.join(str(s) for s in self.agents)}"
    )
    self.logger.log(
        f"TIME ESTIMATE: {len(self.agents) * self.evaluator.approximate_time() / 60:.2f} min.\t(Rough upper bound)"
    )
class EvalJob:
    """
    A single evaluation run: loads the agent(s) to evaluate (`__init__`), evaluates
    each of them and saves the results (`execute`), and plots the results of several
    jobs together (`plot_all_jobs`).
    """

    is2024: bool

    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runeval
            location: str,
            use_best: bool,
            agent: str,
            games: int,
            max_time: float,
            max_states: int,
            scrambling: str,
            optimized_params: bool,
            mcts_c: float,
            mcts_graph_search: bool,
            policy_sample: bool,
            astar_lambda: float,
            astar_expansions: int,
            egvm_epsilon: float,
            egvm_workers: int,
            egvm_depth: int,

            # Currently not set by parser
            verbose: bool = True,
            in_subfolder: bool = False,  # Should be true if there are multiple experiments
    ):
        """
        Sets up the logger, the evaluator and the agent(s) to evaluate.

        `agent` is the name of an agent class in `agents`. For `DeepAgent` subclasses
        one agent is loaded (through `from_saved`) for every folder among the search
        location and its direct subfolders that contains a `model.pt`, using the
        representation given by `is2024` in that folder's `config.json`. If
        `optimized_params` is set and the agent is MCTS or AStar, the arguments in
        `<folder>/<agent>_params.json` replace the given agent arguments for that
        folder only. Other agents are instantiated without arguments.

        `scrambling` is unpacked into `range(*scrambling)`.
        Raises FileNotFoundError if a DeepAgent is requested but no `model.pt` is found.
        """
        self.name = name
        self.location = location

        assert isinstance(games, int) and games
        assert max_time >= 0
        assert max_states >= 0
        assert max_time or max_states
        scrambling = range(*scrambling)
        assert isinstance(optimized_params, bool)

        # Create evaluator
        # Already creates logger at init to test whether path works
        self.logger = Logger(f"{self.location}/{self.name}.log", name, verbose)
        self.evaluator = Evaluator(n_games=games,
                                   max_time=max_time,
                                   max_states=max_states,
                                   scrambling_depths=scrambling,
                                   logger=self.logger)

        # Create agents
        agent_string = agent
        # The class is kept under its own name: it is needed again for every folder below
        agent_cls = getattr(agents, agent_string)
        assert issubclass(agent_cls, agents.Agent)

        if issubclass(agent_cls, agents.DeepAgent):
            self.agents, self.reps, agents_args = {}, {}, {}

            # DeepAgents need specific arguments
            if agent_cls == agents.MCTS:
                assert mcts_c >= 0, f"Exploration parameter c must be 0 or larger, not {mcts_c}"
                agents_args = {'c': mcts_c, 'search_graph': mcts_graph_search}
            elif agent_cls == agents.PolicySearch:
                assert isinstance(policy_sample, bool)
                agents_args = {'sample_policy': policy_sample}
            elif agent_cls == agents.AStar:
                assert isinstance(astar_lambda, float) and 0 <= astar_lambda <= 1,\
                    "AStar lambda must be float in [0, 1]"
                assert isinstance(astar_expansions, int) and astar_expansions >= 1\
                    and (not max_states or astar_expansions < max_states),\
                    "Expansions must be int < max states"
                agents_args = {
                    'lambda_': astar_lambda,
                    'expansions': astar_expansions
                }
            elif agent_cls == agents.EGVM:
                assert isinstance(egvm_epsilon, float) and 0 <= egvm_epsilon <= 1,\
                    "EGVM epsilon must be float in [0, 1]"
                assert isinstance(egvm_workers, int) and egvm_workers >= 1,\
                    "Number of EGWM workers must a natural number"
                assert isinstance(egvm_depth, int) and egvm_depth >= 1,\
                    "EGWM depth must be a natural number"
                agents_args = {
                    'epsilon': egvm_epsilon,
                    'workers': egvm_workers,
                    'depth': egvm_depth
                }
            else:  # Non-parametric methods go brrrr
                agents_args = {}

            # Use parent folder, if parser has generated multiple folders
            search_location = os.path.dirname(os.path.abspath(self.location))\
                if in_subfolder else self.location

            # DeepAgent might have to test multiple NN's
            for folder in glob(f"{search_location}/*/") + [search_location]:
                if not os.path.isfile(os.path.join(folder, 'model.pt')):
                    continue
                store_repr()
                try:
                    with open(f"{folder}/config.json") as f:
                        cfg = json.load(f)

                    # Every folder starts from the given arguments, so optimized
                    # parameters found for one network are not reused for the next
                    folder_args = dict(agents_args)
                    if optimized_params and agent_cls in [agents.MCTS, agents.AStar]:
                        parampath = os.path.join(folder, f'{agent_string}_params.json')
                        if os.path.isfile(parampath):
                            with open(parampath, 'r') as paramfile:
                                folder_args = json.load(paramfile)
                            if agent_cls == agents.MCTS:
                                folder_args['search_graph'] = mcts_graph_search
                        else:
                            self.logger.log(
                                f"Optimized params was set to true, but no file {parampath} was found, proceding with arguments for this {agent_string}."
                            )

                    set_is2024(cfg["is2024"])
                    loaded_agent = agent_cls.from_saved(folder, use_best=use_best, **folder_args)
                    key = f'{loaded_agent}{"" if folder == search_location else " " + os.path.basename(folder.rstrip(os.sep))}'

                    self.reps[key] = cfg["is2024"]
                    self.agents[key] = loaded_agent
                finally:
                    # Undo set_is2024 also when loading a network fails
                    restore_repr()

            if not self.agents:
                raise FileNotFoundError(
                    f"No model.pt found in folder or subfolder of {self.location}"
                )
            self.logger.log(f"Loaded model from {search_location}")

        else:
            simple_agent = agent_cls()
            self.agents = {str(simple_agent): simple_agent}
            self.reps = {str(simple_agent): True}

        self.agent_results = {}
        self.logger.log(
            f"Initialized {self.name} with agents {', '.join(str(s) for s in self.agents)}"
        )
        self.logger.log(
            f"TIME ESTIMATE: {len(self.agents) * self.evaluator.approximate_time() / 60:.2f} min.\t(Rough upper bound)"
        )

    def execute(self):
        """
        Evaluates every loaded agent and stores the result of each under the
        agent's key in `self.agent_results`.
        """
        self.logger.log(
            f"Beginning evaluator {self.name}\nLocation {self.location}\nCommit: {get_commit()}"
        )
        for (name, agent), representation in zip(self.agents.items(), self.reps.values()):
            # Set before the call, as _single_exec is wrapped by with_used_repr
            # (presumably reading self.is2024 - TODO confirm)
            self.is2024 = representation
            self.agent_results[name] = self._single_exec(name, agent)

    @with_used_repr
    def _single_exec(self, name: str, agent: Agent):
        """
        Evaluates a single agent, saves results, states seen and play times as .npy
        files in `<location>/evaluation_results` and returns the three of them.
        """
        self.logger.section(f'Evaluationg agent {name}')
        res, states, times = self.evaluator.eval(agent)
        subfolder = os.path.join(self.location, "evaluation_results")
        os.makedirs(subfolder, exist_ok=True)
        paths = [
            os.path.join(subfolder, f"{name}_results.npy"),
            os.path.join(subfolder, f"{name}_states_seen.npy"),
            os.path.join(subfolder, f"{name}_playtimes.npy")
        ]
        np.save(paths[0], res)
        np.save(paths[1], states)
        np.save(paths[2], times)

        self.logger.log("Saved evaluation results to\n" + "\n".join(paths))
        return res, states, times

    @staticmethod
    def plot_all_jobs(jobs: list, save_location: str):
        """
        Collects the results of all executed jobs, writes the evaluator settings of
        each result to `eval_settings.json` in `save_location` and plots all results
        through `Evaluator.plot_evaluators`.
        """
        results, states, times, settings = dict(), dict(), dict(), dict()
        export_settings = dict()
        for job in jobs:
            for agent, (result, states_, times_) in job.agent_results.items():
                # The job name is only needed to tell results apart when there are several jobs
                key = agent if len(jobs) == 1 else f"{job.name} - {agent}"
                results[key] = result
                states[key] = states_
                times[key] = times_
                settings[key] = {
                    "n_games": job.evaluator.n_games,
                    "max_time": job.evaluator.max_time,
                    "max_states": job.evaluator.max_states,
                    "scrambling_depths": job.evaluator.scrambling_depths,
                }
                # Same settings, but with the depths converted to a list for the JSON dump
                export_settings[key] = {
                    **settings[key],
                    "scrambling_depths": job.evaluator.scrambling_depths.tolist()
                }
        eval_settings_path = os.path.join(save_location, "eval_settings.json")
        with open(eval_settings_path, "w", encoding="utf-8") as f:
            json.dump(export_settings, f, indent=4)
        savepaths = Evaluator.plot_evaluators(results, states, times, settings, save_location)
        joinedpaths = "\n".join(savepaths)
        # Logged with the logger of the last job in the list
        job.logger(
            f"Saved settings to {eval_settings_path} and plots to\n{joinedpaths}"
        )
# Estimates the total runtime of the parsed training jobs by running a shortened
# version of each job and scaling the measured time up to the full number of rollouts.
# NOTE(review): estimated_runtime is accumulated here but initialized outside this excerpt.
tt = TickTock()
job_settings = parser.parse(False)
for settings in job_settings:
    # The real values are remembered before the settings are overwritten for the test run
    job_rollouts = settings["rollouts"]
    job_evaluation_interval = settings["evaluation_interval"]
    settings[
        "rollouts"] = 5  # Five rollouts should be good enough to give a decent estimate
    settings["evaluation_interval"] = 0  # No evaluation in the test run; it is estimated separately below
    # Estimates training time
    tt.tick()
    train = TrainJob(**settings)
    train.execute()
    # Measured time for the short run scaled to the real number of rollouts
    estimated_runtime += tt.tock() * job_rollouts / settings["rollouts"]
    # Estimates evaluation time
    evaluations = job_rollouts / job_evaluation_interval if job_evaluation_interval else 0
    # Each evaluation is counted as eval_games games of max_time each
    estimated_runtime += np.ceil(
        evaluations) * TrainJob.eval_games * TrainJob.max_time

    # Cleans up
    shutil.rmtree(settings["location"])

# The estimate is logged in the job folder itself for a single job, otherwise in its parent folder
log_loc = job_settings[0]["location"]\
    if len(job_settings) == 1\
    else os.path.abspath(os.path.join(job_settings[0]["location"], ".."))
log_loc += "/runtime_estimation.txt"
log = Logger(log_loc, "Training time estimation")
log("\n".join([
    f"Expected training time for the {len(job_settings)} given jobs: {timedelta(seconds=int(estimated_runtime))}",
    f"With 20 % buffer: {timedelta(seconds=int(estimated_runtime*1.2))}"
]))
def agent_optimize():
    """
    Main way to run optimization. Hard coded to run optimization at 1 sec per game, but other behaviour can be set with CLI arguments seen by
    running `python librubiks/solving/hyper_optim.py --help`.
    Does not support config arguments.
    NB: The path here is different to the one in runeval and runtrain:
    It needs to be to folder containing model.pt! It doesn't work with parent folder.

    Can work with runeval through
    ```
    python librubiks/solving/hyper_optim.py --location example/net1/
    python runeval.py --location example/ --optimized_params True
    ```

    The search space depends on `--agent`; the network is loaded from `--location`, which
    is also where the optimization log and, with `--save_optimal`, the JSON file with the
    optimal parameters are written.
    """
    set_seeds()

    # Lot of overhead just for default argument niceness: latest model is latest
    from runeval import train_folders

    model_path = ''
    if train_folders:
        for folder in [train_folders[-1]] + glob(f"{train_folders[-1]}/*/"):
            if os.path.isfile(os.path.join(folder, 'model.pt')):
                model_path = os.path.join(folder)
                break

    parser = argparse.ArgumentParser(description='Optimize Monte Carlo Tree Search for one model')
    parser.add_argument('--location', help='Folder which includes model.pt. Results will also be saved here',
        type=str, default=model_path)
    parser.add_argument('--iterations', help='Number of iterations of Bayesian Optimization',
        type=int, default=125)
    parser.add_argument('--agent', help='Name of agent corresponding to agent class in librubiks.solving.agents',
        type=str, default='AStar', choices=['AStar', 'MCTS', 'EGVM'])
    parser.add_argument('--depth', help='Single number corresponding to the depth at which to test. If 0: run this at deep',
        type=int, default=0)
    parser.add_argument('--eval_games', help='Number of games to evaluate at depth',
        type=int, default=100)
    parser.add_argument('--save_optimal', help='If True, saves a JSON of optimal hyperparameters usable for runeval',
        type=literal_eval, default=True, choices=[True, False])
    parser.add_argument('--use_best', help="Set to True to use model-best.pt instead of model.pt.",
        type=literal_eval, default=True, choices=[True, False])
    # argparse %-formats help strings, so the literal percent sign has to be written as %%
    parser.add_argument('--optim_lengths', help="Set to true to optimize against sol percentage / solution length. Else, simply use sol %%",
        type=literal_eval, default=True, choices=[True, False])
    parser.add_argument('--optimizer', help="Either BO or grid",
        type=str, default="grid", choices=("grid", "BO"))

    args = parser.parse_args()

    agent_name = args.agent
    # Per agent: parameter bounds to search over, the parameters that must be cast to
    # int before use, and fixed agent arguments that are not optimized
    if agent_name == 'MCTS':
        params = {
            'c': (0.1, 100),
        }
        int_params = ()
        fixed_params = {'search_graph': True}
    elif agent_name == 'AStar':
        params = {
            'lambda_': (0, 0.4),
            'expansions': (1, 1000),
        }
        int_params = ('expansions',)
        fixed_params = {}
    elif agent_name == 'EGVM':
        params = {
            'epsilon': (0, 0.5),
            'workers': (1, 500),
            'depth': (1, 250),
        }
        int_params = ('workers', 'depth')
        fixed_params = {}
    else:
        raise NameError(f"{agent_name} does not correspond to a known agent, please pick either AStar, MCTS or EGVM")

    def prepper(params):
        # Casts the integer-valued parameters in place and returns the same dict
        for int_param in int_params:
            params[int_param] = int(params[int_param])
        return params

    persistent_params = {
        'net': Model.load(args.location, load_best=args.use_best),
        **fixed_params,
    }

    logger = Logger(os.path.join(args.location, f'{agent_name}_optimization.log'), 'Optimization')
    # Reports the location actually used, which differs from model_path when --location is given
    logger.log(f"{agent_name} optimization. Using network from {args.location}.")
    logger.log(f"Received arguments: {vars(args)}")

    agent = getattr(agents, agent_name)

    evaluator = Evaluator(n_games=args.eval_games, max_time=5, scrambling_depths=range(0) if args.depth == 0 else [args.depth])
    assert args.optimizer in ["BO", "grid"], f"Optimizer should be 'BO' or 'grid', not '{args.optimizer}'"
    if args.optimizer == "BO":
        optimizer = BayesianOptimizer(target_function=None, parameters=params, logger=logger)
    else:
        optimizer = GridSearch(target_function=None, parameters=params, logger=logger)
    optimizer.objective_from_evaluator(evaluator, agent, persistent_params, param_prepper=prepper, optim_lengths=args.optim_lengths)
    optimizer.optimize(args.iterations)

    if args.save_optimal:
        with open(os.path.join(args.location, f'{agent_name}_params.json'), 'w') as outfile:
            # prepper works in place, so it is given a copy of the optimizer's result
            json.dump(prepper(copy(optimizer.optimal)), outfile)
import matplotlib.pyplot as plt import numpy as np import torch from librubiks import gpu, no_grad from librubiks import cube from librubiks.model import Model from librubiks.utils import TickTock, Logger tt = TickTock() log = Logger("data/local_analyses/net.log", "Analyzing MCTS") net = Model.load("data/local_method_comparison/asgerfix").eval().to(gpu) def _get_adi_ff_slices(b, n): slice_size = n // b + 1 # Final slice may have overflow, however this is simply ignored when indexing slices = [slice(i * slice_size, (i + 1) * slice_size) for i in range(b)] return slices def _ff(oh_states, value=True, policy=True): batches = 1 while True: try: value_parts = [net(oh_states[slice_], policy=policy, value=value).squeeze() for slice_ in _get_adi_ff_slices(batches, len(oh_states))] values = torch.cat(value_parts).cpu() break except RuntimeError as e: # Usually caused by running out of vram. If not, the error is still raised, else batch size is reduced if "alloc" not in str(e): raise e