import json
import os
import subprocess
import sys

import numpy as np
import torch

# Repo-internal names used below (Model, ModelConfig, Evaluator, Train, the
# agents, cube, gpu, Logger, NullLogger) are assumed to be imported from the
# surrounding package; the exact module paths are not shown in this section.

def test_model_config(self):
    cf = ModelConfig(torch.nn.ReLU())
    with open("local_tests/test_config.json", "w", encoding="utf-8") as f:
        json.dump(cf.as_json_dict(), f)
    with open("local_tests/test_config.json", encoding="utf-8") as f:
        cf = ModelConfig.from_json_dict(json.load(f))
    assert type(cf.activation_function) == type(torch.nn.ReLU())
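# Note on the round trip above: torch modules are not JSON-serializable, so
# the test compares activation functions by type rather than identity;
# presumably as_json_dict stores the activation by name and from_json_dict
# reconstructs it (an assumption about ModelConfig's implementation).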
def test_train(self):
    torch.manual_seed(42)
    # The standard test setup
    net = Model.create(ModelConfig())
    evaluator = Evaluator(2, max_time=.02, max_states=None, scrambling_depths=[2])
    train = Train(
        rollouts=2,
        batch_size=2,
        tau=0.1,
        alpha_update=.5,
        gamma=1,
        rollout_games=2,
        rollout_depth=3,
        optim_fn=torch.optim.Adam,
        agent=PolicySearch(None),
        lr=1e-6,
        evaluation_interval=1,
        evaluator=evaluator,
        update_interval=1,
        with_analysis=True,
        reward_method='schultzfix',  # The current reward method
    )
    net, min_net = train.train(net)
    train.plot_training("local_tests/local_train_test", "test")
    assert os.path.exists("local_tests/local_train_test/training_test.png")
def test_resnet(self):
    config = ModelConfig(architecture='res_big')
    model = Model.create(config)
    assert next(model.parameters()).device.type == gpu.type
    model.eval()
    x = torch.randn(2, 480).to(gpu)
    model(x)
    model.train()
    model(x)
def test_model(self):
    config = ModelConfig()
    model = Model.create(config)
    assert next(model.parameters()).device.type == gpu.type
    model.eval()
    x = torch.randn(2, 480).to(gpu)
    model(x)
    model.train()
    model(x)
def test_cost(self):
    net = Model.create(ModelConfig()).eval()
    games = 5
    states, _ = cube.sequence_scrambler(games, 1, True)
    agent = AStar(net, lambda_=1, expansions=2)
    agent.reset(1, 1)
    for i, _ in enumerate(states):
        agent.G[i] = 1
    cost = agent.cost(states, i)  # i is the index of the last state
    assert cost.shape == (games,)
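# AStar.cost itself is not shown in this section. In weighted A* (as in the
# DeepCubeA line of work) the open-list priority typically has the form
# f(x) = lambda * g(x) + h(x), with g the path cost kept in agent.G and h a
# heuristic from the value network; the exact combination used by AStar.cost
# here is an assumption about the implementation.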
def test_agents(self):
    net = Model.create(ModelConfig())
    agents = [
        RandomSearch(),
        BFS(),
        PolicySearch(net, sample_policy=False),
        PolicySearch(net, sample_policy=True),
        ValueSearch(net),
        EGVM(net, 0.1, 4, 12),
    ]
    for agent in agents:
        self._test_agents(agent)
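# _test_agents is referenced above but not shown in this section. A minimal
# sketch of such a helper, assuming agent.search(state, time_limit) returns
# whether a solution was found and that agents expose an action_queue of
# indices into cube.action_space (assumptions about the agent API):
def _test_agents(self, agent):
    state, _, _ = cube.scramble(2)
    solved = agent.search(state, time_limit=1)
    if solved:
        # Replay the found action sequence and check it actually solves the state
        for action in agent.action_queue:
            state = cube.rotate(state, *cube.action_space[action])
        assert cube.is_solved(state)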
def test_save_and_load(self):
    torch.manual_seed(42)
    config = ModelConfig()
    model = Model.create(config, logger=NullLogger())
    model_dir = "local_tests/local_model_test"
    model.save(model_dir)
    assert os.path.exists(f"{model_dir}/config.json")
    assert os.path.exists(f"{model_dir}/model.pt")
    model = Model.load(model_dir).to(gpu)
    assert next(model.parameters()).device.type == gpu.type
def test_expansion(self):
    net = Model.create(ModelConfig()).eval()
    init_state, _, _ = cube.scramble(3)
    agent = AStar(net, lambda_=0.1, expansions=5)
    agent.search(init_state, time_limit=1)
    init_idx = agent.indices[init_state.tostring()]
    assert init_idx == 1
    assert agent.G[init_idx] == 0
    for action in cube.action_space:
        substate = cube.rotate(init_state, *action)
        idx = agent.indices[substate.tostring()]
        assert agent.G[idx] == 1
        assert agent.parents[idx] == init_idx
def test_agent(self):
    test_params = {
        (0, 10),
        (0.5, 2),
        (1, 1),
    }
    net = Model.create(ModelConfig()).eval()
    for params in test_params:
        agent = AStar(net, *params)
        self._can_win_all_easy_games(agent)
        # reset should clear all search state, even when given nonsense arguments
        agent.reset("Tue", "Herlau")
        assert not len(agent.indices)
        assert not len(agent.open_queue)
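# _can_win_all_easy_games is likewise referenced but not defined in this
# section. A minimal sketch, assuming depth-1 scrambles from
# cube.sequence_scrambler (as used in test_cost) are always solvable within
# the time limit (a hypothetical helper, not the repo's implementation):
def _can_win_all_easy_games(self, agent):
    states, _ = cube.sequence_scrambler(5, 1, True)
    for state in states:
        assert agent.search(state, time_limit=1)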
def _mcts_test(self, state: np.ndarray, search_graph: bool):
    agent = MCTS(Model.create(ModelConfig()), c=1, search_graph=search_graph)
    solved = agent.search(state, .2)

    # Indices
    assert agent.indices[state.tostring()] == 1
    for s, i in agent.indices.items():
        assert agent.states[i].tostring() == s
    assert sorted(agent.indices.values())[0] == 1
    assert np.all(np.diff(sorted(agent.indices.values())) == 1)

    used_idcs = np.array(list(agent.indices.values()))

    # States
    assert np.all(agent.states[1] == state)
    for i, s in enumerate(agent.states):
        if i not in used_idcs:
            continue
        assert s.tostring() in agent.indices
        assert agent.indices[s.tostring()] == i

    # Neighbors
    if not search_graph:
        for i, neighs in enumerate(agent.neighbors):
            if i not in used_idcs:
                continue
            state = agent.states[i]
            for j, neighbor_index in enumerate(neighs):
                assert neighbor_index == 0 or neighbor_index in agent.indices.values()
                if neighbor_index == 0:
                    continue
                substate = cube.rotate(state, *cube.action_space[j])
                assert np.all(agent.states[neighbor_index] == substate)

    # Policy and value
    with torch.no_grad():
        p, v = agent.net(cube.as_oh(agent.states[used_idcs]))
    p, v = p.softmax(dim=1).cpu().numpy(), v.squeeze().cpu().numpy()
    assert np.all(np.isclose(agent.P[used_idcs], p, atol=1e-5))
    assert np.all(np.isclose(agent.V[used_idcs], v, atol=1e-5))

    # Leaves
    if not search_graph:
        assert np.all(agent.neighbors.all(axis=1) != agent.leaves)

    # W
    assert agent.W[used_idcs].all()

    return agent, solved
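# Index conventions inferred from the assertions above and in test_expansion:
# index 0 is reserved as an "unexpanded"/no-neighbor marker, the root state
# sits at index 1, and subsequent states are numbered contiguously from there.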
def test_agent_optim(self, agents=['MCTS', 'AStar', 'EGVM']):
    run_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'librubiks', 'solving', 'hyper_optim.py'
    )
    location = 'local_tests/optim'
    net = Model(ModelConfig())
    net.save(location)
    for agent in agents:
        run_settings = {
            'location': location,
            'agent': agent,
            'iterations': 1,
            'eval_games': 1,
            'depth': 2,
            'save_optimal': True,
            'use_best': True,
            'optimizer': 'BO',
        }
        args = [sys.executable, run_path]
        for k, v in run_settings.items():
            args.extend([f'--{k}', str(v)])
        subprocess.check_call(args)  # Raises an error on problems in the call
        expected_files = [f'{agent}_optimization.log', f'{agent}_params.json']
        for fname in expected_files:
            assert fname in os.listdir(location)
    return location
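# For reference, each subprocess call above is equivalent to a shell command
# of the form (path abbreviated):
#   python .../librubiks/solving/hyper_optim.py --location local_tests/optim \
#       --agent MCTS --iterations 1 --eval_games 1 --depth 2 \
#       --save_optimal True --use_best True --optimizer BO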
def __init__(
    self,
    name: str,  # Set by the parser; should correspond to options in runtrain
    location: str,
    rollouts: int,
    rollout_games: int,
    rollout_depth: int,
    batch_size: int,
    alpha_update: float,
    lr: float,
    gamma: float,
    tau: float,
    update_interval: int,
    optim_fn: str,
    evaluation_interval: int,
    nn_init: str,
    is2024: bool,
    arch: str,
    analysis: bool,
    reward_method: str,
    # Currently not set by argparser/configparser
    agent=PolicySearch(net=None),
    scrambling_depths: tuple = (10,),
    verbose: bool = True,
):
    self.name = name
    assert isinstance(self.name, str)

    self.rollouts = rollouts
    assert self.rollouts > 0
    self.rollout_games = rollout_games
    assert self.rollout_games > 0
    self.rollout_depth = rollout_depth
    assert rollout_depth > 0
    self.batch_size = batch_size
    assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

    self.alpha_update = alpha_update
    assert 0 <= alpha_update <= 1
    self.lr = lr
    assert float(lr) and lr <= 1
    self.gamma = gamma
    assert 0 < gamma <= 1
    self.tau = tau
    assert 0 < tau <= 1

    self.update_interval = update_interval
    assert isinstance(self.update_interval, int) and 0 <= self.update_interval

    self.optim_fn = getattr(torch.optim, optim_fn)
    assert issubclass(self.optim_fn, torch.optim.Optimizer)

    self.location = location
    # The logger is created already at init to test whether the path works
    self.logger = Logger(f"{self.location}/train.log", name, verbose)
    self.logger.log(f"Initialized {self.name}")

    # eval_games and max_time are assumed to be class attributes defined
    # outside this snippet
    self.evaluator = Evaluator(
        n_games=self.eval_games,
        max_time=self.max_time,
        scrambling_depths=scrambling_depths,
        logger=self.logger,
    )
    self.evaluation_interval = evaluation_interval
    assert isinstance(self.evaluation_interval, int) and 0 <= self.evaluation_interval
    self.agent = agent
    assert isinstance(self.agent, DeepAgent)

    self.is2024 = is2024
    # float(nn_init) raises a ValueError for non-numeric strings, so invalid
    # initializations fail here (with a ValueError rather than the assertion message)
    assert nn_init in ["glorot", "he"] or (float(nn_init) or True), \
        f"Initialization must be glorot, he or a number, but was {nn_init}"
    self.model_cfg = ModelConfig(architecture=arch, is2024=is2024, init=nn_init)

    self.analysis = analysis
    assert isinstance(self.analysis, bool)

    self.reward_method = reward_method
    assert self.reward_method in ["paper", "lapanfix", "schultzfix", "reward0"]

    assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
    if arch == "conv":
        assert not self.is2024
    assert isinstance(self.model_cfg, ModelConfig)
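# The nn_init check above relies on float() raising before the assert's
# (always truthy) `or True` branch is evaluated. The same validation written
# out explicitly (_is_valid_init is a hypothetical helper, not part of the repo):
def _is_valid_init(nn_init) -> bool:
    if nn_init in ("glorot", "he"):
        return True
    try:
        float(nn_init)  # Raises ValueError for e.g. "xavier"
        return True
    except (TypeError, ValueError):
        return False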
def test_init(self):
    for init in ['glorot', 'he', 0, 1.123123123e-3]:
        cf = ModelConfig(init=init)
        model = Model.create(cf)
        x = torch.randn(2, 480).to(gpu)
        model(x)