def test_train(self):
    torch.manual_seed(42)
    # The standard test
    net = Model.create(ModelConfig())
    evaluator = Evaluator(2, max_time=.02, max_states=None, scrambling_depths=[2])
    train = Train(rollouts=2, batch_size=2, tau=0.1, alpha_update=.5, gamma=1,
                  rollout_games=2, rollout_depth=3, optim_fn=torch.optim.Adam,
                  agent=PolicySearch(None), lr=1e-6, evaluation_interval=1,
                  evaluator=evaluator, update_interval=1, with_analysis=True,
                  reward_method='schultzfix')
    # train() returns the current net and the min_net checkpoint
    net, min_net = train.train(net)
    train.plot_training("local_tests/local_train_test", "test")
    assert os.path.exists("local_tests/local_train_test/training_test.png")
def test_resnet(self):
    config = ModelConfig(architecture='res_big')
    model = Model.create(config)
    assert next(model.parameters()).device.type == gpu.type
    model.eval()
    x = torch.randn(2, 480).to(gpu)
    model(x)
    model.train()
    model(x)
def test_model(self):
    config = ModelConfig()
    model = Model.create(config)
    assert next(model.parameters()).device.type == gpu.type
    model.eval()
    x = torch.randn(2, 480).to(gpu)
    model(x)
    model.train()
    model(x)
def test_cost(self):
    net = Model.create(ModelConfig()).eval()
    games = 5
    states, _ = cube.sequence_scrambler(games, 1, True)
    agent = AStar(net, lambda_=1, expansions=2)
    agent.reset(1, 1)
    i = []
    for i, _ in enumerate(states):
        agent.G[i] = 1
    cost = agent.cost(states, i)
    assert cost.shape == (games,)
def test_agents(self):
    net = Model.create(ModelConfig())
    agents = [
        RandomSearch(),
        BFS(),
        PolicySearch(net, sample_policy=False),
        PolicySearch(net, sample_policy=True),
        ValueSearch(net),
        EGVM(net, 0.1, 4, 12),
    ]
    for s in agents:
        self._test_agents(s)
def test_save_and_load(self):
    torch.manual_seed(42)
    config = ModelConfig()
    model = Model.create(config, logger=NullLogger())
    model_dir = "local_tests/local_model_test"
    model.save(model_dir)
    assert os.path.exists(f"{model_dir}/config.json")
    assert os.path.exists(f"{model_dir}/model.pt")
    model = Model.load(model_dir).to(gpu)
    assert next(model.parameters()).device.type == gpu.type
def test_expansion(self):
    net = Model.create(ModelConfig()).eval()
    init_state, _, _ = cube.scramble(3)
    agent = AStar(net, lambda_=0.1, expansions=5)
    agent.search(init_state, time_limit=1)
    init_idx = agent.indices[init_state.tostring()]
    assert init_idx == 1
    assert agent.G[init_idx] == 0
    for action in cube.action_space:
        substate = cube.rotate(init_state, *action)
        idx = agent.indices[substate.tostring()]
        assert agent.G[idx] == 1
        assert agent.parents[idx] == init_idx
def test_agent(self):
    test_params = {
        (0, 10),
        (0.5, 2),
        (1, 1),
    }
    net = Model.create(ModelConfig()).eval()
    for params in test_params:
        agent = AStar(net, *params)
        self._can_win_all_easy_games(agent)
        agent.reset("Tue", "Herlau")
        assert not len(agent.indices)
        assert not len(agent.open_queue)
def _mcts_test(self, state: np.ndarray, search_graph: bool):
    agent = MCTS(Model.create(ModelConfig()), c=1, search_graph=search_graph)
    solved = agent.search(state, .2)

    # Indices
    assert agent.indices[state.tostring()] == 1
    for s, i in agent.indices.items():
        assert agent.states[i].tostring() == s
    assert sorted(agent.indices.values())[0] == 1
    assert np.all(np.diff(sorted(agent.indices.values())) == 1)

    used_idcs = np.array(list(agent.indices.values()))

    # States
    assert np.all(agent.states[1] == state)
    for i, s in enumerate(agent.states):
        if i not in used_idcs:
            continue
        assert s.tostring() in agent.indices
        assert agent.indices[s.tostring()] == i

    # Neighbors
    if not search_graph:
        for i, neighs in enumerate(agent.neighbors):
            if i not in used_idcs:
                continue
            state = agent.states[i]
            for j, neighbor_index in enumerate(neighs):
                assert neighbor_index == 0 or neighbor_index in agent.indices.values()
                if neighbor_index == 0:
                    continue
                substate = cube.rotate(state, *cube.action_space[j])
                assert np.all(agent.states[neighbor_index] == substate)

    # Policy and value
    with torch.no_grad():
        p, v = agent.net(cube.as_oh(agent.states[used_idcs]))
    p, v = p.softmax(dim=1).cpu().numpy(), v.squeeze().cpu().numpy()
    assert np.all(np.isclose(agent.P[used_idcs], p, atol=1e-5))
    assert np.all(np.isclose(agent.V[used_idcs], v, atol=1e-5))

    # Leaves
    if not search_graph:
        assert np.all(agent.neighbors.all(axis=1) != agent.leaves)

    # W
    assert agent.W[used_idcs].all()

    return agent, solved
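# A minimal driver sketch (not in the original suite), assuming cube.scramble behaves
# as in test_expansion above: exercise _mcts_test on a shallow scramble both with and
# without graph search. The scrambling depth and the final check are illustrative.
def test_mcts_sketch(self):
    state, _, _ = cube.scramble(3)
    for search_graph in (False, True):
        agent, solved = self._mcts_test(state, search_graph)
        # _mcts_test already asserts internal consistency; here we only confirm
        # that the initial state was registered in the agent's index map
        assert state.tostring() in agent.indices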
def execute(self):
    # Report which state representation is used
    self.logger.section(
        f"Starting job:\n{self.name} with {'20x24' if get_is2024() else '6x8x6'} representation\n"
        f"Location {self.location}\nCommit: {get_commit()}"
    )

    train = Train(self.rollouts, batch_size=self.batch_size,
                  rollout_games=self.rollout_games, rollout_depth=self.rollout_depth,
                  optim_fn=self.optim_fn, alpha_update=self.alpha_update, lr=self.lr,
                  gamma=self.gamma, tau=self.tau, reward_method=self.reward_method,
                  update_interval=self.update_interval, agent=self.agent, logger=self.logger,
                  evaluation_interval=self.evaluation_interval, evaluator=self.evaluator,
                  with_analysis=self.analysis)
    self.logger(
        f"Rough upper bound on total evaluation time during training: "
        f"{len(train.evaluation_rollouts)*self.evaluator.approximate_time()/60:.2f} min"
    )

    net = Model.create(self.model_cfg, self.logger)
    net, min_net = train.train(net)
    net.save(self.location)
    if self.evaluation_interval:
        min_net.save(self.location, True)

    train.plot_training(self.location, name=self.name)

    analysispath = os.path.join(self.location, "analysis")
    datapath = os.path.join(self.location, "train-data")
    os.mkdir(datapath)
    os.mkdir(analysispath)

    if self.analysis:
        train.analysis.plot_substate_distributions(analysispath)
        train.analysis.plot_value_targets(analysispath)
        train.analysis.plot_net_changes(analysispath)
        train.analysis.visualize_first_states(analysispath)
        np.save(f"{datapath}/avg_target_values.npy", train.analysis.avg_value_targets)
        np.save(f"{datapath}/policy_entropies.npy", train.analysis.policy_entropies)
        np.save(f"{datapath}/substate_val_stds.npy", train.analysis.substate_val_stds)

    np.save(f"{datapath}/rollouts.npy", train.train_rollouts)
    np.save(f"{datapath}/policy_losses.npy", train.policy_losses)
    np.save(f"{datapath}/value_losses.npy", train.value_losses)
    np.save(f"{datapath}/losses.npy", train.train_losses)
    np.save(f"{datapath}/evaluation_rollouts.npy", train.evaluation_rollouts)
    np.save(f"{datapath}/evaluations.npy", train.sol_percents)

    return train.train_rollouts, train.train_losses
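# Hedged usage note (assumption, not part of the original job runner): the arrays written
# by execute() can be reloaded with plain numpy for offline inspection, e.g.
#
#     import numpy as np
#     rollouts = np.load(f"{datapath}/rollouts.npy")
#     losses = np.load(f"{datapath}/losses.npy")
#
# where datapath points at the job's "train-data" directory.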
def test_init(self):
    for init in ['glorot', 'he', 0, 1.123123123e-3]:
        cf = ModelConfig(init=init)
        model = Model.create(cf)
        x = torch.randn(2, 480).to(gpu)
        model(x)