Example No. 1
    def test_train(self):
        torch.manual_seed(42)
        # The standard test
        net = Model.create(ModelConfig())
        evaluator = Evaluator(2,
                              max_time=.02,
                              max_states=None,
                              scrambling_depths=[2])
        train = Train(rollouts=2,
                      batch_size=2,
                      tau=0.1,
                      alpha_update=.5,
                      gamma=1,
                      rollout_games=2,
                      rollout_depth=3,
                      optim_fn=torch.optim.Adam,
                      agent=PolicySearch(None),
                      lr=1e-6,
                      evaluation_interval=1,
                      evaluator=evaluator,
                      update_interval=1,
                      with_analysis=True,
                      reward_method='schultzfix')

        # Current
        net, min_net = train.train(net)

        train.plot_training("local_tests/local_train_test", "test")
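        # plot_training should have produced training_test.png in the given directory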
        assert os.path.exists("local_tests/local_train_test/training_test.png")
Example No. 2
	def test_resnet(self):
		config = ModelConfig(architecture='res_big')
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
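		# A forward pass should work in both eval and train mode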
		model.eval()
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example No. 3
	def test_model(self):
		config = ModelConfig()
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
		model.eval()
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example No. 4
    def test_cost(self):
        net = Model.create(ModelConfig()).eval()
        games = 5
        states, _ = cube.sequence_scrambler(games, 1, True)
        agent = AStar(net, lambda_=1, expansions=2)
        agent.reset(1, 1)
        # Give every scrambled state a path cost of 1; i ends up as the index of the last state
        i = []
        for i, _ in enumerate(states):
            agent.G[i] = 1
        # cost() should return one value per game
        cost = agent.cost(states, i)
        assert cost.shape == (games, )
Example No. 5
    def test_agents(self):
        net = Model.create(ModelConfig())
        agents = [
            RandomSearch(),
            BFS(),
            PolicySearch(net, sample_policy=False),
            PolicySearch(net, sample_policy=True),
            ValueSearch(net),
            EGVM(net, 0.1, 4, 12),
        ]
        for s in agents:
            self._test_agents(s)
Example No. 6
	def test_save_and_load(self):
		torch.manual_seed(42)

		config = ModelConfig()
		model = Model.create(config, logger=NullLogger())
		model_dir = "local_tests/local_model_test"
		model.save(model_dir)
		assert os.path.exists(f"{model_dir}/config.json")
		assert os.path.exists(f"{model_dir}/model.pt")

		model = Model.load(model_dir).to(gpu)
		assert next(model.parameters()).device.type == gpu.type
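The same save-then-assert pattern also works without a hard-coded local_tests/ directory. A minimal sketch, assuming only that the model exposes save(directory) and writes config.json and model.pt as above; the helper name check_save and the use of tempfile are additions here, not part of the original test:

import os
import tempfile

def check_save(model):
    # Save into a throwaway directory and verify the expected files appear
    with tempfile.TemporaryDirectory() as model_dir:
        model.save(model_dir)
        assert os.path.exists(os.path.join(model_dir, "config.json"))
        assert os.path.exists(os.path.join(model_dir, "model.pt"))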
Example No. 7
    def test_expansion(self):
        net = Model.create(ModelConfig()).eval()
        init_state, _, _ = cube.scramble(3)
        agent = AStar(net, lambda_=0.1, expansions=5)
        agent.search(init_state, time_limit=1)
        init_idx = agent.indices[init_state.tostring()]
        assert init_idx == 1
        assert agent.G[init_idx] == 0
        # Every neighbour of the initial state should have been expanded with cost 1
        # and with the initial state as its parent
        for action in cube.action_space:
            substate = cube.rotate(init_state, *action)
            idx = agent.indices[substate.tostring()]
            assert agent.G[idx] == 1
            assert agent.parents[idx] == init_idx
Example No. 8
    def test_agent(self):
        # Each tuple is (lambda_, expansions) for the A* agent
        test_params = {
            (0, 10),
            (0.5, 2),
            (1, 1),
        }
        net = Model.create(ModelConfig()).eval()
        for params in test_params:
            agent = AStar(net, *params)
            self._can_win_all_easy_games(agent)
            agent.reset("Tue", "Herlau")
            # After a reset, the search data structures should be empty
            assert not len(agent.indices)
            assert not len(agent.open_queue)
Example No. 9
    def _mcts_test(self, state: np.ndarray, search_graph: bool):
        agent = MCTS(Model.create(ModelConfig()),
                     c=1,
                     search_graph=search_graph)
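        # Search from the given state for 0.2 seconds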
        solved = agent.search(state, .2)

        # Indices
        assert agent.indices[state.tostring()] == 1
        for s, i in agent.indices.items():
            assert agent.states[i].tostring() == s
        assert sorted(agent.indices.values())[0] == 1
        assert np.all(np.diff(sorted(agent.indices.values())) == 1)

        used_idcs = np.array(list(agent.indices.values()))

        # States
        assert np.all(agent.states[1] == state)
        for i, s in enumerate(agent.states):
            if i not in used_idcs: continue
            assert s.tostring() in agent.indices
            assert agent.indices[s.tostring()] == i

        # Neighbors
        if not search_graph:
            for i, neighs in enumerate(agent.neighbors):
                if i not in used_idcs: continue
                state = agent.states[i]
                for j, neighbor_index in enumerate(neighs):
                    assert neighbor_index == 0 or neighbor_index in agent.indices.values()
                    if neighbor_index == 0: continue
                    substate = cube.rotate(state, *cube.action_space[j])
                    assert np.all(agent.states[neighbor_index] == substate)

        # Policy and value
        with torch.no_grad():
            p, v = agent.net(cube.as_oh(agent.states[used_idcs]))
        p, v = p.softmax(dim=1).cpu().numpy(), v.squeeze().cpu().numpy()
        assert np.all(np.isclose(agent.P[used_idcs], p, atol=1e-5))
        assert np.all(np.isclose(agent.V[used_idcs], v, atol=1e-5))

        # Leaves
        if not search_graph:
            assert np.all(agent.neighbors.all(axis=1) != agent.leaves)

        # W
        assert agent.W[used_idcs].all()

        return agent, solved
Example No. 10
    def execute(self):

        # Sets representation
        self.logger.section(
            f"Starting job:\n{self.name} with {'20x24' if get_is2024() else '6x8x6'} representation\nLocation {self.location}\nCommit: {get_commit()}"
        )

        train = Train(
            self.rollouts,
            batch_size=self.batch_size,
            rollout_games=self.rollout_games,
            rollout_depth=self.rollout_depth,
            optim_fn=self.optim_fn,
            alpha_update=self.alpha_update,
            lr=self.lr,
            gamma=self.gamma,
            tau=self.tau,
            reward_method=self.reward_method,
            update_interval=self.update_interval,
            agent=self.agent,
            logger=self.logger,
            evaluation_interval=self.evaluation_interval,
            evaluator=self.evaluator,
            with_analysis=self.analysis,
        )
        self.logger(
            f"Rough upper bound on total evaluation time during training: {len(train.evaluation_rollouts)*self.evaluator.approximate_time()/60:.2f} min"
        )

        net = Model.create(self.model_cfg, self.logger)
        net, min_net = train.train(net)
        net.save(self.location)
        if self.evaluation_interval:
            min_net.save(self.location, True)

        train.plot_training(self.location, name=self.name)
        analysispath = os.path.join(self.location, "analysis")
        datapath = os.path.join(self.location, "train-data")
        os.mkdir(datapath)
        os.mkdir(analysispath)

        if self.analysis:
            train.analysis.plot_substate_distributions(analysispath)
            train.analysis.plot_value_targets(analysispath)
            train.analysis.plot_net_changes(analysispath)
            train.analysis.visualize_first_states(analysispath)
            np.save(f"{datapath}/avg_target_values.npy",
                    train.analysis.avg_value_targets)
            np.save(f"{datapath}/policy_entropies.npy",
                    train.analysis.policy_entropies)
            np.save(f"{datapath}/substate_val_stds.npy",
                    train.analysis.substate_val_stds)

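        # Training curves are saved regardless of whether analysis is enabled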
        np.save(f"{datapath}/rollouts.npy", train.train_rollouts)
        np.save(f"{datapath}/policy_losses.npy", train.policy_losses)
        np.save(f"{datapath}/value_losses.npy", train.value_losses)
        np.save(f"{datapath}/losses.npy", train.train_losses)
        np.save(f"{datapath}/evaluation_rollouts.npy",
                train.evaluation_rollouts)
        np.save(f"{datapath}/evaluations.npy", train.sol_percents)

        return train.train_rollouts, train.train_losses
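The arrays written at the end of execute() can be reloaded later for inspection or custom plotting. A minimal sketch, assuming the same file names; the datapath value is illustrative, since execute() writes into <self.location>/train-data:

import numpy as np

datapath = "runs/example-job/train-data"  # illustrative path
rollouts = np.load(f"{datapath}/rollouts.npy")
losses = np.load(f"{datapath}/losses.npy")
sol_percents = np.load(f"{datapath}/evaluations.npy")
print(f"{len(rollouts)} training rollouts, final loss {losses[-1]:.4f}")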
Example No. 11
	def test_init(self):
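		# The init option accepts both named schemes and numeric values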
		for init in ['glorot', 'he', 0, 1.123123123e-3]:
			cf = ModelConfig(init=init)
			model = Model.create(cf)
			x = torch.randn(2, 480).to(gpu)
			model(x)