Example #1
	def test_model_config(self):
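		# Round-trip a ModelConfig through JSON and check that the activation function survives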
		cf = ModelConfig(torch.nn.ReLU())
		with open("local_tests/test_config.json", "w", encoding="utf-8") as f:
			json.dump(cf.as_json_dict(), f)
		with open("local_tests/test_config.json", encoding="utf-8") as f:
			cf = ModelConfig.from_json_dict(json.load(f))
		assert isinstance(cf.activation_function, torch.nn.ReLU)
Example #2
    def test_train(self):
        torch.manual_seed(42)
        # The standard test
        net = Model.create(ModelConfig())
        evaluator = Evaluator(2,
                              max_time=.02,
                              max_states=None,
                              scrambling_depths=[2])
        train = Train(rollouts=2,
                      batch_size=2,
                      tau=0.1,
                      alpha_update=.5,
                      gamma=1,
                      rollout_games=2,
                      rollout_depth=3,
                      optim_fn=torch.optim.Adam,
                      agent=PolicySearch(None),
                      lr=1e-6,
                      evaluation_interval=1,
                      evaluator=evaluator,
                      update_interval=1,
                      with_analysis=True,
                      reward_method='schultzfix')

        # Run the training loop
        net, min_net = train.train(net)

        train.plot_training("local_tests/local_train_test", "test")
        assert os.path.exists("local_tests/local_train_test/training_test.png")
Example #3
	def test_resnet(self):
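		# The model should live on the configured device and run a forward pass in both eval and train mode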
		config = ModelConfig(architecture='res_big')
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
		model.eval()
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example #4
	def test_model(self):
		config = ModelConfig()
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
		model.eval()
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example #5
 def test_cost(self):
     net = Model.create(ModelConfig()).eval()
     games = 5
     states, _ = cube.sequence_scrambler(games, 1, True)
     agent = AStar(net, lambda_=1, expansions=2)
     agent.reset(1, 1)
     # Assign G = 1 to every state; after the loop, i holds the index of the last state
     for i, _ in enumerate(states):
         agent.G[i] = 1
     cost = agent.cost(states, i)
     assert cost.shape == (games, )
Example #6
 def test_agents(self):
     net = Model.create(ModelConfig())
     agents = [
         RandomSearch(),
         BFS(),
         PolicySearch(net, sample_policy=False),
         PolicySearch(net, sample_policy=True),
         ValueSearch(net),
         EGVM(net, 0.1, 4, 12),
     ]
     for agent in agents:
         self._test_agents(agent)
Example #7
	def test_save_and_load(self):
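		# Saving should write config.json and model.pt; loading should restore the model on the configured device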
		torch.manual_seed(42)

		config = ModelConfig()
		model = Model.create(config, logger=NullLogger())
		model_dir = "local_tests/local_model_test"
		model.save(model_dir)
		assert os.path.exists(f"{model_dir}/config.json")
		assert os.path.exists(f"{model_dir}/model.pt")

		model = Model.load(model_dir).to(gpu)
		assert next(model.parameters()).device.type == gpu.type
Example #8
 def test_expansion(self):
     net = Model.create(ModelConfig()).eval()
     init_state, _, _ = cube.scramble(3)
     agent = AStar(net, lambda_=0.1, expansions=5)
     agent.search(init_state, time_limit=1)
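     # After the search, the initial state should have zero cost-to-come (G = 0) and each of its neighbors G = 1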
     init_idx = agent.indices[init_state.tostring()]
     assert init_idx == 1
     assert agent.G[init_idx] == 0
     for action in cube.action_space:
         substate = cube.rotate(init_state, *action)
         idx = agent.indices[substate.tostring()]
         assert agent.G[idx] == 1
         assert agent.parents[idx] == init_idx
Example #9
 def test_agent(self):
     test_params = {
         (0, 10),
         (0.5, 2),
         (1, 1),
     }
     net = Model.create(ModelConfig()).eval()
     for params in test_params:
         agent = AStar(net, *params)
         self._can_win_all_easy_games(agent)
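         # reset should clear the agent's internal search state, regardless of the arguments it is given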
         agent.reset("Tue", "Herlau")
         assert not len(agent.indices)
         assert not len(agent.open_queue)
Example #10
    def _mcts_test(self, state: np.ndarray, search_graph: bool):
        agent = MCTS(Model.create(ModelConfig()),
                     c=1,
                     search_graph=search_graph)
        solved = agent.search(state, .2)

        # Indices
        assert agent.indices[state.tostring()] == 1
        for s, i in agent.indices.items():
            assert agent.states[i].tostring() == s
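        # Indices should form a consecutive range starting at 1 (0 is used to mean an unexpanded neighbor)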
        assert sorted(agent.indices.values())[0] == 1
        assert np.all(np.diff(sorted(agent.indices.values())) == 1)

        used_idcs = np.array(list(agent.indices.values()))

        # States
        assert np.all(agent.states[1] == state)
        for i, s in enumerate(agent.states):
            if i not in used_idcs: continue
            assert s.tostring() in agent.indices
            assert agent.indices[s.tostring()] == i

        # Neighbors
        if not search_graph:
            for i, neighs in enumerate(agent.neighbors):
                if i not in used_idcs: continue
                state_i = agent.states[i]
                for j, neighbor_index in enumerate(neighs):
                    assert neighbor_index == 0 or neighbor_index in agent.indices.values()
                    if neighbor_index == 0: continue
                    substate = cube.rotate(state_i, *cube.action_space[j])
                    assert np.all(agent.states[neighbor_index] == substate)

        # Policy and value
        with torch.no_grad():
            p, v = agent.net(cube.as_oh(agent.states[used_idcs]))
        p, v = p.softmax(dim=1).cpu().numpy(), v.squeeze().cpu().numpy()
        assert np.all(np.isclose(agent.P[used_idcs], p, atol=1e-5))
        assert np.all(np.isclose(agent.V[used_idcs], v, atol=1e-5))

        # Leaves
        if not search_graph:
            assert np.all(agent.neighbors.all(axis=1) != agent.leaves)

        # W
        assert agent.W[used_idcs].all()

        return agent, solved
Example #11
	def test_agent_optim(self, agents=['MCTS', 'AStar', 'EGVM']):
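		# Run the hyperparameter optimization script in a subprocess for each agent and check for the expected output files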

		run_path = os.path.join(
			os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
			'librubiks', 'solving', 'hyper_optim.py',
		)
		location = 'local_tests/optim'

		net = Model(ModelConfig())
		net.save(location)
		for agent in agents:

			run_settings = {
				'location': location,
				'agent': agent,
				'iterations': 1,
				'eval_games': 1,
				'depth': 2,
				'save_optimal': True,
				'use_best': True,
				'optimizer': 'BO',
			}
			args = [sys.executable, run_path]
			for k, v in run_settings.items():
				args.extend([f'--{k}', str(v)])
			subprocess.check_call(args)  # Raises error on problems in call

			expected_files = [f'{agent}_optimization.log', f'{agent}_params.json']

			for fname in expected_files:
				assert fname in os.listdir(location)

		return location
Example #12
    def __init__(
            self,
            name: str,
            # Set by parser, should correspond to options in runtrain
            location: str,
            rollouts: int,
            rollout_games: int,
            rollout_depth: int,
            batch_size: int,
            alpha_update: float,
            lr: float,
            gamma: float,
            tau: float,
            update_interval: int,
            optim_fn: str,
            evaluation_interval: int,
            nn_init: str,
            is2024: bool,
            arch: str,
            analysis: bool,
            reward_method: str,

            # Currently not set by argparser/configparser
            agent=PolicySearch(net=None),
            scrambling_depths: tuple = (10, ),
            verbose: bool = True,
    ):

        self.name = name
        assert isinstance(self.name, str)

        self.rollouts = rollouts
        assert self.rollouts > 0
        self.rollout_games = rollout_games
        assert self.rollout_games > 0
        self.rollout_depth = rollout_depth
        assert rollout_depth > 0
        self.batch_size = batch_size
        assert 0 < self.batch_size <= self.rollout_games * self.rollout_depth

        self.alpha_update = alpha_update
        assert 0 <= alpha_update <= 1
        self.lr = lr
        assert 0 < lr <= 1
        self.gamma = gamma
        assert 0 < gamma <= 1
        self.tau = tau
        assert 0 < tau <= 1
        self.update_interval = update_interval
        assert isinstance(self.update_interval, int) and 0 <= self.update_interval
        self.optim_fn = getattr(torch.optim, optim_fn)
        assert issubclass(self.optim_fn, torch.optim.Optimizer)

        self.location = location
        # The logger is created already at init to check that the path works
        self.logger = Logger(f"{self.location}/train.log", name, verbose)
        self.logger.log(f"Initialized {self.name}")

        self.evaluator = Evaluator(n_games=self.eval_games,
                                   max_time=self.max_time,
                                   scrambling_depths=scrambling_depths,
                                   logger=self.logger)
        self.evaluation_interval = evaluation_interval
        assert isinstance(self.evaluation_interval, int) and 0 <= self.evaluation_interval
        self.agent = agent
        assert isinstance(self.agent, DeepAgent)
        self.is2024 = is2024

        # float(nn_init) raises a ValueError for non-numeric strings; "or True" also accepts a value of 0
        assert nn_init in ["glorot", "he"] or (float(nn_init) or True), \
            f"Initialization must be glorot, he or a number, but was {nn_init}"
        self.model_cfg = ModelConfig(architecture=arch,
                                     is2024=is2024,
                                     init=nn_init)

        self.analysis = analysis
        assert isinstance(self.analysis, bool)

        self.reward_method = reward_method
        assert self.reward_method in [
            "paper", "lapanfix", "schultzfix", "reward0"
        ]

        assert arch in ["fc_small", "fc_big", "res_small", "res_big", "conv"]
        if arch == "conv": assert not self.is2024
        assert isinstance(self.model_cfg, ModelConfig)
Example #13
	def test_init(self):
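		# Every supported initialization (named schemes as well as numeric values) should yield a model that can do a forward pass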
		for init in ['glorot', 'he', 0, 1.123123123e-3]:
			cf = ModelConfig(init=init)
			model = Model.create(cf)
			x = torch.randn(2, 480).to(gpu)
			model(x)