Example #1
	def test_save_and_load(self):
		torch.manual_seed(42)

		config = ModelConfig()
		model = Model.create(config, logger=NullLogger())
		model_dir = "local_tests/local_model_test"
		model.save(model_dir)
		assert os.path.exists(f"{model_dir}/config.json")
		assert os.path.exists(f"{model_dir}/model.pt")

		model = Model.load(model_dir).to(gpu)
		assert next(model.parameters()).device.type == gpu.type
Example #2
	def _update_gen_net(self, generator_net: Model, net: Model):
		"""Create a network with parameters weighted by self.tau"""
		self.tt.profile("Creating generator network")
		genparams, netparams = generator_net.state_dict(), net.state_dict()
		new_genparams = dict(genparams)
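		# Soft (Polyak) update: each generator parameter becomes tau * trained-net parameter + (1 - tau) * old generator parameter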
		for pname, param in netparams.items():
			new_genparams[pname].data.copy_(
					self.tau * param.data.to(gpu) + (1-self.tau) * new_genparams[pname].data.to(gpu)
					)
		generator_net.load_state_dict(new_genparams)
		self.tt.end_profile("Creating generator network")
		return generator_net.to(gpu)
Example #3
	def rollout(self, net: Model, rollout: int, value_targets: torch.Tensor):
		"""Saves statistics after a rollout has been performed for understanding the loss development

		:param Model net: The current net, used for saving values and policies of the first 12 states
		:param int rollout: The rollout number. Used to determine whether it is evaluation time => check targets
		:param torch.Tensor value_targets: Used for visualizing value change
		"""
		# First time
		if self.params is None: self.params = net.get_params()

		# Keeping track of the entropy of the 12-dimensional log-probability policy output
		entropies = [entropy(policy, axis=1) for policy in self.rollout_policy]
		# Currently: mean over all games in the entire rollout. Maybe we want it more fine-grained later.
		self.policy_entropies.append(np.mean( [np.nanmean(entropy) for entropy in entropies] ))
		self.rollout_policy = list() #reset for next rollout

		if rollout in self.evaluations:
			net.eval()

			# Calculating value targets
			targets = value_targets.cpu().numpy().reshape((-1, self.depth))
			self.avg_value_targets.append(targets.mean(axis=0))

			# Calculating model change
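			# sqrt(x**2) is |x| element-wise, so these are mean absolute parameter changes since the last evaluation and since training started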
			model_change = torch.sqrt((net.get_params()-self.params)**2).mean().cpu()
			model_total_change = torch.sqrt((net.get_params()-self.orig_params)**2).mean().cpu()
			self.params = net.get_params()
			self.param_changes.append(float(model_change))
			self.param_total_changes.append(model_total_change)

			#In the beginning: Calculate value given to first 12 substates
			if rollout <= self.extra_evals:
				self.first_state_values.append( net(self.first_states, policy=False, value=True).detach().cpu().numpy() )

			net.train()
Example #4
    def test_train(self):
        torch.manual_seed(42)
        #The standard test
        net = Model.create(ModelConfig())
        evaluator = Evaluator(2,
                              max_time=.02,
                              max_states=None,
                              scrambling_depths=[2])
        train = Train(rollouts=2,
                      batch_size=2,
                      tau=0.1,
                      alpha_update=.5,
                      gamma=1,
                      rollout_games=2,
                      rollout_depth=3,
                      optim_fn=torch.optim.Adam,
                      agent=PolicySearch(None),
                      lr=1e-6,
                      evaluation_interval=1,
                      evaluator=evaluator,
                      update_interval=1,
                      with_analysis=True,
                      reward_method='schultzfix')

        # Current
        net, min_net = train.train(net)

        train.plot_training("local_tests/local_train_test", "test")
        assert os.path.exists("local_tests/local_train_test/training_test.png")
Example #5
	def test_resnet(self):
		config = ModelConfig(architecture = 'res_big')
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
		model.eval()
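		# 480 = 20 x 24, the one-hot size of the default cube state representation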
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example #6
	def test_model(self):
		config = ModelConfig()
		model = Model.create(config)
		assert next(model.parameters()).device.type == gpu.type
		model.eval()
		x = torch.randn(2, 480).to(gpu)
		model(x)
		model.train()
		model(x)
Example #7
	def test_agent_optim(self, agents=['MCTS', 'AStar', 'EGVM']):

		run_path = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'librubiks', 'solving', 'hyper_optim.py' )
		location = 'local_tests/optim'

		net = Model(ModelConfig())
		net.save(location)
		for agent in agents:

			run_settings = { 'location': location, 'agent': agent, 'iterations': 1, 'eval_games': 1, 'depth': 2, 'save_optimal': True, 'use_best': True, 'optimizer': 'BO' }
			args = [sys.executable, run_path,]
			for k, v in run_settings.items(): args.extend([f'--{k}', str(v)])
			subprocess.check_call(args)  # Raises error on problems in call

			expected_files = [f'{agent}_optimization.log', f'{agent}_params.json']

			for fname in expected_files: assert fname in os.listdir(location)

		return location
Example #8
 def test_cost(self):
     net = Model.create(ModelConfig()).eval()
     games = 5
     states, _ = cube.sequence_scrambler(games, 1, True)
     agent = AStar(net, lambda_=1, expansions=2)
     agent.reset(1, 1)
     i = []
     for i, _ in enumerate(states):
         agent.G[i] = 1
     cost = agent.cost(states, i)
     assert cost.shape == (games, )
Example #9
 def test_agents(self):
     net = Model.create(ModelConfig())
     agents = [
         RandomSearch(),
         BFS(),
         PolicySearch(net, sample_policy=False),
         PolicySearch(net, sample_policy=True),
         ValueSearch(net),
         EGVM(net, 0.1, 4, 12),
     ]
     for s in agents:
         self._test_agents(s)
Example #10
 def test_expansion(self):
     net = Model.create(ModelConfig()).eval()
     init_state, _, _ = cube.scramble(3)
     agent = AStar(net, lambda_=0.1, expansions=5)
     agent.search(init_state, time_limit=1)
     init_idx = agent.indices[init_state.tostring()]
     assert init_idx == 1
     assert agent.G[init_idx] == 0
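     # Every single-move neighbour of the root should now be in the tree with cost-to-go G == 1 and the root as its parent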
     for action in cube.action_space:
         substate = cube.rotate(init_state, *action)
         idx = agent.indices[substate.tostring()]
         assert agent.G[idx] == 1
         assert agent.parents[idx] == init_idx
Example #11
 def test_agent(self):
     test_params = {
         (0, 10),
         (0.5, 2),
         (1, 1),
     }
     net = Model.create(ModelConfig()).eval()
     for params in test_params:
         agent = AStar(net, *params)
         self._can_win_all_easy_games(agent)
         agent.reset("Tue", "Herlau")
         assert not len(agent.indices)
         assert not len(agent.open_queue)
Example #12
    def _mcts_test(self, state: np.ndarray, search_graph: bool):
        agent = MCTS(Model.create(ModelConfig()),
                     c=1,
                     search_graph=search_graph)
        solved = agent.search(state, .2)

        # Indices
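        # Index 0 is used as a null marker, so stored states are indexed from 1 upwards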
        assert agent.indices[state.tostring()] == 1
        for s, i in agent.indices.items():
            assert agent.states[i].tostring() == s
        assert sorted(agent.indices.values())[0] == 1
        assert np.all(np.diff(sorted(agent.indices.values())) == 1)

        used_idcs = np.array(list(agent.indices.values()))

        # States
        assert np.all(agent.states[1] == state)
        for i, s in enumerate(agent.states):
            if i not in used_idcs: continue
            assert s.tostring() in agent.indices
            assert agent.indices[s.tostring()] == i

        # Neighbors
        if not search_graph:
            for i, neighs in enumerate(agent.neighbors):
                if i not in used_idcs: continue
                state = agent.states[i]
                for j, neighbor_index in enumerate(neighs):
                    assert neighbor_index == 0 or neighbor_index in agent.indices.values()
                    if neighbor_index == 0: continue
                    substate = cube.rotate(state, *cube.action_space[j])
                    assert np.all(agent.states[neighbor_index] == substate)

        # Policy and value
        with torch.no_grad():
            p, v = agent.net(cube.as_oh(agent.states[used_idcs]))
        p, v = p.softmax(dim=1).cpu().numpy(), v.squeeze().cpu().numpy()
        assert np.all(np.isclose(agent.P[used_idcs], p, atol=1e-5))
        assert np.all(np.isclose(agent.V[used_idcs], v, atol=1e-5))

        # Leaves
        if not search_graph:
            assert np.all(agent.neighbors.all(axis=1) != agent.leaves)

        # W
        assert agent.W[used_idcs].all()

        return agent, solved
Example #13
import matplotlib.pyplot as plt
import numpy as np
import torch

from librubiks import gpu, no_grad
from librubiks import cube
from librubiks.model import Model
from librubiks.utils import TickTock, Logger

tt = TickTock()
log = Logger("data/local_analyses/net.log", "Analyzing MCTS")
net = Model.load("data/local_method_comparison/asgerfix").eval().to(gpu)


def _get_adi_ff_slices(b, n):
	slice_size = n // b + 1
	# Final slice may have overflow, however this is simply ignored when indexing
	slices = [slice(i * slice_size, (i + 1) * slice_size) for i in range(b)]
	return slices

def _ff(oh_states, value=True, policy=True):
	batches = 1
	while True:
		try:
			value_parts = [net(oh_states[slice_], policy=policy, value=value).squeeze() for slice_ in
						   _get_adi_ff_slices(batches, len(oh_states))]
			values = torch.cat(value_parts).cpu()
			break
		except RuntimeError as e:  # Usually caused by running out of VRAM; in that case the batch count is doubled (smaller slices), otherwise the error is re-raised
			if "alloc" not in str(e):
				raise e
			batches *= 2
	return values
Example #14
	def from_saved(cls, loc: str, use_best: bool, sample_policy=False):
		net = Model.load(loc, load_best=use_best)
		net.to(gpu)
		return cls(net, sample_policy)
Example #15
def agent_optimize():
	"""
	Main way to run optimization. Hard-coded to run optimization at 1 sec per game, but other behaviour can be set with the CLI arguments shown by
	running `python librubiks/solving/hyper_optim.py --help`.
	Does not support config arguments.
	NB: The path here is different from the one in runeval and runtrain:
	it needs to be the folder containing model.pt! It doesn't work with the parent folder.

	Can work with runeval through
	```
	python librubiks/solving/hyper_optim.py --location example/net1/
	python runeval.py --location example/ --optimized_params True
	```
	"""
	set_seeds()

	# A lot of overhead just for default-argument niceness: default to the most recently trained model
	from runeval import train_folders

	model_path = ''
	if train_folders:
		for folder in [train_folders[-1]] + glob(f"{train_folders[-1]}/*/"):
			if os.path.isfile(os.path.join(folder, 'model.pt')):
				model_path = os.path.join(folder)
				break

	parser = argparse.ArgumentParser(description='Optimize Monte Carlo Tree Search for one model')
	parser.add_argument('--location', help='Folder which includes model.pt. Results will also be saved here',
		type=str, default=model_path)
	parser.add_argument('--iterations', help='Number of iterations of Bayesian Optimization',
		type=int, default=125)
	parser.add_argument('--agent', help='Name of agent corresponding to agent class in librubiks.solving.agents',
		type=str, default='AStar', choices = ['AStar', 'MCTS', 'EGVM'])
	parser.add_argument('--depth', help='Single number corresponding to the depth at which to test. If 0, run at deep scrambling depths',
		type=int, default=0)
	parser.add_argument('--eval_games', help='Number of games to evaluate at the given depth',
			type=int, default=100)
	parser.add_argument('--save_optimal', help='If True, saves a JSON of optimal hyperparameters usable for runeval',
			type=literal_eval, default=True, choices = [True, False])
	parser.add_argument('--use_best', help="Set to True to use model-best.pt instead of model.pt.", type=literal_eval, default=True,
			choices = [True, False])
	parser.add_argument('--optim_lengths', help="Set to true to optimize against sol percentage / solution length. Else, simply use sol %", type=literal_eval,
			default=True, choices = [True, False])
	parser.add_argument('--optimizer', help="Either BO or grid", type=str, default="grid", choices = ("grid", "BO"))

	args = parser.parse_args()

	agent_name = args.agent
	if agent_name == 'MCTS':
		params = {
			'c': (0.1, 100),
		}
		def prepper(params): return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
			'search_graph': True,
		}
	elif agent_name == 'AStar':
		params = {
			'lambda_':    (0, 0.4),
			'expansions': (1, 1000),
		}
		def prepper(params):
			params['expansions'] = int(params['expansions'])
			return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
		}
	elif agent_name == 'EGVM':
		params = {
				'epsilon': (0, 0.5),
				'workers': (1, 500),
				'depth':   (1, 250),
			}

		def prepper(params):
			params['workers'] = int(params['workers'])
			params['depth'] = int(params['depth'])
			return params

		persistent_params = {
			'net': Model.load(args.location, load_best=args.use_best),
		}
	else:
		raise NameError(f"{agent_name} does not correspond to a known agent, please pick either AStar, MCTS or EGVM")

	logger = Logger(os.path.join(args.location, f'{agent_name}_optimization.log'), 'Optimization')

	logger.log(f"{agent_name} optimization. Using network from {model_path}.")
	logger.log(f"Received arguments: {vars(args)}")

	agent = getattr(agents, agent_name)

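	# depth 0 is passed on as an empty scrambling_depths range, the "deep" convention described in the --depth help text above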
	evaluator = Evaluator(n_games=args.eval_games, max_time=5, scrambling_depths=range(0) if args.depth == 0 else [args.depth])
	assert args.optimizer in ["BO", "grid"], f"Optimizer should be 'BO' or 'grid', not '{args.optimizer}'"
	if args.optimizer == "BO":
		optimizer = BayesianOptimizer(target_function=None, parameters=params, logger=logger)
	else:
		optimizer = GridSearch(target_function=None, parameters=params, logger=logger)
	optimizer.objective_from_evaluator(evaluator, agent, persistent_params, param_prepper=prepper, optim_lengths=args.optim_lengths)
	optimizer.optimize(args.iterations)

	if args.save_optimal:
		with open(os.path.join(args.location, f'{agent_name}_params.json'), 'w') as outfile:
			json.dump(prepper(copy(optimizer.optimal)), outfile)
Example #16
    def execute(self):

        # Sets representation
        self.logger.section(
            f"Starting job:\n{self.name} with {'20x24' if get_is2024() else '6x8x6'} representation\nLocation {self.location}\nCommit: {get_commit()}"
        )

        train = Train(
            self.rollouts,
            batch_size=self.batch_size,
            rollout_games=self.rollout_games,
            rollout_depth=self.rollout_depth,
            optim_fn=self.optim_fn,
            alpha_update=self.alpha_update,
            lr=self.lr,
            gamma=self.gamma,
            tau=self.tau,
            reward_method=self.reward_method,
            update_interval=self.update_interval,
            agent=self.agent,
            logger=self.logger,
            evaluation_interval=self.evaluation_interval,
            evaluator=self.evaluator,
            with_analysis=self.analysis,
        )
        self.logger(
            f"Rough upper bound on total evaluation time during training: {len(train.evaluation_rollouts)*self.evaluator.approximate_time()/60:.2f} min"
        )

        net = Model.create(self.model_cfg, self.logger)
        net, min_net = train.train(net)
        net.save(self.location)
        if self.evaluation_interval:
            min_net.save(self.location, True)

        train.plot_training(self.location, name=self.name)
        analysispath = os.path.join(self.location, "analysis")
        datapath = os.path.join(self.location, "train-data")
        os.mkdir(datapath)
        os.mkdir(analysispath)

        if self.analysis:
            train.analysis.plot_substate_distributions(analysispath)
            train.analysis.plot_value_targets(analysispath)
            train.analysis.plot_net_changes(analysispath)
            train.analysis.visualize_first_states(analysispath)
            np.save(f"{datapath}/avg_target_values.npy",
                    train.analysis.avg_value_targets)
            np.save(f"{datapath}/policy_entropies.npy",
                    train.analysis.policy_entropies)
            np.save(f"{datapath}/substate_val_stds.npy",
                    train.analysis.substate_val_stds)

        np.save(f"{datapath}/rollouts.npy", train.train_rollouts)
        np.save(f"{datapath}/policy_losses.npy", train.policy_losses)
        np.save(f"{datapath}/value_losses.npy", train.value_losses)
        np.save(f"{datapath}/losses.npy", train.train_losses)
        np.save(f"{datapath}/evaluation_rollouts.npy",
                train.evaluation_rollouts)
        np.save(f"{datapath}/evaluations.npy", train.sol_percents)

        return train.train_rollouts, train.train_losses
Example #17
	def train(self, net: Model) -> (Model, Model):
		""" Training loop: generates data, optimizes parameters, evaluates (sometimes) and repeats.

		Trains `net` for `self.rollouts` rollouts, each consisting of `self.rollout_games` games scrambled to depth `self.rollout_depth`.
		The network is evaluated for each rollout number in `self.evaluations` according to `self.evaluator`.
		Stores multiple performance and training results.

		:param Model net: The network to be trained. Must accept input consistent with cube.get_oh_size()
		:return: The network after all evaluations and the network with the best evaluation score (win fraction)
		:rtype: (Model, Model)
		"""

		self.tt.reset()
		self.tt.tick()
		self.states_per_rollout = self.rollout_depth * self.rollout_games
		self.log(f"Beginning training. Optimization is performed in batches of {self.batch_size}")
		self.log("\n".join([
			f"Rollouts: {self.rollouts}",
			f"Each consisting of {self.rollout_games} games with a depth of {self.rollout_depth}",
			f"Evaluations: {len(self.evaluation_rollouts)}",
		]))
		best_solve = 0
		best_net = net.clone()
		self.agent.net = net
		if self.with_analysis:
			self.analysis.orig_params = net.get_params()

		generator_net = net.clone()

		alpha = 1 if self.alpha_update == 1 else 0
		optimizer = self.optim(net.parameters(), lr=self.lr)
		lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, self.gamma)
		self.policy_losses = np.zeros(self.rollouts)
		self.value_losses = np.zeros(self.rollouts)
		self.train_losses = np.empty(self.rollouts)
		self.sol_percents = list()

		for rollout in range(self.rollouts):
			reset_cuda()

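			# tau < 1: the data-generating network is a soft (Polyak) copy trailing the trained network; tau == 1: generate data with the trained network itself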
			generator_net = self._update_gen_net(generator_net, net) if self.tau != 1 else net

			self.tt.profile("ADI training data")
			training_data, policy_targets, value_targets, loss_weights = self.ADI_traindata(generator_net, alpha)
			self.tt.profile("To cuda")
			training_data = training_data.to(gpu)
			policy_targets = policy_targets.to(gpu)
			value_targets = value_targets.to(gpu)
			loss_weights = loss_weights.to(gpu)
			self.tt.end_profile("To cuda")
			self.tt.end_profile("ADI training data")

			reset_cuda()

			self.tt.profile("Training loop")
			net.train()
			batches = self._get_batches(self.states_per_rollout, self.batch_size)
			for i, batch in enumerate(batches):
				optimizer.zero_grad()
				policy_pred, value_pred = net(training_data[batch], policy=True, value=True)

				# Use loss on both policy and value
				policy_loss = self.policy_criterion(policy_pred, policy_targets[batch]) * loss_weights[batch]
				value_loss = self.value_criterion(value_pred.squeeze(), value_targets[batch]) * loss_weights[batch]
				loss = torch.mean(policy_loss + value_loss)
				loss.backward()
				optimizer.step()
				self.policy_losses[rollout] += policy_loss.detach().cpu().numpy().mean() / len(batches)
				self.value_losses[rollout] += value_loss.detach().cpu().numpy().mean() / len(batches)

				if self.with_analysis: #Save policy output to compute entropy
					with torch.no_grad():
						self.analysis.rollout_policy.append(
							torch.nn.functional.softmax(policy_pred.detach(), dim=0).cpu().numpy()
						)

			self.train_losses[rollout] = (self.policy_losses[rollout] + self.value_losses[rollout])
			self.tt.end_profile("Training loop")

			# Updates learning rate and alpha
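			# alpha is stepped up by alpha_update at each update interval and clamped at 1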
			if rollout and self.update_interval and rollout % self.update_interval == 0:
				if self.gamma != 1:
					lr_scheduler.step()
					lr = optimizer.param_groups[0]["lr"]
					self.log(f"Updated learning rate from {lr/self.gamma:.2e} to {lr:.2e}")
				if (alpha + self.alpha_update <= 1 or np.isclose(alpha + self.alpha_update, 1)) and self.alpha_update:
					alpha += self.alpha_update
					self.log(f"Updated alpha from {alpha-self.alpha_update:.2f} to {alpha:.2f}")
				elif alpha < 1 and alpha + self.alpha_update > 1 and self.alpha_update:
					self.log(f"Updated alpha from {alpha:.2f} to 1")
					alpha = 1

			if self.log.is_verbose() or rollout in (np.linspace(0, 1, 20)*self.rollouts).astype(int):
				self.log(f"Rollout {rollout} completed with mean loss {self.train_losses[rollout]}")

			if self.with_analysis:
				self.tt.profile("Analysis of rollout")
				self.analysis.rollout(net, rollout, value_targets)
				self.tt.end_profile("Analysis of rollout")

			if rollout in self.evaluation_rollouts:
				net.eval()

				self.agent.net = net
				self.tt.profile(f"Evaluating using agent {self.agent}")
				with unverbose:
					eval_results, _, _ = self.evaluator.eval(self.agent)
				eval_reward = (eval_results != -1).mean()
				self.sol_percents.append(eval_reward)
				self.tt.end_profile(f"Evaluating using agent {self.agent}")

				if eval_reward > best_solve:
					best_solve = eval_reward
					best_net = net.clone()
					self.log(f"Updated best net with solve rate {eval_reward*100:.2f} % at depth {self.evaluator.scrambling_depths}")

		self.log.section("Finished training")
		if len(self.evaluation_rollouts):
			self.log(f"Best net solves {best_solve*100:.2f} % of games at depth {self.evaluator.scrambling_depths}")
		self.log.verbose("Training time distribution")
		self.log.verbose(self.tt)
		total_time = self.tt.tock()
		eval_time = self.tt.profiles[f'Evaluating using agent {self.agent}'].sum() if len(self.evaluation_rollouts) else 0
		train_time = self.tt.profiles["Training loop"].sum()
		adi_time = self.tt.profiles["ADI training data"].sum()
		nstates = self.rollouts * self.rollout_games * self.rollout_depth * cube.action_dim
		states_per_sec = int(nstates / (adi_time+train_time))
		self.log("\n".join([
			f"Total running time:               {self.tt.stringify_time(total_time, TimeUnit.second)}",
			f"- Training data for ADI:          {self.tt.stringify_time(adi_time, TimeUnit.second)} or {adi_time/total_time*100:.2f} %",
			f"- Training time:                  {self.tt.stringify_time(train_time, TimeUnit.second)} or {train_time/total_time*100:.2f} %",
			f"- Evaluation time:                {self.tt.stringify_time(eval_time, TimeUnit.second)} or {eval_time/total_time*100:.2f} %",
			f"States witnessed incl. substates: {TickTock.thousand_seps(nstates)}",
			f"- Per training second:            {TickTock.thousand_seps(states_per_sec)}",
		]))

		return net, best_net
Example #18
	def from_saved(cls, loc: str, use_best: bool, lambda_: float, expansions: int) -> DeepAgent:
		net = Model.load(loc, load_best=use_best).to(gpu)
		return cls(net, lambda_=lambda_, expansions=expansions)
Example #19
	def from_saved(cls, loc: str, use_best: bool, c: float, search_graph: bool):
		net = Model.load(loc, load_best=use_best)
		net.to(gpu)
		return cls(net, c=c, search_graph=search_graph)
Example #20
	def from_saved(cls, loc: str, use_best: bool, epsilon: float, workers: int, depth: int):
		net = Model.load(loc, load_best=use_best).to(gpu)
		return cls(net, epsilon=epsilon, workers=workers, depth=depth)
Example #21
	def from_saved(cls, loc: str, use_best: bool):
		net = Model.load(loc, load_best=use_best)
		net.to(gpu)
		return cls(net)
Example #22
import matplotlib.pyplot as plt

import numpy as np

from librubiks import gpu, cube, rc_params
from librubiks.model import Model
from librubiks.solving.agents import MCTS
from librubiks.utils import set_seeds, Logger, TickTock, TimeUnit

np.set_printoptions(precision=4, threshold=np.inf)
plt.rcParams.update(rc_params)

tt = TickTock()
log = Logger("data/local_analyses/mcts.log", "Analyzing MCTS")
net = Model.load("local_net").eval().to(gpu)


def solve(depth: int, c: float, time_limit: float):
    state, f, d = cube.scramble(depth, True)
    searcher = MCTS(net, c=c, search_graph=False)
    is_solved = searcher.search(state, time_limit)
    assert is_solved == (cube.get_solved().tostring() in searcher.indices)
    return is_solved, len(searcher.indices)


def analyze_var(var: str, values: np.ndarray, other_vars: dict):
    x = values
    y = []
    tree_sizes = []
    log.section(
        f"Optimizing {var}\nExpected runtime: {len(x)*time_limit*n:.2f} s\nGames per evaluation: {n}"
Example #23
	def test_init(self):
		for init in ['glorot', 'he', 0, 1.123123123e-3]:
			cf = ModelConfig(init=init)
			model = Model.create(cf)
			x = torch.randn(2, 480).to(gpu)
			model(x)