Example #1
	@staticmethod
	def load(load_dir: str, logger=NullLogger(), load_best=False):
		"""
		Load a model from a configuration directory
		"""

		model_path = os.path.join(load_dir, "model-best.pt" if load_best else "model.pt")
		conf_path = os.path.join(load_dir, "config.json")
		with open(conf_path, encoding="utf-8") as conf:
			try:
				state_dict = torch.load(model_path, map_location=gpu)
			except FileNotFoundError:
				model_path = os.path.join(load_dir, "model.pt")
				state_dict = torch.load(model_path, map_location=gpu)
			config = ModelConfig.from_json_dict(json.load(conf))

		model = Model.create(config, logger)
		model.load_state_dict(state_dict)
		model.to(gpu)
		# The first time the network is loaded, a forward pass is performed, since the first pass is slow.
		# This avoids skewing evaluation results.
		with torch.no_grad():
			model.eval()
			model(cube.as_oh(cube.get_solved()))
			model.train()
		return model
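A minimal usage sketch of the loader above. The import path and directory name are assumptions for illustration; `gpu` is whatever device object the surrounding module defines.

	# Hypothetical usage; the import path and directory are illustrative, not the project's actual layout
	from model import Model  # assumed import path
	net = Model.load("data/local_train", load_best=True)  # falls back to model.pt if model-best.pt is absent
	net.eval()  # the warm-up forward pass has already been run inside load()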
Example #2
	def __init__(self, config: ModelConfig, logger=NullLogger()):
		super().__init__()
		self.config = config
		self.log = logger

		self._construct_net()
		self.log(f"Created network\n{self.config}\n{self}")
Example #3
	def __init__(self,
			# Maximizes target function
			target_function,
			parameters: dict,

			alpha: float = 1e-5,
			n_restarts: int = 20,
			acquisition: str = 'ei',

			logger: Logger = NullLogger(),
		):
		"""Set op BO class, set up utility function (acqusition function) and gaussian process.

		:param float alpha:  Handles how much noise the GP can deal with
		:param int n_restarts: Higher => more expensive, but more accurate
		"""
		super().__init__(target_function, parameters, logger)

		self.optimizer = BayesianOptimization(
			f=None,
			pbounds=parameters,
			verbose=0,
		)
		self.optimizer.set_gp_params(alpha=alpha, n_restarts_optimizer=n_restarts)
		self.utility = UtilityFunction(kind=acquisition, kappa=2.5, xi=0.2)

		self.logger(f"Created Bayesian Optimizer with alpha = {alpha} and {n_restarts} restarts for each optimization. Acquisition function is {acquisition}.")
Example #4
	@staticmethod
	def create(config: ModelConfig, logger=NullLogger()):
		"""
		Allows this class to be used to instantiate other network architectures based on the content
		of the configuration file.
		"""
		if config.architecture.startswith("fc"):  return Model(config, logger).to(gpu)
		if config.architecture.startswith("res"): return ResNet(config, logger).to(gpu)
		if config.architecture == "conv":         return ConvNet(config, logger).to(gpu)

		raise KeyError(f"Network architecture should be 'fc_small', 'fc_big', 'res_small', 'res_big', 'conv', but '{config.architecture}' was given")
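A hypothetical call into the factory above. It assumes `ModelConfig` accepts `architecture` as a constructor argument, which is not shown in these snippets; the architecture strings come from the error message.

	# Hypothetical usage of the factory; assumes ModelConfig exposes an `architecture` field
	config = ModelConfig(architecture="res_big")  # one of fc_small, fc_big, res_small, res_big, conv
	net = Model.create(config)                    # dispatches to ResNet and moves it to `gpu`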
Example #5
	def test_save_and_load(self):
		torch.manual_seed(42)

		config = ModelConfig()
		model = Model.create(config, logger=NullLogger())
		model_dir = "local_tests/local_model_test"
		model.save(model_dir)
		assert os.path.exists(f"{model_dir}/config.json")
		assert os.path.exists(f"{model_dir}/model.pt")

		model = Model.load(model_dir).to(gpu)
		assert next(model.parameters()).device.type == gpu.type
Example #6
	def __init__(self,
			target_function, # Maximizes target function
			parameters: dict,
			logger: Logger=NullLogger(),
		):
		"""Set op BO class, set up utility function (acqusition function) and gaussian process.

		:param float alpha:  Handles how much noise the GP can deal with
		:param int n_restarts: Higher => more expensive, but more accurate
		"""
		super().__init__(target_function, parameters, logger)

		self.logger(f"Created grid search")
Example #7
	def __init__(self,
				 evaluations: np.ndarray,
				 games: int,
				 depth: int,
				 extra_evals: int,
				 reward_method: str,
				 logger: Logger = NullLogger()):
		"""Initialize containers mostly

		:param np.ndarray evaluations:  array of the evaluations performed on the model. Used for the more intensive analysis
		:param int depth: Rollout depth
		:param extra_evals: If != 0, extra evaluations are added for the first `exta_evals` rollouts

		"""

		self.games = games
		self.depth = depth
		self.depths = np.arange(depth)
		self.extra_evals = min(evaluations[-1] if len(evaluations) else 0, extra_evals)  # Won't add evals in the future (or if no evals are needed)
		self.evaluations = np.unique(np.append(evaluations, range(self.extra_evals)))
		self.reward_method = reward_method

		self.orig_params = None
		self.params = None

		self.first_states = np.stack((
				cube.get_solved(),
				*cube.multi_rotate(cube.repeat_state(cube.get_solved(), cube.action_dim), *cube.iter_actions())
				))
		self.first_states = cube.as_oh(self.first_states)
		self.first_state_values = list()

		self.substate_val_stds = list()

		self.avg_value_targets = list()
		self.param_changes = list()
		self.param_total_changes = list()

		self.policy_entropies = list()
		self.rollout_policy = list()

		self.log = logger
		self.log.verbose(f"Analysis of this training was enabled. Extra analysis is done for evaluations and for first {extra_evals} rollouts")
Example #8
	def __init__(self,
		         n_games,
		         scrambling_depths: range or list,
		         max_time = None,  # Max time to completion per game
		         max_states = None,  # The max number of states to explore per game
		         logger: Logger = NullLogger()
		):

		self.n_games = n_games
		self.max_time = max_time
		self.max_states = max_states

		self.tt = TickTock()
		self.log = logger
		# Use the array of scrambling depths if this is not a deep evaluation, else just a one-element array with 0
		self.scrambling_depths = np.array(scrambling_depths) if scrambling_depths != range(0) else np.array([0])

		self.log("\n".join([
			"Creating evaluator",
			f"Games per scrambling depth: {self.n_games}",
			f"Scrambling depths: {scrambling_depths if self._isdeep() else 'Uniformly sampled in [100, 999]'}",
		]))
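A hypothetical construction of the evaluator above; the argument values are illustrative only.

	# Hypothetical construction; values are illustrative
	evaluator = Evaluator(
		n_games=100,
		scrambling_depths=range(1, 11),  # passing range(0) would instead select deep evaluation
		max_time=60,                     # at most 60 s per game
		max_states=None,                 # no cap on explored states
	)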
Example #9
	def __init__(self,
			# Maximizes target function
			target_function,
			parameters: dict,  # str name : tuple limits

			logger: Logger=NullLogger(),
		):
		self.target_function = target_function
		self.parameters = parameters

		self.optimal = None
		self.highscore = None

		# For evaluation use
		self.evaluator = None
		self.persistent_agent_params = None
		self.agent_class = None
		self.param_prepper = None

		self.score_history = list()
		self.parameter_history = list()

		self.logger = logger
		self.logger.log(f"Optimizer {self} created parameters: {self.format_params(self.parameters)}")
Example #10
	def __init__(self,
				 rollouts: int,
				 batch_size: int,  # Required to be > 1 when training with batchnorm
				 rollout_games: int,
				 rollout_depth: int,
				 optim_fn,
				 alpha_update: float,
				 lr: float,
				 gamma: float,
				 update_interval: int,
				 agent: DeepAgent,
				 evaluator: Evaluator,
				 evaluation_interval: int,
				 with_analysis: bool,
				 tau: float,
				 reward_method: str,
				 policy_criterion	= torch.nn.CrossEntropyLoss,
				 value_criterion	= torch.nn.MSELoss,
				 logger: Logger		= NullLogger(),
				 ):
		"""Sets up evaluation array, instantiates critera and stores and documents settings


		:param bool with_analysis: If true, a number of statistics relating to loss behaviour and model output are stored.
		:param float alpha_update: alpha <- alpha + alpha_update every update_interval rollouts (excl. rollout 0)
		:param float gamma: lr <- lr * gamma every update_interval rollouts (excl. rollout 0)
		:param float tau: How much of the new network to use to generate ADI data
		"""
		self.rollouts = rollouts
		self.train_rollouts = np.arange(self.rollouts)
		self.batch_size = self.states_per_rollout if not batch_size else batch_size
		self.rollout_games = rollout_games
		self.rollout_depth = rollout_depth
		self.adi_ff_batches = 1  # Number of batches used for feedforward in ADI_traindata. Used to limit vram usage
		self.reward_method = reward_method

		# Perform evaluation every evaluation_interval and after last rollout
		if evaluation_interval:
			self.evaluation_rollouts = np.arange(0, self.rollouts, evaluation_interval)-1
			if evaluation_interval == 1:
				self.evaluation_rollouts = self.evaluation_rollouts[1:]
			else:
				self.evaluation_rollouts[0] = 0
			if self.rollouts-1 != self.evaluation_rollouts[-1]:
				self.evaluation_rollouts = np.append(self.evaluation_rollouts, self.rollouts-1)
		else:
			self.evaluation_rollouts = np.array([])
		self.agent = agent

		self.tau = tau
		self.alpha_update = alpha_update
		self.lr	= lr
		self.gamma = gamma
		self.update_interval = update_interval  # How often alpha and lr are updated

		self.optim = optim_fn
		self.policy_criterion = policy_criterion(reduction='none')
		self.value_criterion = value_criterion(reduction='none')

		self.evaluator = evaluator
		self.log = logger
		self.log("\n".join([
			"Created trainer",
			f"Alpha update: {self.alpha_update:.2f}",
			f"Learning rate and gamma: {self.lr} and {self.gamma}",
			f"Learning rate and alpha will update every {self.update_interval} rollouts: lr <- {self.gamma:.4f} * lr and alpha += {self.alpha_update:.4f}"\
				if self.update_interval else "Learning rate and alpha will not be updated during training",
			f"Optimizer:      {self.optim}",
			f"Policy and value criteria: {self.policy_criterion} and {self.value_criterion}",
			f"Rollouts:       {self.rollouts}",
			f"Batch size:     {self.batch_size}",
			f"Rollout games:  {self.rollout_games}",
			f"Rollout depth:  {self.rollout_depth}",
			f"alpha update:   {self.alpha_update}",
		]))

		self.with_analysis = with_analysis
		if self.with_analysis:
			self.analysis = TrainAnalysis(self.evaluation_rollouts, self.rollout_games, self.rollout_depth, extra_evals=100, reward_method=reward_method, logger=self.log)  # Logger should not be set in standard use

		self.tt = TickTock()
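The evaluation-rollout bookkeeping above is easiest to follow with concrete numbers; the standalone numpy sketch below uses made-up values and walks the branch for `evaluation_interval != 1`.

	# Standalone numpy sketch of the evaluation schedule (numbers are made up)
	import numpy as np
	rollouts, evaluation_interval = 10, 4
	evaluation_rollouts = np.arange(0, rollouts, evaluation_interval) - 1  # [-1, 3, 7]
	if evaluation_interval == 1:
		evaluation_rollouts = evaluation_rollouts[1:]
	else:
		evaluation_rollouts[0] = 0                                         # [0, 3, 7]
	if rollouts - 1 != evaluation_rollouts[-1]:
		evaluation_rollouts = np.append(evaluation_rollouts, rollouts - 1) # [0, 3, 7, 9]
	# The model is evaluated after rollouts 0, 3, 7 and always after the final rollout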