def load(load_dir: str, logger=NullLogger(), load_best=False):
    """
    Load a model from a configuration directory
    """
    model_path = os.path.join(load_dir, "model.pt" if not load_best else "model-best.pt")
    conf_path = os.path.join(load_dir, "config.json")
    with open(conf_path, encoding="utf-8") as conf:
        try:
            state_dict = torch.load(model_path, map_location=gpu)
        except FileNotFoundError:
            model_path = os.path.join(load_dir, "model.pt")
            state_dict = torch.load(model_path, map_location=gpu)
        config = ModelConfig.from_json_dict(json.load(conf))

    model = Model.create(config, logger)
    model.load_state_dict(state_dict)
    model.to(gpu)
    # The first feedforward after loading is slow, so one is performed here
    # This avoids skewing evaluation results
    with torch.no_grad():
        model.eval()
        model(cube.as_oh(cube.get_solved()))
        model.train()
    return model
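# Hedged usage sketch (not part of the original listing): load a previously trained
# model and run a single forward pass on the solved state. The directory
# "local_train/example_run" is a hypothetical path used only for illustration.
net = Model.load("local_train/example_run", load_best=True)
with torch.no_grad():
    net.eval()
    out = net(cube.as_oh(cube.get_solved()))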
def __init__(self, config: ModelConfig, logger=NullLogger()):
    super().__init__()
    self.config = config
    self.log = logger

    self._construct_net()
    self.log(f"Created network\n{self.config}\n{self}")
def __init__(self,
             target_function,  # Maximizes target function
             parameters: dict,
             alpha: float = 1e-5,
             n_restarts: int = 20,
             acquisition: str = 'ei',
             logger: Logger = NullLogger(),
             ):
    """Set up the BO class: utility (acquisition) function and Gaussian process.

    :param float alpha: Handles how much noise the GP can deal with
    :param int n_restarts: Higher => more expensive, but more accurate
    """
    super().__init__(target_function, parameters, logger)

    self.optimizer = BayesianOptimization(
        f=None,
        pbounds=parameters,
        verbose=0,
    )
    self.optimizer.set_gp_params(alpha=alpha, n_restarts_optimizer=n_restarts)
    self.utility = UtilityFunction(kind=acquisition, kappa=2.5, xi=0.2)

    self.logger(f"Created Bayesian Optimizer with alpha = {alpha} and {n_restarts} restarts for each optimization. "
                f"Acquisition function is {acquisition}.")
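# Hedged sketch (not from the original source) of the ask-and-tell loop this
# constructor prepares for: with f=None, candidate points are suggested by the GP
# and results are registered back manually via the bayes_opt API. The method name
# `optimize`, the iteration count, and `target_function` accepting a dict of
# parameters are assumptions made only for illustration.
def optimize(self, iterations: int = 10):
    for _ in range(iterations):
        next_params = self.optimizer.suggest(self.utility)          # ask the acquisition function for a candidate
        score = self.target_function(next_params)                   # evaluate the (expensive) target function
        self.optimizer.register(params=next_params, target=score)   # tell the GP about the result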
def create(config: ModelConfig, logger=NullLogger()):
    """
    Allows this class to be used to instantiate other network architectures
    based on the content of the configuration file.
    """
    if config.architecture.startswith("fc"):
        return Model(config, logger).to(gpu)
    if config.architecture.startswith("res"):
        return ResNet(config, logger).to(gpu)
    if config.architecture == "conv":
        return ConvNet(config, logger).to(gpu)
    raise KeyError(f"Network architecture should be 'fc_small', 'fc_big', 'res_small', 'res_big' or 'conv', "
                   f"but '{config.architecture}' was given")
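# Hedged sketch (not from the original source): how `create` dispatches on the
# architecture string. Setting `architecture` directly on a default ModelConfig is
# an assumption about the config interface, made only for illustration.
config = ModelConfig()
config.architecture = "res_small"
net = Model.create(config, logger=NullLogger())  # returns a ResNet placed on `gpu`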
def test_save_and_load(self):
    torch.manual_seed(42)
    config = ModelConfig()
    model = Model.create(config, logger=NullLogger())
    model_dir = "local_tests/local_model_test"
    model.save(model_dir)
    assert os.path.exists(f"{model_dir}/config.json")
    assert os.path.exists(f"{model_dir}/model.pt")

    model = Model.load(model_dir).to(gpu)
    assert next(model.parameters()).device.type == gpu.type
def __init__(self,
             target_function,  # Maximizes target function
             parameters: dict,
             logger: Logger = NullLogger(),
             ):
    """Set up the grid search over the given parameter ranges."""
    super().__init__(target_function, parameters, logger)
    self.logger("Created grid search")
def __init__(self, evaluations: np.ndarray, games: int, depth: int, extra_evals: int,
             reward_method: str, logger: Logger = NullLogger()):
    """Mostly initializes containers

    :param np.ndarray evaluations: Array of the evaluations performed on the model. Used for the more intensive analysis
    :param int depth: Rollout depth
    :param extra_evals: If != 0, extra evaluations are added for the first `extra_evals` rollouts
    """
    self.games = games
    self.depth = depth
    self.depths = np.arange(depth)
    # Won't add evals in the future (or if no evals are needed)
    self.extra_evals = min(evaluations[-1] if len(evaluations) else 0, extra_evals)
    self.evaluations = np.unique(np.append(evaluations, range(self.extra_evals)))
    self.reward_method = reward_method

    self.orig_params = None
    self.params = None

    self.first_states = np.stack((
        cube.get_solved(),
        *cube.multi_rotate(cube.repeat_state(cube.get_solved(), cube.action_dim), *cube.iter_actions())
    ))
    self.first_states = cube.as_oh(self.first_states)
    self.first_state_values = list()

    self.substate_val_stds = list()
    self.avg_value_targets = list()
    self.param_changes = list()
    self.param_total_changes = list()

    self.policy_entropies = list()
    self.rollout_policy = list()

    self.log = logger
    self.log.verbose(f"Analysis of this training was enabled. Extra analysis is done for evaluations "
                     f"and for the first {extra_evals} rollouts")
def __init__(self,
             n_games,
             scrambling_depths: range or list,
             max_time=None,    # Max time to completion per game
             max_states=None,  # The max number of states to explore per game
             logger: Logger = NullLogger()
             ):
    self.n_games = n_games
    self.max_time = max_time
    self.max_states = max_states

    self.tt = TickTock()
    self.log = logger
    # Use the array of scrambling depths if not a deep evaluation, else just a one-element array with 0
    self.scrambling_depths = np.array(scrambling_depths) if scrambling_depths != range(0) else np.array([0])

    self.log("\n".join([
        "Creating evaluator",
        f"Games per scrambling depth: {self.n_games}",
        f"Scrambling depths: {scrambling_depths if not self._isdeep() else 'Uniformly sampled in [100, 999]'}",
    ]))
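# Hedged sketch (not from the original source): constructing an evaluator for a
# standard evaluation over depths 1-10, and one for a deep evaluation where depths
# are sampled uniformly in [100, 999]. The argument values are illustrative only.
standard_evaluator = Evaluator(n_games=50, scrambling_depths=range(1, 11), max_time=1)
deep_evaluator = Evaluator(n_games=20, scrambling_depths=range(0), max_time=60)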
def __init__(self,
             target_function,   # Maximizes target function
             parameters: dict,  # str name: tuple limits
             logger: Logger = NullLogger(),
             ):
    self.target_function = target_function
    self.parameters = parameters

    self.optimal = None
    self.highscore = None

    # For evaluation use
    self.evaluator = None
    self.persistent_agent_params = None
    self.agent_class = None
    self.param_prepper = None

    self.score_history = list()
    self.parameter_history = list()

    self.logger = logger
    self.logger.log(f"Optimizer {self} created with parameters: {self.format_params(self.parameters)}")
def __init__(self,
             rollouts: int,
             batch_size: int,  # Required to be > 1 when training with batchnorm
             rollout_games: int,
             rollout_depth: int,
             optim_fn,
             alpha_update: float,
             lr: float,
             gamma: float,
             update_interval: int,
             agent: DeepAgent,
             evaluator: Evaluator,
             evaluation_interval: int,
             with_analysis: bool,
             tau: float,
             reward_method: str,
             policy_criterion=torch.nn.CrossEntropyLoss,
             value_criterion=torch.nn.MSELoss,
             logger: Logger = NullLogger(),
             ):
    """Sets up the evaluation array, instantiates criteria, and stores and documents settings

    :param bool with_analysis: If True, a number of statistics relating to loss behaviour and model output are stored
    :param float alpha_update: alpha <- alpha + alpha_update every update_interval rollouts (excl. rollout 0)
    :param float gamma: lr <- lr * gamma every update_interval rollouts (excl. rollout 0)
    :param float tau: How much of the new network to use to generate ADI data
    """
    self.rollouts = rollouts
    self.train_rollouts = np.arange(self.rollouts)
    self.batch_size = self.states_per_rollout if not batch_size else batch_size
    self.rollout_games = rollout_games
    self.rollout_depth = rollout_depth
    self.adi_ff_batches = 1  # Number of batches used for feedforward in ADI_traindata. Used to limit VRAM usage
    self.reward_method = reward_method

    # Perform evaluation every evaluation_interval rollouts and after the last rollout
    if evaluation_interval:
        self.evaluation_rollouts = np.arange(0, self.rollouts, evaluation_interval) - 1
        if evaluation_interval == 1:
            self.evaluation_rollouts = self.evaluation_rollouts[1:]
        else:
            self.evaluation_rollouts[0] = 0
        if self.rollouts - 1 != self.evaluation_rollouts[-1]:
            self.evaluation_rollouts = np.append(self.evaluation_rollouts, self.rollouts - 1)
    else:
        self.evaluation_rollouts = np.array([])
    self.agent = agent

    self.tau = tau
    self.alpha_update = alpha_update
    self.lr = lr
    self.gamma = gamma
    self.update_interval = update_interval  # How often alpha and lr are updated

    self.optim = optim_fn
    self.policy_criterion = policy_criterion(reduction='none')
    self.value_criterion = value_criterion(reduction='none')

    self.evaluator = evaluator
    self.log = logger
    self.log("\n".join([
        "Created trainer",
        f"Alpha update: {self.alpha_update:.2f}",
        f"Learning rate and gamma: {self.lr} and {self.gamma}",
        f"Learning rate and alpha will update every {self.update_interval} rollouts: "
        f"lr <- {self.gamma:.4f} * lr and alpha += {self.alpha_update:.4f}"
        if self.update_interval else "Learning rate and alpha will not be updated during training",
        f"Optimizer: {self.optim}",
        f"Policy and value criteria: {self.policy_criterion} and {self.value_criterion}",
        f"Rollouts: {self.rollouts}",
        f"Batch size: {self.batch_size}",
        f"Rollout games: {self.rollout_games}",
        f"Rollout depth: {self.rollout_depth}",
        f"alpha update: {self.alpha_update}",
    ]))

    self.with_analysis = with_analysis
    if self.with_analysis:
        # Logger should not be set in standard use
        self.analysis = TrainAnalysis(self.evaluation_rollouts, self.rollout_games, self.rollout_depth,
                                      extra_evals=100, reward_method=reward_method, logger=self.log)

    self.tt = TickTock()
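# Worked example (added for illustration, not from the original source) of the
# evaluation schedule computed in the constructor above, for rollouts=500 and
# evaluation_interval=100:
rollouts, evaluation_interval = 500, 100
evaluation_rollouts = np.arange(0, rollouts, evaluation_interval) - 1   # [-1, 99, 199, 299, 399]
evaluation_rollouts[0] = 0                                              # [ 0, 99, 199, 299, 399]
evaluation_rollouts = np.append(evaluation_rollouts, rollouts - 1)      # [ 0, 99, 199, 299, 399, 499]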