def __init__(self, state_shape, action_size):
    self.learning_rate = 0.001
    self.state_shape = state_shape
    self.action_size = action_size
    self.gamma = 0.999   # discount factor
    self.epsilon = 0.01
    self.lamb = 0.99

    # The board is described by the first two dimensions of the state.
    board_shape = state_shape[:2]
    self.board_shape = board_shape

    # Online value network and a structurally identical target network.
    self.value_model = AgentModel("value", board_shape)
    self.target_value_model = AgentModel("target_value", board_shape)
    self.value_model.build(input_shape=(None,) + board_shape)
    self.target_value_model.build(input_shape=(None,) + board_shape)

    # Start both networks with identical weights.
    for var, var_target in zip(
        self.value_model.trainable_variables,
        self.target_value_model.trainable_variables,
    ):
        var.assign(var_target)

    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    self.loss_function = tf.keras.losses.MeanSquaredError()
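# A minimal, standalone sketch of the target-network pattern used above: two
# structurally identical networks, a hard copy of the online weights into the
# target copy, and a TD target computed from that frozen copy. The tiny Dense
# model only stands in for AgentModel, and `hard_update` is a hypothetical
# helper introduced here for illustration; it is not part of the original code.
import numpy as np
import tensorflow as tf


def make_net():
    return tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1),
    ])


def hard_update(source, target):
    # Copy every trainable variable of `source` into `target`.
    for var_s, var_t in zip(source.trainable_variables, target.trainable_variables):
        var_t.assign(var_s)


value_net, target_net = make_net(), make_net()
dummy = np.zeros((1, 4), dtype=np.float32)
value_net(dummy)
target_net(dummy)  # run a forward pass so both nets create their variables
hard_update(value_net, target_net)

gamma = 0.999
rewards = np.array([[1.0], [0.0]], dtype=np.float32)
next_states = np.random.rand(2, 4).astype(np.float32)
# The bootstrapped TD target uses the target network, which stays fixed
# between periodic hard updates.
td_target = rewards + gamma * target_net(next_states).numpy()
print(td_target.shape)  # (2, 1)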
def test_model(self):
    ones = np.ones(self.shape, dtype=np.float32)
    model = AgentModel("", self.shape)

    # Regression-check the untrained model's output on a batch of ten
    # identical all-ones boards.
    output = model(np.array([ones] * 10, dtype=np.float32))
    np.testing.assert_almost_equal(output, [[-0.0006732]] * 10)

    # With a fixed global seed, a forward pass in training mode should
    # produce the same values.
    tf.random.set_seed(0)
    output = model(np.array([ones] * 10, dtype=np.float32), training=True).numpy()
    want = [[-0.0006732]] * 10
    np.testing.assert_almost_equal(output, want)
def __init__(self, num_actions, gamma, max_experiences, min_experiences,
             batch_size, lr, hidden_units, num_states):
    self.num_actions = num_actions
    self.batch_size = batch_size
    self.optimizer = tf.optimizers.Adam(lr)
    self.gamma = gamma
    self.model = AgentModel(num_actions, hidden_units, num_states)
    # Replay buffer: parallel lists of states, actions, rewards, next
    # states and done flags.
    self.experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
    self.max_experiences = max_experiences
    self.min_experiences = min_experiences
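# A standalone sketch of how a replay buffer shaped like the `experience` dict
# above is typically filled and sampled. `add_experience` and `sample_batch`
# are hypothetical helpers, not part of the original class; `max_experiences`
# bounds the buffer size and `min_experiences` gates when sampling may start.
import numpy as np

experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
max_experiences, min_experiences, batch_size = 10000, 100, 32


def add_experience(exp):
    # Drop the oldest transition once the buffer is full.
    if len(experience['s']) >= max_experiences:
        for key in experience:
            experience[key].pop(0)
    for key, value in exp.items():
        experience[key].append(value)


def sample_batch(rng):
    # Only sample once enough transitions have been collected.
    if len(experience['s']) < min_experiences:
        return None
    ids = rng.integers(0, len(experience['s']), size=batch_size)
    return {key: np.asarray([experience[key][i] for i in ids]) for key in experience}


rng = np.random.default_rng(0)
for t in range(200):
    add_experience({'s': np.zeros(4), 'a': 0, 'r': 1.0, 's2': np.zeros(4), 'done': False})
batch = sample_batch(rng)
print(batch['s'].shape, batch['done'].shape)  # (32, 4) (32,)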
def pool_run_args(argses, super_dirname, output_every, t_upto, resume):
    runners = []
    for args in argses:
        output_dirname = make_output_dirname(args)
        output_dirpath = join(super_dirname, output_dirname)
        if resume and get_filenames(output_dirpath):
            # Existing output found: resume from this directory.
            runner = Runner(output_dirpath, output_every)
        else:
            # No resumable output: build a fresh model and clear the directory.
            model = AgentModel(**args)
            runner = Runner(output_dirpath, output_every, model=model)
            runner.clear_dir()
        runners.append(runner)
    pool_run(runners, t_upto)
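# A standalone sketch of the resume-or-start-fresh dispatch above: if a run
# directory already holds output files and `resume` is set, reuse it;
# otherwise wipe it and start over. `has_output`, `prepare_run_dir` and the
# example path are assumptions for illustration, not the original Runner API.
import os
import shutil


def has_output(dirpath):
    return os.path.isdir(dirpath) and len(os.listdir(dirpath)) > 0


def prepare_run_dir(dirpath, resume):
    if resume and has_output(dirpath):
        return 'resume'
    # Fresh run: clear any stale output before writing new results.
    shutil.rmtree(dirpath, ignore_errors=True)
    os.makedirs(dirpath, exist_ok=True)
    return 'fresh'


print(prepare_run_dir('/tmp/agent_run_example', resume=True))  # 'fresh' if no prior output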
def add_model(self):
    """Build the agent and critic networks, set their initial weights,
    and create their optimizers and loss functions."""
    self.model_agent = AgentModel(12, 20, 6)
    self.model_critic = CriticModel(11, 21, 10, 0)
    self.set_model_weights(self.model_agent)
    self.set_model_weights(self.model_critic)
    self.optimizer_agent = torch.optim.Adam(self.model_agent.parameters(), lr=0.001)
    self.optimizer_critic = torch.optim.Adam(self.model_critic.parameters(), lr=0.001)
    self.loss_agent = torch.nn.MSELoss()
    self.loss_critic = torch.nn.MSELoss()
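# A standalone sketch of one update step using the two optimizer/loss pairs
# created above. The tiny Linear modules only stand in for AgentModel and
# CriticModel, and the regression targets are dummies; this illustrates the
# mechanics of separate backward passes, not the original training logic.
import torch

actor = torch.nn.Linear(12, 6)
critic = torch.nn.Linear(11, 1)
opt_actor = torch.optim.Adam(actor.parameters(), lr=0.001)
opt_critic = torch.optim.Adam(critic.parameters(), lr=0.001)
mse = torch.nn.MSELoss()

actor_in, actor_target = torch.randn(8, 12), torch.randn(8, 6)
critic_in, critic_target = torch.randn(8, 11), torch.randn(8, 1)

# Each network gets its own loss, backward pass and optimizer step.
opt_actor.zero_grad()
loss_a = mse(actor(actor_in), actor_target)
loss_a.backward()
opt_actor.step()

opt_critic.zero_grad()
loss_c = mse(critic(critic_in), critic_target)
loss_c.backward()
opt_critic.step()
print(float(loss_a), float(loss_c))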
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load hyperparameters from checkpoint (if it exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if not hparams.forward_agent and not hparams.random_agent and \
                not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and \
                    hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        tokenizer = BTokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1

    if hparams.eval_on_val:
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = {
        split: (
            Batch(hparams, simulator, featurizer, tokenizer, split=split),
            Evaluation(hparams, [split], hparams.data_path))
        for split in val_splits
    }

    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
                           weight_decay=hparams.weight_decay)

    best_metrics = {env_name: -1 for env_name in val_envs.keys()}
    best_metrics['combined'] = -1

    # Load model parameters from checkpoint (if it exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        hparams.log_every = round(
            len(train_env.data) / (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
                 end_iter, best_metrics, eval_mode)
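# A standalone sketch of the checkpoint layout train_val() expects: a dict
# with 'iter', 'model_state_dict', 'optim_state_dict', 'best_metrics' and
# 'data_idx' keys, saved and restored with torch. The toy Linear model, the
# metric keys and the save path are placeholders for illustration; they are
# not the project's actual `load` helper or checkpoint contents.
import torch

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

ckpt = {
    'iter': 1000,
    'model_state_dict': model.state_dict(),
    'optim_state_dict': optimizer.state_dict(),
    'best_metrics': {'val_seen': -1, 'val_unseen': -1, 'combined': -1},
    'data_idx': 0,
}
torch.save(ckpt, '/tmp/example_ckpt.pt')

# Restoring mirrors the "Load model parameters from checkpoint" block above.
restored = torch.load('/tmp/example_ckpt.pt', map_location='cpu')
model.load_state_dict(restored['model_state_dict'])
optimizer.load_state_dict(restored['optim_state_dict'])
start_iter = restored['iter']
print(start_iter)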