Example #1
    def __init__(self, state_shape, action_size):
        self.learning_rate = 0.001
        self.state_shape = state_shape
        self.action_size = action_size
        self.gamma = 0.999
        self.epsilon = 0.01
        self.lamb = 0.99

        board_shape = state_shape[:2]
        self.board_shape = board_shape

        self.value_model = AgentModel("value", board_shape)
        self.target_value_model = AgentModel("target_value", board_shape)

        self.value_model.build(input_shape=(None,) + board_shape)
        self.target_value_model.build(input_shape=(None,) + board_shape)

        # Initialize the target network with the online value network's weights.
        for var, var_target in zip(
            self.value_model.trainable_variables,
            self.target_value_model.trainable_variables,
        ):
            var_target.assign(var)

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        self.loss_function = tf.keras.losses.MeanSquaredError()
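A minimal companion sketch of the same synchronization run periodically during training (target <- online); the helper name sync_target is an assumption and is not part of the example above:

def sync_target(value_model, target_value_model):
    # Hard update: copy the online value network's weights into the target network.
    for var, var_target in zip(value_model.trainable_variables,
                               target_value_model.trainable_variables):
        var_target.assign(var)

In a training loop this would typically be invoked every fixed number of steps, e.g. sync_target(self.value_model, self.target_value_model).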
Example #2
    def test_model(self):
        ones = np.ones(self.shape, dtype=np.float32)
        model = AgentModel("", self.shape)
        output = model(np.array([ones] * 10, dtype=np.float32))
        np.testing.assert_almost_equal(output, [[-0.0006732]] * 10)

        tf.random.set_seed(0)
        output = model(np.array([ones] * 10, dtype=np.float32), training=True).numpy()
        want = [[-0.0006732]] * 10
        np.testing.assert_almost_equal(output, want)
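A hedged variant of the same test that checks only shape and dtype instead of pinning weight-dependent constants; it assumes, as the assertions above imply, that AgentModel maps a batch of 10 boards to one scalar value per board, and it reuses the same test-class fixtures:

    def test_model_output_shape(self):
        ones = np.ones(self.shape, dtype=np.float32)
        model = AgentModel("", self.shape)
        output = model(np.array([ones] * 10, dtype=np.float32)).numpy()
        # One scalar value per board in the batch of 10.
        self.assertEqual(output.shape, (10, 1))
        self.assertEqual(output.dtype, np.float32)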
Example #3
    def __init__(self, num_actions, gamma, max_experiences, min_experiences,
                 batch_size, lr, hidden_units, num_states):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.optimizer = tf.optimizers.Adam(lr)
        self.gamma = gamma
        self.model = AgentModel(num_actions, hidden_units, num_states)
        self.experience = {'s': [], 'a': [], 'r': [], 's2': [], 'done': []}
        self.max_experiences = max_experiences
        self.min_experiences = min_experiences
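A companion sketch of how the experience dictionary set up above is typically filled and capped; the method name add_experience is an assumption, but the keys match the buffer defined in __init__:

    def add_experience(self, exp):
        # Evict the oldest transition once the buffer reaches max_experiences.
        if len(self.experience['s']) >= self.max_experiences:
            for key in self.experience.keys():
                self.experience[key].pop(0)
        # exp is expected to carry the same keys: 's', 'a', 'r', 's2', 'done'.
        for key, value in exp.items():
            self.experience[key].append(value)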
Example #4
def pool_run_args(argses, super_dirname, output_every, t_upto, resume):
    runners = []
    for args in argses:
        output_dirname = make_output_dirname(args)
        output_dirpath = join(super_dirname, output_dirname)
        if resume and get_filenames(output_dirpath):
            runner = Runner(output_dirpath, output_every)
        else:
            model = AgentModel(**args)
            runner = Runner(output_dirpath, output_every, model=model)
            runner.clear_dir()
        runners.append(runner)
    pool_run(runners, t_upto)
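A usage sketch, assuming each entry of argses is a dict of AgentModel keyword arguments; the parameter names shown ('seed', 'n_agents') and the directory name 'runs' are purely illustrative:

argses = [{'seed': seed, 'n_agents': 100} for seed in range(4)]
pool_run_args(argses, super_dirname='runs', output_every=200, t_upto=50.0, resume=True)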
Example #5
    def add_model(self):
        """This function calls the appropriate model builder"""
        
        self.model_agent = AgentModel(12, 20, 6)

        self.model_critic = CriticModel(11, 21, 10, 0)

        self.set_model_weights(self.model_agent)

        self.set_model_weights(self.model_critic)

        self.optimizer_agent = torch.optim.Adam(
            self.model_agent.parameters(), lr=0.001)

        self.optimizer_critic = torch.optim.Adam(
            self.model_critic.parameters(), lr=0.001)

        self.loss_agent = torch.nn.MSELoss()

        self.loss_critic = torch.nn.MSELoss()
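One plausible reading of the set_model_weights helper called above, assuming it simply re-initializes every linear layer; the real implementation is not shown in this example:

    def set_model_weights(self, model):
        # Illustrative only: small uniform weights and zero biases for each Linear layer.
        for module in model.modules():
            if isinstance(module, torch.nn.Linear):
                torch.nn.init.uniform_(module.weight, -0.1, 0.1)
                torch.nn.init.zeros_(module.bias)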
Example #6
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    # Set which GPU to use
    device = torch.device('cuda', hparams.device_id)

    # Load hyperparameters from checkpoint (if it exists)
    if os.path.exists(hparams.load_path):
        print('Load model from %s' % hparams.load_path)
        ckpt = load(hparams.load_path, device)
        start_iter = ckpt['iter']
    else:
        if not hparams.forward_agent and not hparams.random_agent and not hparams.shortest_agent:
            if hasattr(hparams, 'load_path') and hasattr(hparams, 'eval_only') and hparams.eval_only:
                sys.exit('load_path %s does not exist!' % hparams.load_path)
        ckpt = None
        start_iter = 0
    end_iter = hparams.n_iters

    if not hasattr(hparams, 'ask_baseline'):
        hparams.ask_baseline = None
    if not hasattr(hparams, 'instruction_baseline'):
        hparams.instruction_baseline = None

    # Set random seeds
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    np.random.seed(hparams.seed)
    random.seed(hparams.seed)

    # Create or load vocab
    train_vocab_path = os.path.join(hparams.data_path, 'vocab.txt')
    if not os.path.exists(train_vocab_path):
        raise Exception('Vocab file not found at %s' % train_vocab_path)
    vocab = read_vocab([train_vocab_path])
    hparams.instr_padding_idx = vocab.index('<PAD>')

    tokenizer = Tokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    if hparams.encoder_type == 'dic':
        tokenizer = BTokenizer(vocab=vocab, encoding_length=hparams.max_instr_len)
    featurizer = ImageFeatures(hparams.img_features, device)
    simulator = Simulator(hparams)

    # Create train environment
    train_env = Batch(hparams, simulator, featurizer, tokenizer, split='train')

    # Create validation environments
    val_splits = ['val_seen', 'val_unseen']
    eval_mode = hasattr(hparams, 'eval_only') and hparams.eval_only
    if eval_mode:
        if 'val_seen' in hparams.load_path:
            val_splits = ['test_seen']
        elif 'val_unseen' in hparams.load_path:
            val_splits = ['test_unseen']
        else:
            val_splits = ['test_seen', 'test_unseen']
        end_iter = start_iter + 1

    if hparams.eval_on_val:
        val_splits = [x.replace('test_', 'val_') for x in val_splits]

    val_envs_tmp = { split: (
        Batch(hparams, simulator, featurizer, tokenizer, split=split),
        Evaluation(hparams, [split], hparams.data_path))
            for split in val_splits }

    val_envs = {}
    for key, value in val_envs_tmp.items():
        if '_seen' in key:
            val_envs[key + '_env_seen_anna'] = value
            val_envs[key + '_env_unseen_anna'] = value
        else:
            assert '_unseen' in key
            val_envs[key] = value

    # Build model and optimizer
    model = AgentModel(len(vocab), hparams, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=hparams.lr,
        weight_decay=hparams.weight_decay)

    best_metrics = {env_name: -1 for env_name in val_envs.keys()}
    best_metrics['combined'] = -1

    # Load model parameters from checkpoint (if it exists)
    if ckpt is not None:
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optim_state_dict'])
        best_metrics = ckpt['best_metrics']
        train_env.ix = ckpt['data_idx']

    if hparams.log_every == -1:
        hparams.log_every = round(len(train_env.data) /
            (hparams.batch_size * 100)) * 100

    print('')
    pprint(vars(hparams), width=1)
    print('')
    print(model)
    print('Number of parameters:',
        sum(p.numel() for p in model.parameters() if p.requires_grad))

    if hparams.random_agent or hparams.forward_agent or hparams.shortest_agent:
        assert eval_mode
        agent = SimpleAgent(hparams)
    else:
        agent = VerbalAskAgent(model, hparams, device)

    return train(train_env, val_envs, agent, model, optimizer, start_iter,
        end_iter, best_metrics, eval_mode)
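A hedged sketch of an entry point that could drive train_val, assuming hparams is a module-level namespace populated from a JSON config; the --config flag and loader shown here are illustrative, not this project's actual CLI:

if __name__ == '__main__':
    import argparse, json

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True, help='path to a JSON file of hyperparameters')
    flags = parser.parse_args()

    # train_val() reads the module-level hparams namespace.
    with open(flags.config) as f:
        hparams = argparse.Namespace(**json.load(f))

    train_val()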