Example #1
    def _build_model(cls, config):
        file_path = join(data.workspace.word_vectors, config.model.wvec_path)
        word_embeddings = SimpleEmbeddings.from_file(
            file_path,
            config.model.word_dim,
            vocab_size=config.model.vocab_size)
        word_embeddings = word_embeddings.with_special_tokens()
        token_embedder = TokenEmbedder(word_embeddings)

        model = None
        if config.model.type == 0:  # regular language model
            model = LanguageModel(token_embedder, config.model.hidden_dim,
                                  config.model.agenda_dim,
                                  config.model.num_layers, cls._make_logger())
        elif config.model.type == 1:  # SVAE
            model = NoisyLanguageModel(
                token_embedder, config.model.hidden_dim,
                config.model.agenda_dim, config.model.num_layers,
                config.model.kl_weight_steps, config.model.kl_weight_rate,
                config.model.kl_weight_cap, config.model.dci_keep_rate,
                cls._make_logger())
        assert model is not None

        model = try_gpu(model)
        optimizer = optim.Adam(model.parameters(),
                               lr=config.optim.learning_rate)
        return model, optimizer
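All of these snippets route the freshly built model through try_gpu. The helper itself is never shown in this collection; a minimal sketch of what such a helper presumably does (an assumption, the real one may differ per repo) is:

    import torch

    def try_gpu(x):
        # Hypothetical sketch: move a module or tensor to the GPU when one
        # is available, otherwise return it unchanged.
        return x.cuda() if torch.cuda.is_available() else x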
Example #2
    def _build_editor(cls, config, num_iter, eps, momentum):
        """Build Editor.

        Args:
            config (Config): Editor config
            num_iter (int)
            eps (float)
            momentum (float)

        Returns:
            Editor
        """

        file_path = join(data.workspace.word_vectors, config.wvec_path)
        word_embeddings = SimpleEmbeddings.from_file(file_path, config.word_dim, vocab_size=config.vocab_size)
        word_embeddings = word_embeddings.with_special_tokens()
        source_token_embedder = TokenEmbedder(word_embeddings)
        target_token_embedder = TokenEmbedder(word_embeddings)

        if config.decoder_cell == 'SimpleDecoderCell':
            decoder_cell = SimpleDecoderCell(target_token_embedder, config.hidden_dim,
                                             config.word_dim, config.agenda_dim)
        elif config.decoder_cell == 'AttentionDecoderCell':
            decoder_cell = AttentionDecoderCell(target_token_embedder, config.agenda_dim,
                                                config.hidden_dim, config.hidden_dim,
                                                config.attention_dim, config.no_insert_delete_attn,
                                                num_layers=config.decoder_layers)
        else:
            raise ValueError('{} not implemented'.format(config.decoder_cell))
        editor = Editor(source_token_embedder, config.hidden_dim,
                        config.agenda_dim, config.edit_dim, config.lamb_reg,
                        config.norm_eps, config.norm_max, config.kill_edit,
                        decoder_cell, config.encoder_layers, num_iter, eps,
                        momentum)
        editor = try_gpu(editor)
        return editor
Example #3
    def _truncate_extension_probs(cls, extension_probs, beam_size):
        """For each example, keep only the k highest scoring extension probs.

        Where k = beam_size.

        Args:
            extension_probs (np.ndarray): of shape (batch_size, vocab_size)
            beam_size (int)

        Returns:
            extension_probs_sorted (np.ndarray): of shape (batch_size, beam_size). Like extension_probs, but each
                row is sorted in descending probability, and truncated to a length of beam_size.
            original_indices (np.ndarray): of shape (batch_size, beam_size).
                original_indices[i, j] = the original column index of the probability value at extension_probs_sorted[i, j]
        """
        extension_probs_var = try_gpu(Variable(torch.from_numpy(extension_probs), volatile=True))
        extension_probs_sorted_var, original_indices_var = torch.sort(extension_probs_var, 1, descending=True)
        extension_probs_sorted_var = extension_probs_sorted_var[:, :beam_size]
        original_indices_var = original_indices_var[:, :beam_size]

        from_var = lambda v: v.data.cpu().numpy()
        extension_probs_sorted = from_var(extension_probs_sorted_var)
        original_indices = from_var(original_indices_var)

        # batch_size, vocab_size = extension_probs.shape
        # original_indices = np.argsort(-extension_probs, axis=1)  # (batch_size, vocab_size)
        # original_indices = original_indices[:, :beam_size]  # (batch_size, beam_size)
        #
        # j_indices, i_indices = np.meshgrid(np.arange(beam_size), np.arange(batch_size))  # (batch_size, beam_size)
        #
        # extension_probs_sorted = extension_probs[i_indices, original_indices]  # (batch_size, beam_size)

        return extension_probs_sorted, original_indices
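The docstring's contract is easy to check with plain numpy (this mirrors the commented-out numpy alternative kept in the snippet; the values are made up):

    import numpy as np

    probs = np.array([[0.1, 0.5, 0.3, 0.1],
                      [0.6, 0.2, 0.1, 0.1]])
    order = np.argsort(-probs, axis=1)[:, :2]       # beam_size = 2
    top = np.take_along_axis(probs, order, axis=1)
    # order -> [[1, 2], [0, 1]]   original column of each kept probability
    # top   -> [[0.5, 0.3], [0.6, 0.2]]   rows sorted descending, truncated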
Example #4
    def _build_model(config, training_examples):
        # build scorer
        model_config = config.retriever
        embeds_path = join(data.workspace.word_vectors, 'glove.6B.{}d.txt'.format(model_config.word_dim))
        word_embeds = SimpleEmbeddings.from_file(embeds_path, model_config.word_dim, model_config.vocab_size)
        word_embeds = word_embeds.with_special_tokens()

        def seq_embedder(trainable):
            sent_dim = model_config.sent_dim
            token_embedder = TokenEmbedder(word_embeds, trainable)
            if trainable:
                transform = Linear(token_embedder.embed_dim, sent_dim)  # if trainable, also add a linear transform
            else:
                transform = lambda x: x
            return BOWSequenceEmbedder(token_embedder, embed_dim=sent_dim,
                                       pool=model_config.pool_method, transform=transform)

        neg_sampler = UniformNegativeSampler(training_examples)
        input_embedder = seq_embedder(trainable=model_config.train_input)
        output_embedder = seq_embedder(trainable=model_config.train_output)
        scorer = Seq2SeqScorer(input_embedder, output_embedder, neg_sampler,
                               score_method=model_config.score_method, loss_method=model_config.loss_method)
        scorer = try_gpu(scorer)

        # build optimizer
        optimizer = optim.Adam(scorer.parameters(), lr=config.optim.learning_rate)
        return scorer, optimizer
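BOWSequenceEmbedder is project-specific, but the pattern inside seq_embedder (pool token embeddings, optionally through a trainable linear projection) can be sketched in plain PyTorch. The class below is an illustrative stand-in, not the project's actual API:

    import torch
    import torch.nn as nn

    class MeanBOWEmbedder(nn.Module):
        # Hypothetical stand-in: mean-pool word vectors, then project.
        def __init__(self, num_words, word_dim, sent_dim, trainable_transform):
            super().__init__()
            self.embed = nn.Embedding(num_words, word_dim)
            self.transform = (nn.Linear(word_dim, sent_dim)
                              if trainable_transform else nn.Identity())

        def forward(self, token_ids):            # (batch, seq_len)
            vecs = self.embed(token_ids)         # (batch, seq_len, word_dim)
            return self.transform(vecs.mean(1))  # (batch, sent_dim)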
Example #5
 def _create_model(self):
     config = self.config
     self.model = create_model(config)
     self.model = try_gpu(self.model)
     self.optimizer = optim.Adam(self.model.parameters(),
             lr=self.config.train.learning_rate,
             weight_decay=self.config.train.l2_reg)
     self.gradient_clip = config.train.gradient_clip
Example #6
 def clone(self):
     config = self._config
     dqn = try_gpu(Policy.from_config(config.policy, self._num_actions))
     dqn._Q.load_state_dict(self._dqn._Q.state_dict())
     dqn._target_Q.load_state_dict(self._dqn._target_Q.state_dict())
     replay_buffer = ReplayBuffer(config.buffer_max_size)
     optimizer = optim.Adam(dqn.parameters(), lr=config.learning_rate)
     return Skill(dqn, replay_buffer, optimizer, self.name + "-clone",
                  config.sync_target_freq, config.min_buffer_size,
                  config.batch_size, config.grad_steps_per_update,
                  config.max_grad_norm, self._num_actions, config)
Example #7
    def _build_model(cls, model_config, optim_config, data_config):
        """Build Editor.

        Args:
            model_config (Config): Editor config
            optim_config (Config): optimization config
            data_config (Config): dataset config

        Returns:
            (Editor, Optimizer): the model and its Adam optimizer
        """
        file_path = join(data.workspace.word_vectors, model_config.wvec_path)
        word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                          model_config.vocab_size,
                                          model_config.num_copy_tokens)
        word_dim = word_embeddings.embed_dim

        source_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_source_embeds)
        target_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_target_embeds)

        # number of input channels
        num_inputs = len(data_config.source_cols)

        decoder_cell = AttentionDecoderCell(
            target_token_embedder,
            2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
            model_config.agenda_dim,
            model_config.hidden_dim,
            model_config.hidden_dim,
            model_config.attention_dim,
            num_layers=model_config.decoder_layers,
            num_inputs=num_inputs,
            dropout_prob=model_config.decoder_dropout_prob,
            disable_attention=False)

        encoder = Encoder(word_dim, model_config.agenda_dim,
                          model_config.hidden_dim, model_config.encoder_layers,
                          num_inputs, model_config.encoder_dropout_prob, False)

        copy_len = [5, 5, 40]
        model = Editor(source_token_embedder,
                       encoder,
                       decoder_cell,
                       copy_lens=copy_len)
        model = try_gpu(model)

        optimizer = optim.Adam(model.parameters(),
                               lr=optim_config.learning_rate)

        return model, optimizer
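The "2 * word_dim" argument encodes the comment's reasoning: base and copy word vectors are concatenated along the feature axis before entering the decoder. The shape arithmetic:

    import torch

    base = torch.randn(2, 7, 300)           # (batch, seq, word_dim)
    copy = torch.randn(2, 7, 300)           # copy-token vectors, same shape
    both = torch.cat([base, copy], dim=-1)  # (2, 7, 600), i.e. 2 * word_dim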
Example #8
 def from_config(cls, config, num_actions, name):
     dqn = try_gpu(Policy.from_config(config.policy, num_actions))
     replay_buffer = ReplayBuffer.from_config(config.buffer)
     imitation_buffer = None
     if config.imitation:
         imitation_buffer = ReplayBuffer.from_config(config.buffer)
     optimizer = optim.Adam(dqn.parameters(), lr=config.learning_rate)
     return cls(dqn, replay_buffer, imitation_buffer, optimizer, name,
                config.sync_target_freq, config.min_buffer_size,
                config.batch_size, config.grad_steps_per_update,
                config.max_grad_norm, num_actions, config.adaptive_update,
                config.epsilon_clipping, config.max_worker_reward,
                config.dqn_vmax, config.dqn_vmin, config)
Example #9
 def crop(state):
     abstract_state = AS.AbstractState(state)
     y = int(abstract_state.pixel_y * 84. / 210.)
     x = int(abstract_state.pixel_x * 84. / 160.)
     cropped = state.pixel_state[:, y - 10:y + 10,
                                 max(0, x - 30):x + 30]
     padding = (0, 0)
     if x - 30 < 0:
         padding = (30 - x, 0)
     elif x + 30 > 160:
         padding = (0, x + 30 - 160)  # pad right by the shortfall, mirroring the left case
     cropped = torch.FloatTensor(cropped)
     cropped = try_gpu(
         torch.nn.functional.pad(cropped, padding, mode="reflect"))
     return cropped
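torch.nn.functional.pad with a two-element tuple pads the last dimension of a 3-D tensor, and mode="reflect" mirrors interior pixels instead of repeating the edge:

    import torch
    import torch.nn.functional as F

    x = torch.arange(5.).reshape(1, 1, 5)  # (C, H, W), like the crop above
    F.pad(x, (2, 0), mode="reflect")
    # -> [[[2., 1., 0., 1., 2., 3., 4.]]]  left edge mirrored, no repeated pixel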
Example #10
    def _build_editor(cls, model_config, data_config, word_embeddings,
                      word_dim, vae_mode):
        source_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_source_embeds)
        target_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_target_embeds)

        # number of input channels
        if vae_mode:
            num_inputs = len(data_config.source_cols)
        else:  # edit model uses num_inputs + num_inputs + 1
            num_inputs = len(data_config.source_cols) * 2 + 1

        decoder_cell = AttentionDecoderCell(
            target_token_embedder,
            2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
            model_config.agenda_dim,
            model_config.hidden_dim,
            model_config.hidden_dim,
            model_config.attention_dim,
            num_layers=model_config.decoder_layers,
            num_inputs=num_inputs,
            dropout_prob=model_config.decoder_dropout_prob,
            disable_attention=vae_mode)

        if vae_mode:
            encoder = Encoder(word_dim, model_config.agenda_dim,
                              model_config.hidden_dim,
                              model_config.encoder_layers, num_inputs,
                              model_config.encoder_dropout_prob, vae_mode,
                              model_config.vae_kappa)
        else:
            encoder = Encoder(word_dim, model_config.agenda_dim,
                              model_config.hidden_dim,
                              model_config.encoder_layers, num_inputs,
                              model_config.encoder_dropout_prob, vae_mode)

        vae_copy_len = [5, 10, 185]
        editor_copy_len = [5, 10, 10, 5, 10, 10, 150]
        if vae_mode:
            model = Editor(source_token_embedder, encoder, decoder_cell,
                           vae_copy_len)
        else:
            model = Editor(source_token_embedder, encoder, decoder_cell,
                           editor_copy_len)
        model = try_gpu(model)
        return model
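Note how the copy lengths match the channel arithmetic above: with three source columns, vae_mode gives num_inputs = 3, one per entry of vae_copy_len, while the edit model gives 3 * 2 + 1 = 7, one per entry of editor_copy_len.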
Example #11
    def __init__(self, config, save_dir):
        super(SystematicExplorationTrainingRun,
              self).__init__(config, save_dir)

        def make_env(i):
            def _thunk():
                # Only ever try to visualize index 0
                if i == 0:
                    return OriginalPixelsWrapper(get_env(config.env))
                return get_env(config.env)

            return _thunk

        configure_abstract_state(config.env.domain)
        self._nproc = config.num_processes
        self._env = SubprocVecEnv([make_env(i) for i in range(self._nproc)])

        Traverse.configure(config.edge_expansion_coeff)
        self._policy = try_gpu(
            Master.from_config(config.policy, self._env.action_space.n,
                               self._env.reset()[0], self._nproc,
                               config.env.domain))
        self._max_episodes = config.max_episodes
        self._video_freq = config.video_freq
        self._text_freq = config.text_freq
        self._stats_freq = config.stats_freq
        self._eval_freq = config.eval_freq
        self._eval_video_freq = config.eval_video_freq
        self._checkpoint_freq = config.checkpoint_freq
        self._max_checkpoints = config.max_checkpoints
        self._permanent_checkpoint = config.get("permanent_checkpoint", None)
        self.workspace.add_dir("visualizations", "visualizations")
        self.workspace.add_dir("traces", "traces")
        self.workspace.add_dir("video", "video")
        self.workspace.add_file("log", "log")
        logging.basicConfig(filename=self.workspace.log)
        self._checkpoint_number = 0

        self._teleport_frames = 0
        self._true_frames = 0  # Number of frames actually run; excludes teleport
        self._dead_frames = 0  # Number of frames simulated on dead edges
        self._dead_episodes = 0  # Number of episodes simulated on dead edges
        self._episode_nums = [0] * self._nproc  # Episode number of each proc
        self._best_reward = 0.  # Highest reward episode seen so far for proc 0

        self._load_latest_checkpoint()
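The make_env(i)/_thunk double closure is the usual recipe for subprocess-vectorized environments: SubprocVecEnv wants zero-argument constructors it can call inside each worker, and routing i through the outer call freezes its value per thunk, avoiding the classic late-binding pitfall:

    # All lambdas share the loop variable: late binding.
    thunks = [(lambda: i) for i in range(3)]
    print([t() for t in thunks])  # [2, 2, 2]

    # Binding i through an outer call, as make_env does, freezes it per thunk.
    def make(i):
        return lambda: i

    thunks = [make(i) for i in range(3)]
    print([t() for t in thunks])  # [0, 1, 2]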
Example #12
    def __init__(self, config, save_dir):
        super(DQNTrainingRun, self).__init__(config, save_dir)

        self._dqn = try_gpu(
            DQNPolicy.from_config(config.policy, self._env.action_space.n))
        optimizer = optim.Adam(self._dqn.parameters(), lr=config.learning_rate)
        self._train_state = self.checkpoints.load_latest(self._dqn, optimizer)

        self._replay_buffer = ReplayBuffer.from_config(config.buffer)

        # See configs/default-base.txt for documentation about these
        self._max_episode_len = config.max_episode_len
        self._buffer_size_start = config.buffer_size_start
        self._batch_size = config.batch_size
        self._sync_target_freq = config.sync_target_freq
        self._evaluate_freq = config.evaluate_freq
        self._episodes_to_evaluate = config.episodes_to_evaluate
        self._max_frames = config.max_frames
        self._update_freq = config.update_freq
        self._max_grad_norm = config.max_grad_norm

        self.workspace.add_dir("video", "video")
Example #13
    def backward_mom(self, grad_output, verbose=False):
        input_kp1_grad = [g.data for g in grad_output]
        p_kp1_grad = [torch.zeros(x.size()).type_as(x) for x in input_kp1_grad]
        rev_iters = self.iters
        for k in reversed(range(rev_iters)):
            lr_k_list = [lr for lr in self.lr]
            input_k_grad = input_kp1_grad
            p_kp1_grad = [
                p + lr_k * x
                for p, x, lr_k in zip(p_kp1_grad, input_kp1_grad, lr_k_list)
            ]
            input_k_rv = []
            input_H_xx_v = []
            r = self.eps
            for i in range(len(p_kp1_grad)):
                v = p_kp1_grad[i]
                x_k = self.input_cache[i][k]
                x_k_rv = Variable((x_k + r * v).type_as(x_k),
                                  requires_grad=True)
                input_k_rv.append(x_k_rv)
            """
            if self.acc_param_grads:
                all_input_params = input_k_rv + self.params
            else:
                all_input_params = input_k_rv
            """
            torch.manual_seed(int(self.seeds[k]))

            mean, logvar = input_k_rv
            z_samples = self.encoder._reparameterize(mean, logvar,
                                                     self.all_z[k])
            self.encoder_output = EncoderOutput(
                self.encoder_output.source_embeds,
                self.encoder_output.insert_embeds,
                self.encoder_output.delete_embeds, z_samples)
            loss = self.decoder.loss(self.encoder_output, self.y)

            if self.acc_param_grads:
                all_input_params = input_k_rv + self.params
            else:
                all_input_params = input_k_rv
            #loss = self.loss_fn(input_k_rv, self.y, self.model, self.all_z[k])
            #all_grads_rv_k = torch.autograd.grad(loss, all_input_params, retain_graph=True)

            loss.backward(retain_variables=True)
            all_grads_rv_k = [i.grad for i in all_input_params]

            if self.max_grad_norm > 0:
                self.clip_grad_norm([g.data for g in all_grads_rv_k],
                                    self.max_grad_norm)

            input_grads_rv_k = all_grads_rv_k[:len(input_k_rv)]
            param_grads_rv_k = all_grads_rv_k[len(input_k_rv):]

            if self.acc_param_grads:
                H_wx_v_list = []
                for i, p_grad_rv_k in enumerate(param_grads_rv_k):
                    H_wx_v = (p_grad_rv_k.data -
                              try_gpu(self.param_grads[i][k])) / r
                    H_wx_v_list.append(H_wx_v)
                    if self.params[i].grad is None:
                        # try removing GPU below
                        self.params[i].grad = GPUVariable(
                            torch.zeros(self.params[i].size()).type_as(
                                self.params[i].data))
                if self.max_grad_norm > 0:
                    self.clip_grad_norm(H_wx_v_list, self.max_grad_norm)
                for i in range(len(self.params)):
                    self.params[i].grad.data += -H_wx_v_list[i]
            for i, x_k_rv_grad in enumerate(input_grads_rv_k):
                H_xx_v = (x_k_rv_grad.data - self.input_grads[i][k]) / r
                input_H_xx_v.append(H_xx_v)
            input_kp1_grad = [
                x_kp1_grad - H_xx_v
                for (x_kp1_grad, H_xx_v) in zip(input_kp1_grad, input_H_xx_v)
            ]

            if self.max_grad_norm > 0:
                self.clip_grad_norm(input_kp1_grad, self.max_grad_norm)
            p_kp1_grad = [p.mul_(self.momentum) for p in p_kp1_grad]

            if verbose:
                print('mom', k, input_kp1_grad[0][0].norm())

        return input_kp1_grad
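The core trick in backward_mom is a finite-difference Hessian-vector product: H @ v is approximated by (grad L(x + r*v) - grad L(x)) / r with r = self.eps, which is exactly what the (x_k_rv_grad.data - self.input_grads[i][k]) / r lines compute. A standalone sketch of the identity, written in current PyTorch for clarity:

    import torch

    def fd_hvp(f, x, v, r=1e-3):
        # (grad f(x + r v) - grad f(x)) / r  ~  Hessian(f)(x) @ v
        def grad_at(y):
            y = y.detach().requires_grad_(True)
            return torch.autograd.grad(f(y), y)[0]
        return (grad_at(x + r * v) - grad_at(x)) / r

    x = torch.tensor([1.0, 2.0])
    v = torch.tensor([1.0, 0.0])
    f = lambda y: (y ** 3).sum()  # Hessian is diag(6 y)
    print(fd_hvp(f, x, v))        # ~ tensor([6., 0.])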
Example #14
    def _build_model(cls, model_config, optim_config, data_config):
        """Build Editor.

        Args:
            model_config (Config): Editor config
            optim_config (Config): optimization config
            data_config (Config): dataset config

        Returns:
            (EditRetriever, Optimizer): the combined VAE/edit retriever and its optimizer
        """

        file_path = join(data.workspace.word_vectors, model_config.wvec_path)
        word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                          model_config.vocab_size,
                                          model_config.num_copy_tokens)
        word_dim = word_embeddings.embed_dim

        edit_model = cls._build_editor(model_config,
                                       data_config,
                                       word_embeddings,
                                       word_dim,
                                       vae_mode=False)

        # VAE retriever
        vocab_dict = word_embeddings.vocab._word2index
        encoder = Encoder(word_dim,
                          model_config.agenda_dim,
                          model_config.hidden_dim,
                          model_config.encoder_layers,
                          len(data_config.source_cols),
                          model_config.encoder_dropout_prob,
                          use_vae=True,
                          kappa=model_config.vae_kappa,
                          use_target=False)
        source_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_source_embeds)
        target_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_target_embeds)
        ret_copy_len = [5, 10, 165]
        num_inputs = len(data_config.source_cols)
        decoder_cell = AttentionDecoderCell(
            target_token_embedder,
            2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
            model_config.agenda_dim,
            model_config.hidden_dim,
            model_config.hidden_dim,
            model_config.attention_dim,
            num_layers=model_config.decoder_layers,
            num_inputs=num_inputs,
            dropout_prob=model_config.decoder_dropout_prob,
            disable_attention=True)
        vae_model = VAERetriever(source_token_embedder, encoder, decoder_cell,
                                 ret_copy_len)
        ret_model = vae_model

        vae_ret_model = EditRetriever(vae_model, ret_model, edit_model)
        vae_ret_model = try_gpu(vae_ret_model)

        optimizer = optim.Adam(vae_ret_model.parameters(),
                               lr=optim_config.learning_rate)
        #optimizer = optim.SGD(vae_ret_model.parameters(), lr=optim_config.learning_rate)

        return vae_ret_model, optimizer
Example #15
 def _create_model(self):
     config = self.config
     self.model = create_model(config)
     self.model = try_gpu(self.model)
Example #16
    def __init__(self, config, save_dir):
        super(MiniWoBTrainingRun, self).__init__(config, save_dir)
        self.workspace.add_dir('traces_replay', join('traces', 'replay'))
        self.workspace.add_file('traces_demo', join('traces', 'demo-parse-log.txt'))

        # need to make sure that these times coincide
        assert config.log.trace_evaluate % config.log.evaluate == 0
        assert (config.log.explore % config.explore.program == 0 or
                config.log.explore % config.explore.neural == 0)
        assert config.log.replay % config.train.replay == 0
        assert config.log.trace_replay % config.log.replay == 0
        assert config.log.trace_explore % config.log.explore == 0

        # construct environment
        Episode.configure(config.discount_negative_reward)
        env = Environment.make(config.env.domain, config.env.subdomain)  # TODO: Refactor into a get_environment
        env.configure(
            num_instances=config.env.num_instances, seeds=range(config.env.num_instances), headless=config.env.headless,
            base_url=os.environ.get("MINIWOB_BASE_URL"),
            cache_state=False,  # never cache state
            reward_processor=get_reward_processor(config.env.reward_processor),
            wait_ms=config.env.wait_ms,
            block_on_reset=config.env.block_on_reset,
            refresh_freq=config.env.refresh_freq,
        )
        self._env = env

        # construct episode generators
        self._basic_episode_generator = BasicEpisodeGenerator(self._env,
                                        config.explore.max_steps_per_episode,
                                        config.log.visualize_attention)

        def state_equality_checker(s1, s2):
            """Compare two State objects."""
            r1 = s1.dom.visualize() if s1 else None
            r2 = s2.dom.visualize() if s2 else None
            return r1 == r2
            # TODO(kelvin): better equality check

        # construct episode logger
        trace_dir = join(self.workspace.root, 'traces')
        self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                             self.metadata)

        # construct replay buffer

        # group episodes by query fields
        episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
        episode_identifier = lambda ep: id(ep)

        # each has its own buffer
        group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
            max_size=config.replay_buffer.size,
            sampling_quantile=1.0,
            discount_factor=config.gamma)

        # buffers are combined into a single grouped buffer
        self._replay_buffer = GroupedReplayBuffer(
            episode_grouper, episode_identifier,
            group_buffer_factory, min_group_size=config.replay_buffer.min_size)

        self._replay_steps = config.train.replay_steps
        self._gamma = config.gamma

        # construct replay logger
        self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                self.tb_logger, self.metadata)

        # load demonstrations
        with open(self.workspace.traces_demo, 'w', 'utf8') as fout:     # pylint: disable=no-member
            # NOTE: this may be an empty list for some tasks
            self._demonstrations = load_demonstrations(
                    config.env.subdomain, config.demonstrations.base_dir,
                    config.demonstrations.parser, logfile=fout,
                    min_raw_reward=config.demonstrations.min_raw_reward)

            # keep a random subset of demonstrations
            with random_seed(0):
                random.shuffle(self._demonstrations)
            self._demonstrations = self._demonstrations[:config.demonstrations.max_to_use]

        num_demonstrations = len(self._demonstrations)
        self.metadata['stats.num_demonstrations'] = num_demonstrations
        if num_demonstrations == 0:
            logging.warn('NO DEMONSTRATIONS AVAILABLE')

        # build neural policy
        neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
        optimizer = optim.Adam(neural_policy.parameters(),
                               lr=config.train.learning_rate)

        # TODO: reload replay buffer?
        self.train_state = self.checkpoints.load_latest(
                neural_policy, optimizer)

        # build program policy
        self._program_policy = self._build_program_policy()
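episode_grouper keys the replay buffer by the frozen set of query-field names in an episode's first state, so episodes for structurally similar tasks share one prioritized buffer. The grouping reduces to dictionary keying on frozensets; a hypothetical miniature:

    episodes = [frozenset({"name", "color"}),
                frozenset({"name"}),
                frozenset({"name", "color"})]
    groups = {}
    for fields in episodes:
        groups.setdefault(fields, []).append("episode")
    print({tuple(sorted(k)): len(v) for k, v in groups.items()})
    # {('color', 'name'): 2, ('name',): 1}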