def _build_model(cls, model_config, optim_config, data_config):
        """Construct the Editor model together with its Adam optimizer.

        A single set of word embeddings is loaded and handed to both the
        source and the target token embedder.

        Args:
            model_config (Config): Editor config
            optim_config (Config): optimization config
            data_config (Config): dataset config

        Returns:
            tuple: ``(model, optimizer)`` -- the Editor after being passed
            through ``try_gpu`` and an Adam optimizer over its parameters.
        """
        wvec_file = join(data.workspace.word_vectors, model_config.wvec_path)
        embeddings = load_embeddings(
            wvec_file,
            model_config.word_dim,
            model_config.vocab_size,
            model_config.num_copy_tokens,
        )
        embed_dim = embeddings.embed_dim

        # Both embedders wrap the same embeddings object; they differ only
        # in the per-side train flag taken from the config.
        src_embedder = TokenEmbedder(embeddings,
                                     model_config.train_source_embeds)
        tgt_embedder = TokenEmbedder(embeddings,
                                     model_config.train_target_embeds)

        # number of input channels
        n_channels = len(data_config.source_cols)

        # 2 * embed_dim because we concat base and copy vectors
        decoder_input_dim = 2 * embed_dim
        agenda_dim = model_config.agenda_dim
        hidden_dim = model_config.hidden_dim

        # The decoder cell is built before the encoder, as in the original
        # ordering of module construction.
        attn_decoder_cell = AttentionDecoderCell(
            tgt_embedder,
            decoder_input_dim,
            agenda_dim,
            hidden_dim,
            hidden_dim,
            model_config.attention_dim,
            num_layers=model_config.decoder_layers,
            num_inputs=n_channels,
            dropout_prob=model_config.decoder_dropout_prob,
            disable_attention=False,
        )

        src_encoder = Encoder(
            embed_dim,
            agenda_dim,
            hidden_dim,
            model_config.encoder_layers,
            n_channels,
            model_config.encoder_dropout_prob,
            False,
        )

        copy_lens = [5, 5, 40]
        editor = try_gpu(
            Editor(src_embedder,
                   src_encoder,
                   attn_decoder_cell,
                   copy_lens=copy_lens))

        adam = optim.Adam(editor.parameters(), lr=optim_config.learning_rate)

        return editor, adam
예제 #2
0
    def _build_model(cls, model_config, optim_config, data_config):
        """Build an EditRetriever (editor + VAE retriever) and its optimizer.

        The editor is produced by ``cls._build_editor`` with
        ``vae_mode=False``. The retriever is a ``VAERetriever`` built from a
        VAE-enabled encoder and an attention-disabled decoder cell. The same
        word embeddings are used for the editor and the retriever.

        Args:
            model_config (Config): Editor config
            optim_config (Config): optimization config
            data_config (Config): dataset config

        Returns:
            tuple: ``(model, optimizer)`` -- the EditRetriever after being
            passed through ``try_gpu`` and an Adam optimizer over its
            parameters.
        """
        file_path = join(data.workspace.word_vectors, model_config.wvec_path)
        word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                          model_config.vocab_size,
                                          model_config.num_copy_tokens)
        word_dim = word_embeddings.embed_dim

        # number of input channels (computed once, used by encoder and decoder)
        num_inputs = len(data_config.source_cols)

        edit_model = cls._build_editor(model_config,
                                       data_config,
                                       word_embeddings,
                                       word_dim,
                                       vae_mode=False)

        # VAE retriever
        encoder = Encoder(word_dim,
                          model_config.agenda_dim,
                          model_config.hidden_dim,
                          model_config.encoder_layers,
                          num_inputs,
                          model_config.encoder_dropout_prob,
                          use_vae=True,
                          kappa=model_config.vae_kappa,
                          use_target=False)
        source_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_source_embeds)
        target_token_embedder = TokenEmbedder(word_embeddings,
                                              model_config.train_target_embeds)
        ret_copy_len = [5, 10, 165]
        decoder_cell = AttentionDecoderCell(
            target_token_embedder,
            2 * word_dim,
            # 2 * word_dim because we concat base and copy vectors
            model_config.agenda_dim,
            model_config.hidden_dim,
            model_config.hidden_dim,
            model_config.attention_dim,
            num_layers=model_config.decoder_layers,
            num_inputs=num_inputs,
            dropout_prob=model_config.decoder_dropout_prob,
            disable_attention=True)
        vae_model = VAERetriever(source_token_embedder, encoder, decoder_cell,
                                 ret_copy_len)

        # The same VAERetriever instance is deliberately passed as both the
        # first and the second argument of EditRetriever.
        vae_ret_model = EditRetriever(vae_model, vae_model, edit_model)
        vae_ret_model = try_gpu(vae_ret_model)

        optimizer = optim.Adam(vae_ret_model.parameters(),
                               lr=optim_config.learning_rate)

        return vae_ret_model, optimizer