def _build_model(cls, model_config, optim_config, data_config):
    """Build an Editor model and its optimizer.

    Args:
        model_config (Config): Editor config
        optim_config (Config): optimization config
        data_config (Config): dataset config

    Returns:
        (Editor, torch.optim.Adam): the GPU-placed model and its optimizer
    """
    file_path = join(data.workspace.word_vectors, model_config.wvec_path)
    word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                      model_config.vocab_size,
                                      model_config.num_copy_tokens)
    word_dim = word_embeddings.embed_dim

    source_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_source_embeds)
    target_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_target_embeds)

    # number of input channels
    num_inputs = len(data_config.source_cols)

    decoder_cell = AttentionDecoderCell(
        target_token_embedder,
        2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
        model_config.agenda_dim,
        model_config.hidden_dim,
        model_config.hidden_dim,
        model_config.attention_dim,
        num_layers=model_config.decoder_layers,
        num_inputs=num_inputs,
        dropout_prob=model_config.decoder_dropout_prob,
        disable_attention=False)

    encoder = Encoder(word_dim, model_config.agenda_dim,
                      model_config.hidden_dim, model_config.encoder_layers,
                      num_inputs, model_config.encoder_dropout_prob, False)

    # NOTE(review): hard-coded per-channel copy lengths — presumably tuned
    # to the dataset's column widths; confirm against the data pipeline.
    copy_len = [5, 5, 40]
    model = Editor(source_token_embedder, encoder, decoder_cell,
                   copy_lens=copy_len)
    model = try_gpu(model)

    optimizer = optim.Adam(model.parameters(),
                           lr=optim_config.learning_rate)
    return model, optimizer
def _build_model(cls, model_config, optim_config, data_config):
    """Build an EditRetriever (VAE retriever wrapped around an Editor) and its optimizer.

    Args:
        model_config (Config): Editor config
        optim_config (Config): optimization config
        data_config (Config): dataset config

    Returns:
        (EditRetriever, torch.optim.Adam): the GPU-placed model and its optimizer
    """
    file_path = join(data.workspace.word_vectors, model_config.wvec_path)
    word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                      model_config.vocab_size,
                                      model_config.num_copy_tokens)
    word_dim = word_embeddings.embed_dim

    # Plain (non-VAE) editor that the retriever feeds into.
    edit_model = cls._build_editor(model_config, data_config, word_embeddings,
                                   word_dim, vae_mode=False)

    # VAE retriever encoder: VAE mode enabled with concentration `kappa`,
    # source-only (no target side).
    encoder = Encoder(word_dim, model_config.agenda_dim,
                      model_config.hidden_dim, model_config.encoder_layers,
                      len(data_config.source_cols),
                      model_config.encoder_dropout_prob,
                      use_vae=True, kappa=model_config.vae_kappa,
                      use_target=False)

    source_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_source_embeds)
    target_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_target_embeds)

    # NOTE(review): hard-coded per-channel copy lengths for the retriever —
    # presumably tuned to the dataset's column widths; confirm.
    ret_copy_len = [5, 10, 165]

    # number of input channels
    num_inputs = len(data_config.source_cols)

    decoder_cell = AttentionDecoderCell(
        target_token_embedder,
        2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
        model_config.agenda_dim,
        model_config.hidden_dim,
        model_config.hidden_dim,
        model_config.attention_dim,
        num_layers=model_config.decoder_layers,
        num_inputs=num_inputs,
        dropout_prob=model_config.decoder_dropout_prob,
        disable_attention=True)  # retriever decoder runs without attention

    vae_model = VAERetriever(source_token_embedder, encoder, decoder_cell,
                             ret_copy_len)
    # The same VAE model serves as both the generative model and the retriever.
    ret_model = vae_model
    vae_ret_model = EditRetriever(vae_model, ret_model, edit_model)
    vae_ret_model = try_gpu(vae_ret_model)

    optimizer = optim.Adam(vae_ret_model.parameters(),
                           lr=optim_config.learning_rate)
    return vae_ret_model, optimizer