def __init__(self, config, dataset):
    super(HRED, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.rnn_type = config['rnn_type']
    self.bidirectional = config['bidirectional']
    self.num_directions = 2 if self.bidirectional else 1
    self.dropout_ratio = config['dropout_ratio']
    self.strategy = config['decoding_strategy']
    self.attention_type = config['attention_type']
    self.alignment_method = config['alignment_method']

    if self.strategy not in ['topk_sampling', 'greedy_search', 'beam_search']:
        raise NotImplementedError("{} decoding strategy not implemented".format(self.strategy))
    if self.strategy == 'beam_search':
        self.beam_size = config['beam_size']

    self.context_size = self.hidden_size * self.num_directions
    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx

    # define layers and loss
    self.token_embedder = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=self.padding_token_idx)
    self.utterance_encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    self.context_encoder = BasicRNNEncoder(
        self.hidden_size * 2, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    if self.attention_type is not None:
        self.decoder = AttentionalRNNDecoder(
            self.embedding_size + self.hidden_size, self.hidden_size, self.context_size, self.num_dec_layers,
            self.rnn_type, self.dropout_ratio, self.attention_type, self.alignment_method
        )
    else:
        self.decoder = BasicRNNDecoder(
            self.embedding_size + self.hidden_size, self.hidden_size, self.num_dec_layers, self.rnn_type,
            self.dropout_ratio
        )
    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(self.hidden_size, self.vocab_size)
    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx, reduction='none')
    self.max_target_length = config['max_seq_length']

    # parameters initialization
    self.apply(xavier_normal_initialization)
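# Illustrative sketch only: the hierarchy implied by the sizes above, written with plain
# torch.nn modules rather than the library's BasicRNNEncoder. A bidirectional utterance
# encoder yields 2 * hidden_size vectors per utterance, which matches the input size
# declared for the context encoder. All names and shapes here are toy assumptions, not
# the model's actual forward pass.
import torch
import torch.nn as nn

embedding_size, hidden_size = 32, 64
utterance_rnn = nn.GRU(embedding_size, hidden_size, batch_first=True, bidirectional=True)
context_rnn = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)

embedded = torch.randn(2, 3, 5, embedding_size)       # (batch, num_utterances, utterance_len, emb)
utterance_vectors = []
for t in range(embedded.size(1)):
    _, h_n = utterance_rnn(embedded[:, t])            # h_n: (2, batch, hidden_size)
    utterance_vectors.append(torch.cat([h_n[0], h_n[1]], dim=-1))
utterance_vectors = torch.stack(utterance_vectors, dim=1)   # (batch, num_utterances, 2 * hidden_size)
context_states, _ = context_rnn(utterance_vectors)          # dialogue-level states fed to the decoder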
def __init__(self, config, dataset):
    super(PointerNet, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.rnn_type = config['rnn_type']
    self.bidirectional = config['bidirectional']
    self.dropout_ratio = config['dropout_ratio']
    self.strategy = config['decoding_strategy']

    self.is_attention = config['is_attention']
    self.is_pgen = config['is_pgen'] and self.is_attention
    self.is_coverage = config['is_coverage'] and self.is_attention
    if self.is_coverage:
        self.cov_loss_lambda = config['cov_loss_lambda']

    if self.strategy not in ['topk_sampling', 'greedy_search', 'beam_search']:
        raise NotImplementedError("{} decoding strategy not implemented".format(self.strategy))
    if self.strategy == 'beam_search':
        self.beam_size = config['beam_size']

    self.context_size = self.hidden_size

    # define layers and loss
    self.source_token_embedder = nn.Embedding(
        self.source_vocab_size, self.embedding_size, padding_idx=self.padding_token_idx
    )
    if config['share_vocab']:
        self.target_token_embedder = self.source_token_embedder
    else:
        self.target_token_embedder = nn.Embedding(
            self.target_vocab_size, self.embedding_size, padding_idx=self.padding_token_idx
        )

    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    self.decoder = PointerRNNDecoder(
        self.target_vocab_size, self.embedding_size, self.hidden_size, self.context_size, self.num_dec_layers,
        self.rnn_type, self.dropout_ratio,
        is_attention=self.is_attention, is_pgen=self.is_pgen, is_coverage=self.is_coverage
    )

    # parameters initialization
    self.apply(xavier_normal_initialization)
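# Illustrative sketch only (not PointerRNNDecoder): the pointer-generator mixture that the
# is_pgen flag turns on. A soft switch p_gen blends the vocabulary distribution with the
# attention distribution scattered onto source token ids; with is_coverage, a coverage
# penalty discourages attending to the same position repeatedly. Values below are random
# toy tensors.
import torch

batch, vocab_size, src_len = 2, 10, 4
vocab_dist = torch.softmax(torch.randn(batch, vocab_size), dim=-1)
attn_dist = torch.softmax(torch.randn(batch, src_len), dim=-1)
p_gen = torch.sigmoid(torch.randn(batch, 1))
src_ids = torch.randint(0, vocab_size, (batch, src_len))

final_dist = (p_gen * vocab_dist).scatter_add(1, src_ids, (1 - p_gen) * attn_dist)
assert torch.allclose(final_dist.sum(dim=-1), torch.ones(batch))   # still a distribution

coverage = torch.zeros(batch, src_len)                        # running sum of past attention
cov_loss = torch.sum(torch.min(attn_dist, coverage), dim=-1)  # weighted by cov_loss_lambda
coverage = coverage + attn_dist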
def __init__(self, config, dataset):
    super(RNNVAE, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.latent_size = config['latent_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.num_highway_layers = config['num_highway_layers']
    self.rnn_type = config['rnn_type']
    self.max_epoch = config['epochs']
    self.bidirectional = config['bidirectional']
    self.dropout_ratio = config['dropout_ratio']
    self.eval_generate_num = config['eval_generate_num']
    self.max_length = config['max_seq_length']

    self.num_directions = 2 if self.bidirectional else 1
    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx

    # define layers and loss
    self.token_embedder = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=self.padding_token_idx)
    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    self.decoder = BasicRNNDecoder(
        self.embedding_size, self.hidden_size, self.num_dec_layers, self.rnn_type, self.dropout_ratio
    )
    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(self.hidden_size, self.vocab_size)
    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx, reduction='none')

    if self.rnn_type == "lstm":
        self.hidden_to_mean = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)
        self.hidden_to_logvar = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)
        self.latent_to_hidden = nn.Linear(self.latent_size, 2 * self.hidden_size)
    elif self.rnn_type == 'gru' or self.rnn_type == 'rnn':
        self.hidden_to_mean = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)
        self.hidden_to_logvar = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)
        self.latent_to_hidden = nn.Linear(self.latent_size, 2 * self.hidden_size)
    else:
        raise ValueError("No such rnn type {} for RNNVAE.".format(self.rnn_type))

    # parameters initialization
    self.apply(xavier_normal_initialization)
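# Illustrative sketch only: how projections named like the ones above are typically used in
# a VAE, i.e. the reparameterization trick plus the KL term. Layer and tensor names mirror
# the constructor, but everything here is a standalone toy, not the model's code.
import torch
import torch.nn as nn

num_directions, hidden_size, latent_size, batch = 2, 64, 16, 8
hidden_to_mean = nn.Linear(num_directions * hidden_size, latent_size)
hidden_to_logvar = nn.Linear(num_directions * hidden_size, latent_size)
latent_to_hidden = nn.Linear(latent_size, 2 * hidden_size)

encoder_state = torch.randn(batch, num_directions * hidden_size)
mean, logvar = hidden_to_mean(encoder_state), hidden_to_logvar(encoder_state)
z = mean + torch.randn_like(mean) * torch.exp(0.5 * logvar)               # z ~ N(mean, exp(logvar))
decoder_init = latent_to_hidden(z)                                         # (batch, 2 * hidden_size)
kld = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=-1)    # KL(q(z|x) || N(0, I))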
def __init__(self, config, dataset):
    super(CNNVAE, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.latent_size = config['latent_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_highway_layers = config['num_highway_layers']
    self.rnn_type = config['rnn_type']
    self.max_epoch = config['epochs']
    self.decoder_kernel_size = config['decoder_kernel_size']
    self.decoder_dilations = config['decoder_dilations']
    self.bidirectional = config['bidirectional']
    self.dropout_ratio = config['dropout_ratio']
    self.eval_generate_num = config['eval_generate_num']
    self.max_length = config['max_seq_length']

    self.num_directions = 2 if self.bidirectional else 1
    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx

    # define layers and loss
    self.token_embedder = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=self.padding_token_idx)
    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    self.decoder = BasicCNNDecoder(
        self.embedding_size, self.latent_size, self.decoder_kernel_size, self.decoder_dilations, self.dropout_ratio
    )

    self.highway_1 = Highway(self.num_highway_layers, self.embedding_size)
    self.highway_2 = Highway(self.num_highway_layers, self.num_directions * self.hidden_size)
    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(self.decoder_kernel_size[-1], self.vocab_size)
    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx, reduction='none')

    self.hidden_to_mean = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)
    self.hidden_to_logvar = nn.Linear(self.num_directions * self.hidden_size, self.latent_size)

    # parameters initialization
    self.apply(xavier_normal_initialization)
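# Illustrative sketch only: the gating performed by a highway layer such as highway_1 /
# highway_2 above. A learned transform gate mixes a nonlinear transform of the input with
# the identity path, keeping the output size equal to the input size. ToyHighway is a
# stand-in, not the library's Highway class.
import torch
import torch.nn as nn

class ToyHighway(nn.Module):
    def __init__(self, size):
        super().__init__()
        self.transform = nn.Linear(size, size)
        self.gate = nn.Linear(size, size)

    def forward(self, x):
        g = torch.sigmoid(self.gate(x))                          # transform gate in (0, 1)
        return g * torch.relu(self.transform(x)) + (1 - g) * x

layer = ToyHighway(32)
out = layer(torch.randn(4, 32))                                  # shape preserved: (4, 32)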
def __init__(self, config, dataset):
    super(MaskGANGenerator, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.rnn_type = config['rnn_type']
    self.bidirectional = config['bidirectional']
    self.alignment_method = config['alignment_method']
    self.dropout_ratio = config['dropout_ratio']
    self.attention_type = config['attention_type']
    self.context_size = config['context_size']
    self.gamma = config['rl_discount_rate']
    self.advantage_clipping = config['advantage_clipping']
    self.eval_generate_num = config['eval_generate_num']

    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx
    self.mask_token_idx = dataset.user_token_idx[0]
    self.max_length = config['max_seq_length']

    self.embedder = nn.Embedding(self.vocab_size, self.embedding_size)

    # note: batch_first is True
    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    if self.attention_type is not None:
        self.decoder = AttentionalRNNDecoder(
            self.embedding_size, self.hidden_size, self.context_size, self.num_dec_layers, self.rnn_type,
            self.dropout_ratio, self.attention_type, self.alignment_method
        )
    else:
        self.decoder = BasicRNNDecoder(
            self.embedding_size, self.hidden_size, self.num_dec_layers, self.rnn_type, self.dropout_ratio
        )

    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(self.hidden_size, self.vocab_size)

    # parameters initialization
    self.apply(xavier_normal_initialization)
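# Illustrative sketch only: one common way a discount rate (gamma) and advantage clipping
# are applied to per-step rewards in policy-gradient training. The helper below is
# hypothetical and the clipping bound is an assumed value; this is not the generator's
# own method.
import torch

def discounted_returns(rewards, gamma):
    # rewards: (batch, seq_len) per-step rewards from the discriminator
    returns = torch.zeros_like(rewards)
    running = torch.zeros(rewards.size(0))
    for t in reversed(range(rewards.size(1))):
        running = rewards[:, t] + gamma * running
        returns[:, t] = running
    return returns

rewards = torch.randn(2, 5)
baseline = torch.zeros(2, 5)                                   # e.g. the critic's value estimates
advantages = discounted_returns(rewards, gamma=0.95) - baseline
advantages = advantages.clamp(-5.0, 5.0)                       # advantage_clipping assumed to be 5.0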
def __init__(self, config, dataset):
    super(MaskGANDiscriminator, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.rnn_type = config['rnn_type']
    self.bidirectional = config['bidirectional']
    self.alignment_method = config['alignment_method']
    self.dropout_ratio = config['dropout_ratio']
    self.attention_type = config['attention_type']
    self.context_size = config['context_size']

    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx
    self.mask_token_idx = dataset.user_token_idx[0]

    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    if self.attention_type is not None:
        self.decoder = AttentionalRNNDecoder(
            self.embedding_size, self.hidden_size, self.context_size, self.num_dec_layers, self.rnn_type,
            self.dropout_ratio, self.attention_type, self.alignment_method
        )
    else:
        self.decoder = BasicRNNDecoder(
            self.embedding_size, self.hidden_size, self.num_dec_layers, self.rnn_type, self.dropout_ratio
        )

    self.dropout = nn.Dropout(self.dropout_ratio)
    self.fc_linear = nn.Linear(self.hidden_size, 1)
    self.critic_fc_linear = nn.Linear(self.hidden_size, 1)

    # parameters initialization
    self.apply(xavier_normal_initialization)
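# Illustrative sketch only: what the two output heads above produce. fc_linear yields one
# real/fake logit per time step (the per-token reward signal for the generator), while
# critic_fc_linear yields a per-step value estimate used as a baseline. Shapes and tensors
# are toy stand-ins.
import torch
import torch.nn as nn

batch, seq_len, hidden_size = 2, 7, 64
fc_linear = nn.Linear(hidden_size, 1)
critic_fc_linear = nn.Linear(hidden_size, 1)

decoder_hidden = torch.randn(batch, seq_len, hidden_size)
fake_logits = fc_linear(decoder_hidden).squeeze(-1)            # (batch, seq_len)
per_step_reward = torch.log(torch.sigmoid(fake_logits))        # log D(x_t), reward for filled tokens
values = critic_fc_linear(decoder_hidden).squeeze(-1)          # critic baseline per step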
def __init__(self, config, dataset):
    super(CVAE, self).__init__(config, dataset)

    # load parameters info
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.latent_size = config['latent_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.rnn_type = config['rnn_type']
    self.max_epoch = config['epochs']
    self.bidirectional = config['bidirectional']
    self.dropout_ratio = config['dropout_ratio']
    self.eval_generate_num = config['eval_generate_num']
    self.prior_neuron_size = config['prior_neuron_size']  # neuron size in the prior network
    self.posterior_neuron_size = config['posterior_neuron_size']  # neuron size in the posterior network
    self.latent_neuron_size = config['latent_neuron_size']  # neuron size in latent_to_hidden

    self.num_directions = 2 if self.bidirectional else 1

    # define layers and loss
    self.token_embedder = nn.Embedding(self.vocab_size, self.embedding_size, padding_idx=self.padding_token_idx)
    self.encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size, self.num_enc_layers, self.rnn_type, self.dropout_ratio,
        self.bidirectional
    )
    self.decoder = BasicRNNDecoder(
        self.embedding_size, self.hidden_size, self.num_dec_layers, self.rnn_type, self.dropout_ratio
    )
    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(self.hidden_size, self.vocab_size)
    self.loss = nn.CrossEntropyLoss(ignore_index=self.padding_token_idx, reduction='none')

    if self.rnn_type == "lstm":
        # prior network
        self.prior_mean_linear1 = nn.Linear(2 * self.num_directions * self.hidden_size, self.prior_neuron_size)
        self.prior_mean_linear2 = nn.Linear(self.prior_neuron_size, self.latent_size)
        self.prior_logvar_linear1 = nn.Linear(2 * self.num_directions * self.hidden_size, self.prior_neuron_size)
        self.prior_logvar_linear2 = nn.Linear(self.prior_neuron_size, self.latent_size)

        # posterior network
        self.posterior_mean_linear1 = nn.Linear(
            3 * self.num_directions * self.hidden_size, self.posterior_neuron_size
        )
        self.posterior_mean_linear2 = nn.Linear(self.posterior_neuron_size, self.latent_size)
        self.posterior_logvar_linear1 = nn.Linear(
            3 * self.num_directions * self.hidden_size, self.posterior_neuron_size
        )
        self.posterior_logvar_linear2 = nn.Linear(self.posterior_neuron_size, self.latent_size)

        # prepare for the decoder: input size = title + previous line + z
        self.latent_to_hidden = nn.Linear(
            2 * self.num_directions * self.hidden_size + self.latent_size, 2 * self.hidden_size
        )
    elif self.rnn_type == 'gru' or self.rnn_type == 'rnn':
        # prior network
        self.prior_mean_linear1 = nn.Linear(2 * self.num_directions * self.hidden_size, self.prior_neuron_size)
        self.prior_mean_linear2 = nn.Linear(self.prior_neuron_size, self.latent_size)
        self.prior_logvar_linear1 = nn.Linear(2 * self.num_directions * self.hidden_size, self.prior_neuron_size)
        self.prior_logvar_linear2 = nn.Linear(self.prior_neuron_size, self.latent_size)

        # posterior network
        self.posterior_mean_linear1 = nn.Linear(
            3 * self.num_directions * self.hidden_size, self.posterior_neuron_size
        )
        self.posterior_mean_linear2 = nn.Linear(self.posterior_neuron_size, self.latent_size)
        self.posterior_logvar_linear1 = nn.Linear(
            3 * self.num_directions * self.hidden_size, self.posterior_neuron_size
        )
        self.posterior_logvar_linear2 = nn.Linear(self.posterior_neuron_size, self.latent_size)

        # prepare for the decoder
        self.latent_to_hidden1 = nn.Linear(
            2 * self.num_directions * self.hidden_size + self.latent_size, self.latent_neuron_size
        )
        self.latent_to_hidden2 = nn.Linear(self.latent_neuron_size, self.hidden_size)
    else:
        raise ValueError("No such rnn type {} for CVAE.".format(self.rnn_type))

    # parameters initialization
    self.apply(self.xavier_uniform_initialization)
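# Illustrative sketch only: how a prior network (conditioned on the context) and a posterior
# network (conditioned on context plus response) are typically combined. At training time z
# is sampled from the posterior and KL(posterior || prior) is penalized; at inference z comes
# from the prior. All tensors are toy stand-ins.
import torch

batch, latent_size = 4, 16
prior_mean, prior_logvar = torch.randn(batch, latent_size), torch.randn(batch, latent_size)
post_mean, post_logvar = torch.randn(batch, latent_size), torch.randn(batch, latent_size)

z = post_mean + torch.randn_like(post_mean) * torch.exp(0.5 * post_logvar)
kld = 0.5 * torch.sum(
    prior_logvar - post_logvar
    + (post_logvar.exp() + (post_mean - prior_mean).pow(2)) / prior_logvar.exp()
    - 1,
    dim=-1,
)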
def __init__(self, config, dataset):
    super(Kb2Text, self).__init__(config, dataset)

    # load parameters info
    self.rnn_type = config['rnn_type']
    self.attention_type = config['attention_type']
    self.alignment_method = config['alignment_method']
    self.embedding_size = config['embedding_size']
    self.hidden_size = config['hidden_size']
    self.num_enc_layers = config['num_enc_layers']
    self.num_dec_layers = config['num_dec_layers']
    self.attn_weight_dropout_ratio = config['attn_weight_dropout_ratio']
    self.dropout_ratio = config['dropout_ratio']
    self.strategy = config['decoding_strategy']
    self.num_heads = config['num_heads']
    self.GAT_layer_nums = config['GAT_layer_nums']

    self.NODE_TYPE = {'entity': 0, 'root': 1, 'relation': 2}
    self.source_relation_idx2token = dataset.source_relation_idx2token
    self.source_relation_token2idx = dataset.source_relation_token2idx

    self.REL_SET = []
    self.type_vocab = []
    with open(config['relation_vocab']) as f:
        for line in f:
            self.REL_SET.append(line.rstrip())
    with open(config['type_vocab']) as f:
        for line in f:
            self.type_vocab.append(line.rstrip())
    self.REL_LEN = 2 * len(self.REL_SET) + 1

    if self.strategy not in ['topk_sampling', 'greedy_search', 'beam_search']:
        raise NotImplementedError("{} decoding strategy not implemented".format(self.strategy))
    if self.strategy == 'beam_search':
        self.beam_size = config['beam_size']

    self.padding_token_idx = dataset.padding_token_idx
    self.sos_token_idx = dataset.sos_token_idx
    self.eos_token_idx = dataset.eos_token_idx
    self.source_token2idx = dataset.source_token2idx
    self.source_entity_token2idx = dataset.source_entity_token2idx
    self.entity_vocab_size = len(self.source_entity_token2idx)

    # define layers and loss
    self.source_token_embedder = nn.Embedding(
        self.source_vocab_size, self.embedding_size, padding_idx=self.padding_token_idx
    )
    self.target_token_embedder = nn.Embedding(
        self.target_vocab_size, self.embedding_size, padding_idx=self.padding_token_idx
    )
    self.entity_token_embedder = nn.Embedding(
        self.entity_vocab_size, self.embedding_size, padding_idx=self.padding_token_idx
    )
    self.rel_token_embedder = nn.Embedding(self.REL_LEN, self.embedding_size, padding_idx=self.padding_token_idx)

    self.entity_encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size // 2, self.num_enc_layers, 'lstm', self.dropout_ratio
    )
    self.source_encoder = BasicRNNEncoder(
        self.embedding_size, self.hidden_size // 2, self.num_enc_layers, 'lstm', self.dropout_ratio
    )
    self.graph_encoder = GraphTransformer(
        self.embedding_size, self.attn_weight_dropout_ratio, self.dropout_ratio, self.GAT_layer_nums
    )
    self.attn_layer = MultiHeadAttention(self.embedding_size, self.num_heads, self.attn_weight_dropout_ratio)
    self.decoder = ContextAttentionalDecoder(
        self.embedding_size, self.hidden_size, self.embedding_size, self.num_dec_layers, self.rnn_type,
        self.dropout_ratio, self.attention_type, self.alignment_method
    )

    self.dropout = nn.Dropout(self.dropout_ratio)
    self.vocab_linear = nn.Linear(3 * self.hidden_size, self.target_vocab_size)
    self.copy_linear = nn.Linear(3 * self.hidden_size, 1)
    self.d_linear = nn.Linear(3 * self.embedding_size, self.embedding_size)
    self.copy_attn = MultiHeadAttention(self.embedding_size, 1, 0., return_distribute=True)
    self.loss = nn.NLLLoss(ignore_index=self.padding_token_idx, reduction='none')
    self.max_target_length = config['max_seq_length']

    # parameters initialization
    self.apply(xavier_normal_initialization)
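# Illustrative sketch only: the kind of copy gate that copy_linear and copy_attn above set
# up. A scalar gate blends the vocabulary distribution with the attention distribution over
# source entities (scattered onto target vocabulary ids), and NLLLoss is then applied to the
# log of the mixture. Everything below is a toy stand-in, not the model's forward pass.
import torch

batch, vocab_size, num_entities = 2, 10, 4
vocab_dist = torch.softmax(torch.randn(batch, vocab_size), dim=-1)
entity_attn = torch.softmax(torch.randn(batch, num_entities), dim=-1)
entity_ids = torch.randint(0, vocab_size, (batch, num_entities))    # entity -> target vocab id
p_copy = torch.sigmoid(torch.randn(batch, 1))                       # gate from copy_linear

mixed = ((1 - p_copy) * vocab_dist).scatter_add(1, entity_ids, p_copy * entity_attn)
log_probs = torch.log(mixed + 1e-12)                                # input expected by nn.NLLLoss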