def __init__(self, vocab_size, hidden_size, embed_units, max_hop=3,
             padding_idx=None, n_layers=1, attn_mode='mlp', dropout=0.0,
             use_gpu=False):
    """Build the Mem2Seq model.

    Components: an RNN encoder plus a multi-hop memory-network encoder over
    the input, and a memory-network decoder; training uses an NLL loss that
    ignores the padding token.

    Args:
        vocab_size (int): size of the shared vocabulary.
        hidden_size (int): RNN / memory hidden dimension.
        embed_units (int): word embedding dimension.
        max_hop (int): number of memory-network hops.
        padding_idx (int or None): padding token id, excluded from the loss.
        n_layers (int): stored layer count (not forwarded to sub-modules here).
        attn_mode (str): attention mode stored on the model.
        dropout (float): dropout probability for the RNN encoder.
        use_gpu (bool): move the model (and loss weight) to CUDA when True.
    """
    super(Mem2Seq, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.embed_units = embed_units
    self.max_hop = max_hop
    self.padding_idx = padding_idx
    self.attn_mode = attn_mode
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.n_layers = n_layers

    enc_embedder = Embedder(num_embeddings=self.vocab_size,
                            embedding_dim=self.embed_units,
                            padding_idx=self.padding_idx)
    self.rnn_encoder = RNNEncoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  dropout=self.dropout)
    # The memory encoder uses 'general' attention regardless of attn_mode.
    self.mem_encoder = EncoderMemNN(vocab=self.vocab_size,
                                    hidden_size=self.hidden_size,
                                    hop=self.max_hop,
                                    attn_mode='general',
                                    padding_idx=self.padding_idx)
    self.decoder = DecoderMemNN(vocab=self.vocab_size,
                                embedding_dim=self.hidden_size,
                                hidden_size=self.hidden_size,
                                hop=self.max_hop,
                                dropout=self.dropout,
                                padding_idx=self.padding_idx)

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
        # BUG FIX: the original called self.weight.cuda() unconditionally,
        # which raises AttributeError when padding_idx is None (weight=None).
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_bow=False,
             use_kd=False, use_dssm=False, use_posterior=False,
             weight_control=False, use_pg=False, use_gs=False, concat=False,
             pretrain_epoch=0):
    """Build the knowledge-grounded seq2seq model.

    Components: an utterance RNN encoder, a knowledge RNN encoder with
    prior/posterior dot-product attention for knowledge selection, an
    attentional RNN decoder, and the training losses (NLL over targets,
    KL between prior and posterior, optional BOW and DSSM objectives).

    Args:
        src_vocab_size, tgt_vocab_size (int): vocabulary sizes.
        embed_size, hidden_size (int): embedding / hidden dimensions.
        padding_idx (int or None): padding token id, excluded from the loss.
        num_layers (int): RNN layer count; bidirectional (bool): encoder dir.
        attn_mode (str): decoder attention mode; attn_hidden_size: its size.
        with_bridge (bool): add a Linear+Tanh bridge on the encoder state.
        tie_embedding (bool): share one embedder across encoder/decoder/
            knowledge encoder (requires equal vocab sizes).
        dropout (float): dropout probability.
        use_gpu (bool): move model and loss weight to CUDA.
        use_bow/use_kd/use_dssm/use_posterior/weight_control/use_pg/use_gs:
            feature switches stored on the model and used by later methods.
        concat (bool): forwarded to the decoder.
        pretrain_epoch (int): number of pretraining epochs stored on the model.
    """
    super(KnowledgeSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_bow = use_bow
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_kd = use_kd
    self.use_pg = use_pg
    self.use_gs = use_gs
    self.use_posterior = use_posterior
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # running baseline for policy-gradient training

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        knowledge_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        knowledge_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                      embedding_dim=self.embed_size,
                                      padding_idx=self.padding_idx)

    self.knowledge_encoder = RNNEncoder(input_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        embedder=knowledge_embedder,
                                        num_layers=self.num_layers,
                                        bidirectional=self.bidirectional,
                                        dropout=self.dropout)

    # Prior attention selects knowledge from the context alone; posterior
    # attention may also see the target response during training.
    self.prior_attention = Attention(query_size=self.hidden_size,
                                     memory_size=self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     mode="dot")
    self.posterior_attention = Attention(query_size=self.hidden_size,
                                         memory_size=self.hidden_size,
                                         hidden_size=self.hidden_size,
                                         mode="dot")

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat)

    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.softmax = nn.Softmax(dim=-1)
    self.sigmoid = nn.Sigmoid()
    self.softplus = nn.Softplus()

    if self.use_bow:
        # Bag-of-words auxiliary head predicting target words from the
        # knowledge-augmented state.
        self.bow_output_layer = nn.Sequential(
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.hidden_size),
            nn.Tanh(),
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.tgt_vocab_size),
            nn.LogSoftmax(dim=-1))

    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
        self.mse_loss = torch.nn.MSELoss(reduction='mean')

    if self.use_kd:
        self.knowledge_dropout = nn.Dropout()

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # `size_average=True` is deprecated; reduction='mean' is its equivalent.
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # BUG FIX: guard against padding_idx=None, where weight is None and
        # the original self.weight.cuda() raised AttributeError.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_bow=False,
             use_kd=False, use_dssm=False, use_posterior=False,
             weight_control=False, use_pg=False, use_gs=False, concat=False,
             pretrain_epoch=0):
    """Build the knowledge-grounded seq2seq model (BERT-encoding variant).

    Same architecture as the standard KnowledgeSeq2Seq constructor —
    utterance encoder, knowledge encoder with prior/posterior attention,
    attentional decoder, NLL/KL and optional BOW/DSSM losses — except that
    the source embedding is produced by a BERT-as-service client instead of
    a trainable Embedder (see NOTE below).
    """
    super(KnowledgeSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_bow = use_bow
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_kd = use_kd
    self.use_pg = use_pg
    self.use_gs = use_gs
    self.use_posterior = use_posterior
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # running baseline for policy-gradient training

    # NOTE(review): BertClient.encode() returns a numpy array of sentence
    # vectors for these two fixed sentences, yet it is passed to RNNEncoder
    # as `embedder`, where the replaced code supplied an Embedder module
    # (see the commented-out lines). This looks experimental/incomplete —
    # confirm RNNEncoder accepts a precomputed array here.
    bc = BertClient()
    enc_embedder = bc.encode(['你好', '吃饭了么'])
    # enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
    #                         embedding_dim=self.embed_size,
    #                         padding_idx=self.padding_idx)
    # Embedder(30004, 300, padding_idx=0)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        # Linear layer + Tanh combined via Sequential: the bridge outputs
        # the activated transformation of the encoder hidden state.
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        knowledge_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        knowledge_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                      embedding_dim=self.embed_size,
                                      padding_idx=self.padding_idx)

    self.knowledge_encoder = RNNEncoder(input_size=self.embed_size,
                                        hidden_size=self.hidden_size,
                                        embedder=knowledge_embedder,
                                        num_layers=self.num_layers,
                                        bidirectional=self.bidirectional,
                                        dropout=self.dropout)

    self.prior_attention = Attention(query_size=self.hidden_size,
                                     memory_size=self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     mode="dot")
    self.posterior_attention = Attention(query_size=self.hidden_size,
                                         memory_size=self.hidden_size,
                                         hidden_size=self.hidden_size,
                                         mode="dot")

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat)

    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.softmax = nn.Softmax(dim=-1)
    self.sigmoid = nn.Sigmoid()
    # Softplus: a smooth approximation to ReLU, used to constrain an
    # output to always be positive.
    self.softplus = nn.Softplus()

    if self.use_bow:
        self.bow_output_layer = nn.Sequential(
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.hidden_size),
            nn.Tanh(),
            nn.Linear(in_features=self.hidden_size,
                      out_features=self.tgt_vocab_size),
            nn.LogSoftmax(dim=-1))

    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
        self.mse_loss = torch.nn.MSELoss(reduction='mean')

    if self.use_kd:
        self.knowledge_dropout = nn.Dropout()

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    # NLL (negative log-likelihood) quantifies the mismatch between the
    # gold response and the generated response.
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # KL divergence between prior and posterior knowledge attention —
    # drives selection of related background knowledge to lead the
    # conversation. (`size_average=True` is deprecated; reduction='mean'
    # is its equivalent.)
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # BUG FIX: guard against padding_idx=None, where weight is None and
        # the original self.weight.cuda() raised AttributeError.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, corpus, config):
    """Build the topic-aware seq2seq model from a corpus and a config.

    Components: a neural topic model (built by build_neural_topic_model),
    a topic-to-feature projection, an RNN encoder, an optional bridge, and
    a topic-conditioned RNN decoder; training uses padding-ignoring NLL.

    Args:
        corpus: provides SRC/TGT/OUTPUT/TOPIC vocabularies and padding_idx.
        config: hyper-parameters and feature switches (embed/hidden sizes,
            attention settings, topic settings, decoder variant flags).
    """
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = corpus.SRC.vocab_size
    self.tgt_vocab_size = corpus.TGT.vocab_size
    self.output_vocab_size = corpus.OUTPUT.vocab_size
    self.topic_vocab_size = corpus.TOPIC.vocab_size
    self.padding_idx = corpus.padding_idx
    self.embed_size = config.embed_size
    self.hidden_size = config.hidden_size
    self.num_layers = config.num_layers
    self.bidirectional = config.bidirectional
    self.attn_mode = config.attn_mode
    self.attn_hidden_size = config.attn_hidden_size
    self.with_bridge = config.with_bridge
    self.tie_embedding = config.tie_embedding
    self.dropout = config.dropout
    self.use_gpu = config.use_gpu
    # FIX: the original assigned decoder_attention_channels twice; once
    # is enough.
    self.decoder_attention_channels = config.decoder_attention_channels
    self.topic_k = config.topic_k
    self.topic_num = config.topic_num
    self.use_ntm = config.use_ntm

    # topic
    self.build_neural_topic_model(corpus)
    self.t_to_feature = nn.Sequential(
        nn.Linear(self.embed_size * len(self.decoder_attention_channels),
                  self.hidden_size),
        nn.Tanh(),
        nn.Dropout(p=self.dropout),
    )

    # encoder
    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    # bridge
    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # decoder
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

    topic_vocab_size_ = self.topic_vocab_size
    if config.without_topic_project:
        topic_vocab_size_ = None
    # FIX: use a dedicated local name instead of shadowing the imported
    # RNNDecoder class; `x is not None and x is True` simplifies to
    # `x is True` (identical behavior).
    if config.without_fd is True:
        decoder_cls = RNNDecoder_noFd
        topic_vocab_size_ = None
    else:
        decoder_cls = RNNDecoder_full
    self.decoder = decoder_cls(
        input_size=self.embed_size,
        hidden_size=self.hidden_size,
        output_size=self.output_vocab_size,
        topic_size=topic_vocab_size_,
        trans_mat=self.trans_mat,
        embedder=dec_embedder,
        num_layers=self.num_layers,
        attn_mode=self.attn_mode,
        attn_hidden_size=self.attn_hidden_size,
        memory_size=self.hidden_size,
        feature_size=None,
        dropout=self.dropout,
        tgt_unk_idx=corpus.OUTPUT.itos.index('<unk>'),
        attention_channels=self.decoder_attention_channels,
    )

    # Loss Definition: zero the padding token's weight so it never
    # contributes to the NLL loss.
    if self.padding_idx is not None:
        weight = torch.ones(self.output_vocab_size)
        weight[self.padding_idx] = 0
    else:
        weight = None
    self.nll_loss = NLLLoss(weight=weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False):
    """Build the hierarchical seq2seq model.

    A word-level encoder and an utterance-level encoder are stacked into a
    hierarchical encoder; an attentional RNN decoder generates the target.
    The NLL loss ignores the padding token.
    """
    super(HSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu

    # Hierarchical encoder: word-level RNN feeding an utterance-level RNN.
    source_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.sub_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=source_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)
    self.hiera_encoder = RNNEncoder(input_size=self.hidden_size,
                                    hidden_size=self.hidden_size,
                                    num_layers=self.num_layers,
                                    bidirectional=self.bidirectional,
                                    dropout=self.dropout)
    self.encoder = HRNNEncoder(self.sub_encoder, self.hiera_encoder)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # Decoder shares the source embedder when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        target_embedder = source_embedder
    else:
        target_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=target_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss: the padding class gets zero weight so it never contributes.
    if self.padding_idx is None:
        pad_weight = None
    else:
        pad_weight = torch.ones(self.tgt_vocab_size)
        pad_weight[self.padding_idx] = 0
    self.nll_loss = NLLLoss(weight=pad_weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, corpus, config):
    """Assemble a plain attentional seq2seq model.

    Vocabulary sizes and the padding index come from `corpus`; every other
    hyper-parameter comes from `config`. The NLL loss ignores padding.
    """
    super(Seq2Seq, self).__init__()
    self.src_vocab_size = corpus.SRC.vocab_size
    self.tgt_vocab_size = corpus.TGT.vocab_size
    self.embed_size = config.embed_size
    self.hidden_size = config.hidden_size
    self.padding_idx = corpus.padding_idx
    self.num_layers = config.num_layers
    self.bidirectional = config.bidirectional
    self.attn_mode = config.attn_mode
    self.attn_hidden_size = config.attn_hidden_size
    self.with_bridge = config.with_bridge
    self.tie_embedding = config.tie_embedding
    self.dropout = config.dropout
    self.use_gpu = config.use_gpu

    source_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=source_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh(),
        )

    # Reuse the source embedder on the decoder side when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        target_embedder = source_embedder
    else:
        target_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=target_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.attn_hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)

    # Loss: zero weight on the padding class keeps it out of the average.
    if self.padding_idx is None:
        pad_weight = None
    else:
        pad_weight = torch.ones(self.tgt_vocab_size)
        pad_weight[self.padding_idx] = 0
    self.nll_loss = NLLLoss(weight=pad_weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             tie_embedding=False, margin=None, with_project=False,
             dropout=0.0, use_gpu=False):
    """Build the dual-encoder (DSSM-style) matching model.

    Two RNN encoders embed source and target independently; an optional
    linear projection (no bias) maps the source representation before
    matching. `margin` is stored for the ranking objective.
    """
    super(DSSM, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.margin = margin
    self.with_project = with_project

    # Source side.
    source_embedder = Embedder(num_embeddings=self.src_vocab_size,
                               embedding_dim=self.embed_size,
                               padding_idx=self.padding_idx)
    self.src_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=source_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)
    if self.with_project:
        self.project = nn.Linear(in_features=self.hidden_size,
                                 out_features=self.hidden_size,
                                 bias=False)

    # Target side; shares the source embedder when embeddings are tied.
    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        target_embedder = source_embedder
    else:
        target_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                   embedding_dim=self.embed_size,
                                   padding_idx=self.padding_idx)
    self.tgt_encoder = RNNEncoder(input_size=self.embed_size,
                                  hidden_size=self.hidden_size,
                                  embedder=target_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)

    if self.use_gpu:
        self.cuda()
def __init__(self, vocab_size, embed_units, hidden_size, padding_idx=None,
             num_layers=1, max_hop=3, bidirectional=True, attn_mode='mlp',
             dropout=0.0, use_gpu=False):
    """Build the MemNet model.

    Components: an RNN encoder and a multi-hop memory-network encoder that
    share one vocabulary, and an attentional RNN decoder whose embedder is
    shared with the encoder; training uses padding-ignoring NLL.

    Args:
        vocab_size (int): size of the shared vocabulary.
        embed_units (int): word embedding dimension.
        hidden_size (int): RNN / memory hidden dimension.
        padding_idx (int or None): padding token id, excluded from the loss.
        num_layers (int): RNN layer count.
        max_hop (int): number of memory-network hops.
        bidirectional (bool): encoder direction.
        attn_mode (str): decoder attention mode.
        dropout (float): dropout probability.
        use_gpu (bool): move model and loss weight to CUDA.
    """
    super(MemNet, self).__init__()
    self.vocab_size = vocab_size
    self.embed_units = embed_units
    self.padding_idx = padding_idx
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.max_hop = max_hop
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.attn_mode = attn_mode
    self.use_gpu = use_gpu

    # Encoder and decoder share one embedding table.
    enc_embedder = Embedder(num_embeddings=self.vocab_size,
                            embedding_dim=self.embed_units,
                            padding_idx=self.padding_idx)
    dec_embedder = enc_embedder

    self.rnn_encoder = RNNEncoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  embedder=enc_embedder,
                                  num_layers=self.num_layers,
                                  bidirectional=self.bidirectional,
                                  dropout=self.dropout)
    # The memory encoder uses 'general' attention regardless of attn_mode.
    self.mem_encoder = EncoderMemNN(vocab=self.vocab_size,
                                    hidden_size=self.hidden_size,
                                    hop=self.max_hop,
                                    attn_mode='general',
                                    padding_idx=self.padding_idx)
    self.decoder = RNNDecoder(input_size=self.embed_units,
                              hidden_size=self.hidden_size,
                              output_size=self.vocab_size,
                              embedder=dec_embedder,
                              attn_mode=self.attn_mode,
                              attn_hidden_size=self.hidden_size,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout)
    self.softmax = nn.Softmax(dim=-1)

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')

    if self.use_gpu:
        self.cuda()
        # BUG FIX: guard against padding_idx=None, where weight is None and
        # the original self.weight.cuda() raised AttributeError.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size,
             padding_idx=None, num_layers=1, bidirectional=True,
             attn_mode="mlp", attn_hidden_size=None, with_bridge=False,
             tie_embedding=False, dropout=0.0, use_gpu=False, use_dssm=False,
             weight_control=False, use_pg=False, concat=False,
             pretrain_epoch=0, with_label=False):
    """Build the two-stage persona seq2seq model.

    Components: an utterance encoder, a persona encoder with 'general'
    attention over persona memory, a key projection, and an attentional RNN
    decoder that can consume persona labels; training uses padding-ignoring
    NLL plus a persona-selection NLL (and optional DSSM/MSE objectives).

    Args mirror KnowledgeSeq2Seq with the persona-specific additions
    `with_label` (decoder consumes persona labels) and `use_pg`
    (policy-gradient training with a running `baseline`).
    """
    super(TwoStagePersonaSeq2Seq, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.padding_idx = padding_idx
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.attn_mode = attn_mode
    self.attn_hidden_size = attn_hidden_size
    self.with_bridge = with_bridge
    self.tie_embedding = tie_embedding
    self.dropout = dropout
    self.use_gpu = use_gpu
    self.use_dssm = use_dssm
    self.weight_control = weight_control
    self.use_pg = use_pg
    self.pretrain_epoch = pretrain_epoch
    self.baseline = 0  # running baseline for policy-gradient training
    self.with_label = with_label
    self.task_id = 1

    enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                            embedding_dim=self.embed_size,
                            padding_idx=self.padding_idx)
    self.encoder = RNNEncoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              embedder=enc_embedder,
                              num_layers=self.num_layers,
                              bidirectional=self.bidirectional,
                              dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    if self.tie_embedding:
        assert self.src_vocab_size == self.tgt_vocab_size
        dec_embedder = enc_embedder
        persona_embedder = enc_embedder
    else:
        dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)
        persona_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                    embedding_dim=self.embed_size,
                                    padding_idx=self.padding_idx)

    self.persona_encoder = RNNEncoder(input_size=self.embed_size,
                                      hidden_size=self.hidden_size,
                                      embedder=persona_embedder,
                                      num_layers=self.num_layers,
                                      bidirectional=self.bidirectional,
                                      dropout=self.dropout)
    self.persona_attention = Attention(query_size=self.hidden_size,
                                       memory_size=self.hidden_size,
                                       hidden_size=self.hidden_size,
                                       mode="general")

    self.decoder = RNNDecoder(input_size=self.embed_size,
                              hidden_size=self.hidden_size,
                              output_size=self.tgt_vocab_size,
                              embedder=dec_embedder,
                              num_layers=self.num_layers,
                              attn_mode=self.attn_mode,
                              memory_size=self.hidden_size,
                              feature_size=None,
                              dropout=self.dropout,
                              concat=concat,
                              with_label=self.with_label)

    self.key_linear = nn.Linear(in_features=self.embed_size,
                                out_features=self.hidden_size)

    if self.use_dssm:
        self.dssm_project = nn.Linear(in_features=self.hidden_size,
                                      out_features=self.hidden_size)
    # FIX: the original created this MSELoss twice (once inside the
    # use_dssm branch, then again unconditionally) — once is enough.
    self.mse_loss = torch.nn.MSELoss(reduction='mean')

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.tgt_vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    self.persona_loss = NLLLoss(weight=None,
                                reduction='mean')
    self.eps = 1e-7  # numerical-stability epsilon

    if self.use_gpu:
        self.cuda()
        # BUG FIX: guard against padding_idx=None, where weight is None and
        # the original self.weight.cuda() raised AttributeError.
        if self.weight is not None:
            self.weight = self.weight.cuda()
def __init__(self, vocab_size, embed_units, hidden_size, padding_idx=None,
             num_layers=1, bidirectional=True, attn_mode='mlp', dropout=0.0,
             with_bridge=True, use_gpu=False):
    """Build the pointer-network model.

    Components: fact and history RNN encoders that share one embedding
    table, a pointer decoder sharing the same embedder, and an optional
    bridge; training uses padding-ignoring NLL plus a KL divergence term.

    Args:
        vocab_size (int): size of the shared vocabulary.
        embed_units (int): word embedding dimension.
        hidden_size (int): RNN hidden dimension.
        padding_idx (int or None): padding token id, excluded from the loss.
        num_layers (int): RNN layer count.
        bidirectional (bool): encoder direction.
        attn_mode (str): decoder attention mode.
        dropout (float): dropout probability.
        with_bridge (bool): add a Linear+Tanh bridge on the encoder state.
        use_gpu (bool): move model and loss weight to CUDA.
    """
    super(PointerNet, self).__init__()
    self.vocab_size = vocab_size
    self.embed_units = embed_units
    self.padding_idx = padding_idx
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.attn_mode = attn_mode
    self.with_bridge = with_bridge
    self.use_gpu = use_gpu

    # One embedding table shared by both encoders and the decoder.
    embedder = Embedder(num_embeddings=self.vocab_size,
                        embedding_dim=self.embed_units,
                        padding_idx=self.padding_idx)
    self.fact_encoder = RNNEncoder(input_size=self.embed_units,
                                   hidden_size=self.hidden_size,
                                   embedder=embedder,
                                   num_layers=self.num_layers,
                                   bidirectional=self.bidirectional,
                                   dropout=self.dropout)
    self.hist_encoder = RNNEncoder(input_size=self.embed_units,
                                   hidden_size=self.hidden_size,
                                   embedder=embedder,
                                   num_layers=self.num_layers,
                                   bidirectional=self.bidirectional,
                                   dropout=self.dropout)
    self.decoder = PointerDecoder(input_size=self.embed_units,
                                  hidden_size=self.hidden_size,
                                  output_size=self.vocab_size,
                                  embedder=embedder,
                                  attn_mode=self.attn_mode,
                                  dropout=self.dropout)

    if self.with_bridge:
        self.bridge = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.Tanh())

    # Zero the loss weight of the padding token so it never contributes.
    if self.padding_idx is not None:
        self.weight = torch.ones(self.vocab_size)
        self.weight[self.padding_idx] = 0
    else:
        self.weight = None
    self.nll_loss = NLLLoss(weight=self.weight,
                            ignore_index=self.padding_idx,
                            reduction='mean')
    # `size_average=True` is deprecated; reduction='mean' is its equivalent.
    self.kl_loss = torch.nn.KLDivLoss(reduction='mean')

    if self.use_gpu:
        self.cuda()
        # BUG FIX: guard against padding_idx=None, where weight is None and
        # the original self.weight.cuda() raised AttributeError.
        if self.weight is not None:
            self.weight = self.weight.cuda()