def __init__(self, params, regularised):
    super(RVAE_dilated, self).__init__()

    self.params = params
    self.embedding = Embedding(self.params, '')
    self.regularised = regularised

    if self.regularised:
        print("Highly regularised Encoder")
        self.encoder = HREncoder(self.params)
        self.layer_dim = self.params.encoder_num_layers * 2 * self.params.encoder_rnn_size
        self.context_to_mu = nn.Linear(self.layer_dim * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.layer_dim * 2, self.params.latent_variable_size)
    else:
        print('Classic encoder')
        self.encoder = Encoder(self.params)
        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

    self.decoder = Decoder(self.params)
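# Illustrative sketch, not part of the original source: how the input width of the
# context_to_mu / context_to_logvar layers is derived in the regularised (HREncoder)
# branch above. The concrete sizes below are assumed example values, not the project's
# defaults; the factor of 2 in layer_dim presumably accounts for the bidirectional encoder.
encoder_num_layers = 2        # assumed
encoder_rnn_size = 600        # assumed
latent_variable_size = 1100   # assumed

layer_dim = encoder_num_layers * 2 * encoder_rnn_size   # 2 * 2 * 600 = 2400
mu_in_features = layer_dim * 2                           # 4800, the in_features of context_to_mu
print(layer_dim, mu_in_features)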
def __init__(self, params):
    super(RVAE, self).__init__()

    self.params = params
    self.embedding = Embedding(self.params, '')
    self.encoder = Encoder(self.params)

    self.context_to_mu = nn.Linear(self.params.latent_variable_size, self.params.latent_variable_size)
    self.context_to_logvar = nn.Linear(self.params.latent_variable_size, self.params.latent_variable_size)

    self.decoder = Decoder(self.params)
def __init__(self, params):
    super(RVAE, self).__init__()

    self.params = params
    self.embedding = Embedding(self.params, '')

    self.original_encoder = OriginalEncoder(self.params)
    self.paraphrase_encoder = ParaEncoder(self.params)

    self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
    self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

    self.decoder = Decoder(self.params)
def __init__(self, params: object, params_2: object, path: str) -> None:
    """Initializes the RVAE with the given parameters and data files.

    Args:
        params (object): parameters for the original-sentence encoder
        params_2 (object): parameters for the paraphrase encoder
        path (str): path to the data files
    """
    super(RVAE, self).__init__()

    self.params = params
    self.params_2 = params_2

    self.embedding = Embedding(self.params, path)
    self.embedding_2 = Embedding(self.params_2, path, True)

    self.encoder_original = Encoder(self.params)
    if self.params.hrvae:
        self.encoder_paraphrase = EncoderHR(self.params_2)
    else:
        self.encoder_paraphrase = Encoder(self.params_2)

    self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
    self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

    if self.params.attn_model and self.params.res_model:
        self.decoder = DecoderResidualAttention(self.params_2)
    elif self.params.attn_model:
        self.decoder = DecoderAttention(self.params_2)
    elif self.params.res_model:
        self.decoder = DecoderResidual(self.params_2)
    else:
        self.decoder = Decoder(self.params_2)
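# Illustrative usage sketch (an assumption, not the project's actual driver code): the
# hrvae / attn_model / res_model flags on the params objects select which encoder and
# decoder variants the constructor above instantiates.
from types import SimpleNamespace

params_sketch = SimpleNamespace(hrvae=True, attn_model=False, res_model=True,
                                encoder_rnn_size=600, latent_variable_size=1100)
# With these (hypothetical) settings the constructor would build EncoderHR for the
# paraphrase side and DecoderResidual for the decoder; with attn_model and res_model
# both set it would build DecoderResidualAttention instead.
# model = RVAE(params_sketch, params_sketch, path='data/')   # hypothetical path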
class RVAE(nn.Module):
    def __init__(self, params):
        super(RVAE, self).__init__()

        self.params = params
        self.embedding = Embedding(self.params, '')
        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

        self.decoder = Decoder(self.params)

    def forward(self, drop_prob,
                encoder_word_input=None, encoder_character_input=None,
                decoder_word_input=None, decoder_character_input=None,
                z=None, initial_state=None):
        """
        :param encoder_word_input: A tensor with shape of [batch_size, seq_len] of Long type
        :param encoder_character_input: A tensor with shape of [batch_size, seq_len, max_word_len] of Long type
        :param decoder_word_input: A tensor with shape of [batch_size, max_seq_len + 1] of Long type
        :param initial_state: initial state of the decoder rnn in order to perform sampling
        :param drop_prob: probability of an element of decoder input to be zeroed in the sense of dropout
        :param z: context if sampling is performed
        :return: unnormalized logits of the word distribution with shape of [batch_size, seq_len, word_vocab_size],
                 final rnn state with shape of [num_layers, batch_size, decoder_rnn_size]
        """
        # assert parameters_allocation_check(self), \
        #     'Invalid CUDA options. Parameters should be allocated in the same memory'
        use_cuda = self.embedding.word_embed.weight.is_cuda

        assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                                  [encoder_word_input, encoder_character_input, decoder_word_input],
                                  True) \
            or (z is not None and decoder_word_input is not None), \
            "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std) '''
            [batch_size, _] = encoder_word_input.size()

            encoder_input = self.embedding(encoder_word_input, encoder_character_input)
            context = self.encoder(encoder_input)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()
            z = z * std + mu

            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze()
        else:
            kld = None

        decoder_input = self.embedding.word_embed(decoder_word_input)
        out, final_state = self.decoder(decoder_input, z, drop_prob, initial_state)

        return out, final_state, kld

    def learnable_parameters(self):
        # word_embedding is a constant parameter, thus it must be dropped from the list of parameters for the optimizer
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, batch_loader):
        def train(i, batch_size, use_cuda, dropout):
            input = batch_loader.next_batch(batch_size, 'train')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [encoder_word_input, encoder_character_input,
             decoder_word_input, decoder_character_input, target] = input

            logits, _, kld = self(dropout,
                                  encoder_word_input, encoder_character_input,
                                  decoder_word_input, decoder_character_input,
                                  z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)

        return train

    def validater(self, batch_loader):
        def validate(batch_size, use_cuda):
            input = batch_loader.next_batch(batch_size, 'valid')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [encoder_word_input, encoder_character_input,
             decoder_word_input, decoder_character_input, target] = input

            logits, _, kld = self(0.,
                                  encoder_word_input, encoder_character_input,
                                  decoder_word_input, decoder_character_input,
                                  z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            return cross_entropy, kld

        return validate

    def sample(self, batch_loader, seq_len, seed, use_cuda):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(1)
        decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
        decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long())
        if use_cuda:
            decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda()

        result = ''
        initial_state = None

        for i in range(seq_len):
            logits, initial_state, _ = self(0., None, None,
                                            decoder_word_input, decoder_character_input,
                                            seed, initial_state)

            logits = logits.view(-1, self.params.word_vocab_size)
            prediction = F.softmax(logits)

            word = batch_loader.sample_word_from_distribution(prediction.data.cpu().numpy()[-1])
            if word == batch_loader.end_token:
                break
            result += ' ' + word

            decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]])
            decoder_character_input_np = np.array([[batch_loader.encode_characters(word)]])

            decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
            decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long())
            if use_cuda:
                decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda()

        return result
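# kld_coef(i) is used by the trainer above but is defined elsewhere in the project. A
# common choice (and only an assumption here, not necessarily the project's exact schedule)
# is a sigmoid KL-annealing coefficient that ramps from ~0 to 1 over training iterations,
# so the KL term is introduced gradually:
import math

def kld_coef_sketch(i, midpoint=2500.0, steepness=0.0025):
    """Hypothetical KL annealing weight in (0, 1) for training iteration i."""
    return float(1.0 / (1.0 + math.exp(-steepness * (i - midpoint))))

# e.g. kld_coef_sketch(0) ~ 0.002, kld_coef_sketch(2500) = 0.5, kld_coef_sketch(10000) ~ 1.0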
class RVAE_dilated(nn.Module): def __init__(self, params, prefix=''): super(RVAE_dilated, self).__init__() self.params = params self.embedding = Embedding(self.params, '', prefix) self.encoder = Encoder(self.params) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) if self.params.decoder_type == 'gru' or self.params.decoder_type == 'gru_emb': self.decoder = DecoderGRU(self.params) elif self.params.decoder_type == 'lstm': self.decoder = DecoderLSTM(self.params) elif self.params.decoder_type == 'dilation': self.decoder = Decoder(self.params) params_size = 0 params_num = 0 for p in self.parameters(): param_size = 1 for s in p.size(): param_size = param_size * s if p.requires_grad: params_size = params_size + param_size if p.requires_grad: params_num = params_num + 1 #if p.requires_grad: print('Grad Param', type(p.data), p.size()) print('RVAE parameters num[%s] size[%s]' % (params_num, params_size)) def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, decoder_word_input=None, z=None, initial_state=None): """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] kld loss estimation """ assert parameters_allocation_check(self), \ 'Invalid CUDA options. Parameters should be allocated in the same memory' use_cuda = self.embedding.word_embed.weight.is_cuda if not self.params.word_is_char: assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input], True) \ or (z is not None and decoder_word_input is not None), \ "Invalid input. 
If z is None then encoder and decoder inputs should be passed as arguments" if z is None: ''' Get context from encoder and sample z ~ N(mu, std) ''' [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) context = self.encoder(encoder_input) mu = self.context_to_mu(context) logvar = self.context_to_logvar(context) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() else: kld = None decoder_input = self.embedding.word_embed(decoder_word_input) logits_out, final_state = self.decoder(decoder_input, z, drop_prob, initial_state) return logits_out, kld, z, final_state def learnable_parameters(self): # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader): perplexity = Perplexity() def train(i, batch_size, use_cuda, dropout): input = batch_loader.next_batch(batch_size, 'train') input = [(Variable(t.from_numpy(var)) if var is not None else None) for var in input] input = [(var.long() if var is not None else None) for var in input] input = [(var.cuda() if var is not None and use_cuda else var) for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, _, target ] = input logits_out, kld, _, _ = self(dropout, encoder_word_input, encoder_character_input, decoder_word_input, z=None, initial_state=None) if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm': logits = logits_out.view(-1, self.params.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) # since cross enctropy is averaged over seq_len, it is necessary to approximate new kld loss = 79 * cross_entropy + kld_coef(i) * kld logits = logits.view(batch_size, -1, self.params.word_vocab_size) target = target.view(batch_size, -1) ppl = perplexity(logits, target).mean() optimizer.zero_grad() loss.backward() optimizer.step() return ppl, kld, None elif self.params.decoder_type == 'gru_emb': decoder_target = self.embedding(target, None) error = t.pow(logits_out - decoder_target, 2).mean() ''' loss is constructed fromaveraged over whole batches error formed from squared error between output and target and KL Divergence between p(z) and q(z|x) ''' loss = 400 * error + kld_coef(i) * kld optimizer.zero_grad() loss.backward() optimizer.step() return error, kld, kld_coef(i) return train def validater(self, batch_loader): perplexity = Perplexity() def validate(batch_size, use_cuda): input = batch_loader.next_batch(batch_size, 'valid') input = [ Variable(t.from_numpy(var)) if var is not None else None for var in input ] input = [var.long() if var is not None else None for var in input] input = [ var.cuda() if use_cuda and var is not None else var for var in input ] [ encoder_word_input, encoder_character_input, decoder_word_input, _, target ] = input logits_out, kld, _, _ = self(0., encoder_word_input, encoder_character_input, decoder_word_input, z=None, initial_state=None) if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm': ppl = perplexity(logits_out, target).mean() return ppl, kld elif self.params.decoder_type == 'gru_emb': decoder_target = self.embedding(target, None) error = t.pow(logits_out - 
decoder_target, 2).mean() return error, kld return validate def style(self, batch_loader, seq, use_cuda, sample_size=30): decoder_word_input_np, _ = batch_loader.go_input(1) encoder_wids = [] for i in range(len(seq)): word = seq[i] wid = batch_loader.word_to_idx[word] word = np.array([[wid]]) decoder_word_input_np = np.append(decoder_word_input_np, word, 1) encoder_wids.append(wid) encoder_wids = encoder_wids[::-1] encoder_word_input_np = np.array([encoder_wids]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np)).long() encoder_word_input = Variable( t.from_numpy(encoder_word_input_np)).long() decoder_word_input = t.cat([decoder_word_input] * sample_size, 0) encoder_word_input = t.cat([encoder_word_input] * sample_size, 0) if use_cuda: decoder_word_input = decoder_word_input.cuda() encoder_word_input = encoder_word_input.cuda() if self.params.word_is_char: #TODO only for chinese word right now logits_out, kld, z, final_state = self(0., encoder_word_input, None, decoder_word_input, z=None, initial_state=None) return z.data.cpu().numpy() return None def sample(self, batch_loader, seq_len, seeds, use_cuda, template=None, beam_size=50): (z_num, _) = seeds.shape print("z sample size", z_num, "beam size", beam_size) beam_sent_wids, _ = batch_loader.go_input(1) beam_sent_last_wid = beam_sent_wids[:, -1:] results = [] end_token_id = batch_loader.word_to_idx[batch_loader.end_token] initial_state = None sentence = [] for i in range(seq_len): beam_sent_num = len(beam_sent_wids) if beam_sent_num == 0: break if len(results) >= beam_size: break if self.params.decoder_type == 'dilation' or not self.params.decoder_stateful: beam_z_sent_wids = np.repeat( beam_sent_wids, [z_num], axis=0) if z_num > 1 else beam_sent_wids elif self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm' or self.params.decoder_type == 'gru_emb': beam_z_sent_wids = np.repeat( beam_sent_last_wid, [z_num], axis=0) if z_num > 1 else beam_sent_last_wid decoder_word_input = Variable( t.from_numpy(beam_z_sent_wids).long()) decoder_word_input = decoder_word_input.cuda( ) if use_cuda else decoder_word_input beam_seeds = Variable(t.from_numpy(seeds).float()) beam_seeds = t.cat([beam_seeds] * beam_sent_num, 0) if beam_sent_num > 1 else beam_seeds beam_seeds = beam_seeds.cuda() if use_cuda else beam_seeds if not self.params.decoder_stateful: initial_state = None elif initial_state is not None and z_num > 1: initial_state = initial_state.view( -1, 1, self.params.decoder_rnn_size) initial_state = initial_state.repeat(1, z_num, 1) initial_state = initial_state.view( self.params.decoder_num_layers, -1, self.params.decoder_rnn_size) beam_sent_logps = None if template and len(template) > i and template[i] != '#': beam_sent_wids = np.column_stack( (beam_sent_wids, [batch_loader.word_to_idx[template[i]]] * beam_sent_num)) beam_sent_last_wid = beam_sent_wids[:, -1:] else: logits_out, _, _, initial_state = self(0., None, None, decoder_word_input, beam_seeds, initial_state) if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm': [b_z_n, sl, _] = logits_out.size() logits = logits_out.view(-1, self.params.word_vocab_size) prediction = F.softmax(logits) prediction = prediction.view(beam_sent_num, z_num, sl, -1) # take mean of sentence vocab probs for each beam group beam_sent_vps = np.mean(prediction.data.cpu().numpy(), 1) # get vocab probs of the sentence last word for each beam group beam_last_vps = beam_sent_vps[:, -1] beam_last_word_size = 
min(batch_loader.words_vocab_size, beam_size) # choose last word candidate ids for each beam group beam_choosed_wids = np.array([ np.random.choice(range(batch_loader.words_vocab_size), beam_last_word_size, replace=False, p=last_vps.ravel()).tolist() for last_vps in beam_last_vps ]) # print("candidate shape =", beam_choosed_wids.shape) # dumplicate beam sentence word ids for choosed last word size beam_sent_wids = np.repeat(beam_sent_wids, [beam_last_word_size], axis=0) beam_sent_wids = np.column_stack( (beam_sent_wids, beam_choosed_wids.reshape(-1))) if not self.params.decoder_stateful: initial_state = None elif initial_state is not None: initial_state = initial_state.view( -1, 1, self.params.decoder_rnn_size) initial_state = initial_state.repeat( 1, beam_last_word_size, 1) initial_state = initial_state.view( self.params.decoder_num_layers, -1, self.params.decoder_rnn_size) # get sentence word probs beam_sent_wps = [] whole_or_last = 1 if self.params.decoder_type == 'dilation' or not self.params.decoder_stateful else ( -1 if self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm' else 0) for i, sent in enumerate(beam_sent_wids): beam_sent_wps.append([]) for j, wid in enumerate(sent[whole_or_last:]): beam_sent_wps[i].append( beam_sent_vps[i // beam_last_word_size][j][wid]) # desc sort sum of the beam sentence log probs beam_sent_logps = np.sum(np.log(beam_sent_wps), axis=1) beam_sent_ids = np.argsort( beam_sent_logps)[-(beam_size - len(results)):][::-1] # get the top beam size sentences beam_sent_wids = beam_sent_wids[beam_sent_ids] beam_sent_logps = np.exp(beam_sent_logps[beam_sent_ids]) #print("candidate", "".join([batch_loader.idx_to_word[wid] for wid in beam_sent_wids[:,-1].reshape(-1)])) if initial_state is not None and len(beam_sent_ids) > 0: idx = Variable(t.from_numpy( beam_sent_ids.copy())).long() initial_state = initial_state.index_select(1, idx) elif self.params.decoder_type == 'gru_emb': [b_z_n, sl, _] = logits_out.size() #TODO out = logits_out.view(-1, self.params.word_embed_size) similarity = self.embedding.similarity(out) similarity = similarity.data.cpu().numpy() similarity = np.mean(similarity, 0) similarity = similarity.view(beam_sent_num, z_num, sl, -1) beam_last_word_size = min(batch_loader.words_vocab_size, beam_size) # choose last word candidate ids for each beam group beam_choosed_wids = np.array([ np.random.choice(range(batch_loader.words_vocab_size), beam_last_word_size, replace=False, p=last_vps.ravel()).tolist() for last_vps in similarity ]) idx = np.random.choice(range( batch_loader.words_vocab_size), replace=False, p=similarity.ravel()) if idx == end_token_id: break beam_sent_wids = np.array([[idx]]) word = batch_loader.idx_to_word[idx] sentence.append(word) if self.params.decoder_type == 'dilation' or self.params.decoder_type == 'gru' or self.params.decoder_type == 'lstm': # check whether some sentence is ended keep = [] for i, sent in enumerate(beam_sent_wids): if sent[-1] == end_token_id: results.append(sent) self.show( batch_loader, sent, beam_sent_logps[i] if beam_sent_logps is not None and len(beam_sent_logps) > i else None) else: keep.append(i) beam_sent_wids = beam_sent_wids[keep] beam_sent_last_wid = beam_sent_wids[:, -1:] #print("last word", "".join([batch_loader.idx_to_word[wid] for wid in beam_sent_last_wid[:,-1].reshape(-1)])) if initial_state is not None and len(keep) > 0: idx = Variable(t.from_numpy(np.array(keep))).long() initial_state = initial_state.index_select(1, idx) if self.params.decoder_type == 'gru_emb': print(u'%s' % ("" 
if self.params.word_is_char else " ").join(sentence)) return "" else: results_len = len(results) lack_num = beam_size - results_len if lack_num > 0: results = results + beam_sent_wids[:lack_num].tolist() for i, sent in enumerate(results[-lack_num:]): self.show( batch_loader, sent, beam_sent_logps[i + results_len] if beam_sent_logps is not None and len(beam_sent_logps) > i + results_len else None) return results def show(self, batch_loader, sent_wids, sent_logp): print(u'%s==%s' % (("" if self.params.word_is_char else " ").join( [batch_loader.idx_to_word[wid] for wid in sent_wids]), sent_logp))
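# The RVAE_dilated trainer/validater above report a Perplexity() metric imported from
# elsewhere in the project. As a standalone sanity-check sketch (an assumption about what
# the metric computes, not the project's Perplexity class): per-token perplexity is the
# exponential of the mean token-level cross-entropy.
import torch as t
import torch.nn.functional as F

def perplexity_sketch(logits, target):
    """logits: [batch, seq_len, vocab] floats, target: [batch, seq_len] Long word ids."""
    ce = F.cross_entropy(logits.view(-1, logits.size(-1)), target.view(-1))
    return t.exp(ce)

logits = t.randn(2, 5, 100)
target = t.randint(0, 100, (2, 5))
print(perplexity_sketch(logits, target))  # close to 100 for uniform random logits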
class RVAE(nn.Module): def __init__(self, params, params_2): super(RVAE, self).__init__() self.params = params self.params_2 = params_2 #Encoder-2 parameters self.embedding = Embedding(self.params, '') self.embedding_2 = Embedding(self.params_2, '', True) self.encoder = Encoder(self.params) self.encoder_2 = Encoder(self.params_2) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) # self.encoder_3 = Encoder(self.params) self.decoder = Decoder(self.params_2) #change this to params_2 def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, encoder_word_input_2=None, encoder_character_input_2=None, decoder_word_input_2=None, decoder_character_input_2=None, z=None, initial_state=None): #Modified the parameters of forward function according to Encoder-2 """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param initial_state: initial state of decoder rnn in order to perform sampling :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] final rnn state with shape of [num_layers, batch_size, decoder_rnn_size] """ assert parameters_allocation_check(self), \ 'Invalid CUDA options. Parameters should be allocated in the same memory' use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input_2], True) \ or (z is not None and decoder_word_input_2 is not None), \ "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments" if z is None: ''' Get context from encoder and sample z ~ N(mu, std) ''' [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) ''' ===================================================Doing the same for encoder-2=================================================== ''' [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2) ''' ================================================================================================================================== ''' context, h_0, c_0 = self.encoder(encoder_input, None) State = (h_0, c_0) #Final state of Encoder-1 context_2, _, _ = self.encoder_2(encoder_input_2, State) #Encoder_2 for Ques_2 mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() # encoder_input = self.embedding(encoder_word_input, encoder_character_input) # _ , h_0 , c_0 = self.encoder_3(encoder_input, None) initial_state = State #Final state of Encoder-1 else: kld = None mu = None std = None decoder_input_2 = self.embedding_2.word_embed( decoder_word_input_2 ) # What to do with this decoder input ? 
--> Slightly resolved out, final_state = self.decoder( decoder_input_2, z, drop_prob, initial_state) # Take a look at the decoder return out, final_state, kld, mu, std def learnable_parameters(self): # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader, batch_loader_2): def train(i, batch_size, use_cuda, dropout, start_index): input = batch_loader.next_batch(batch_size, 'train', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input ''' =================================================== Input for Encoder-2 ======================================================== ''' input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 ''' ================================================================================================================================ ''' # exit() logits, _, kld, _, _ = self(dropout, encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) loss = 79 * cross_entropy + kld_coef(i) * kld optimizer.zero_grad() loss.backward() optimizer.step() return cross_entropy, kld, kld_coef(i) return train def validater(self, batch_loader, batch_loader_2): def validate(batch_size, use_cuda, start_index): input = batch_loader.next_batch(batch_size, 'valid', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input ''' ==================================================== Input for Encoder-2 ======================================================== ''' input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 ''' ================================================================================================================================== ''' logits, _, kld, _, _ = self(0., encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader, seq_len, seed, use_cuda, State): # seed = Variable(t.from_numpy(seed).float()) if use_cuda: 
seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() result = '' initial_state = State for i in range(seq_len): logits, initial_state, _, _, _ = self(0., None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) # forward(self, drop_prob, # encoder_word_input=None, encoder_character_input=None, # encoder_word_input_2=None, encoder_character_input_2=None, # decoder_word_input_2=None, decoder_character_input_2=None, # z=None, initial_state=None): # logits = logits.view(-1, self.params.word_vocab_size) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) # print '---------------------------------------' # print 'Printing logits' # print logits # print '------------------------------------------' prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution( prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += ' ' + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]] ]) decoder_character_input_np = np.array( [[batch_loader.encode_characters(word)]]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() return result def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i, beam_size, n_best): input = batch_loader.next_batch(1, 'valid', i) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) _, h0, c0 = self.encoder(encoder_input, None) State = (h0, c0) # print '----------------------' # print 'Printing h0 ---------->' # print h0 # print '----------------------' # State = None results, scores = self.sample_beam(batch_loader_2, seq_len, seed, use_cuda, State, beam_size, n_best) return results, scores def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State, beam_size, n_best): # seed = Variable(t.from_numpy(seed).float()) if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() dec_states = State # print '========= Before ================' # print "dec_states:", dec_states[0].size() # print "dec_states:", dec_states[1].size() # print '==================================' # dec_states = [ # Variable(dec_states[0].repeat(1, beam_size, 1)), # Variable(dec_states[1].repeat(1, beam_size, 1)) # ] dec_states = [ dec_states[0].repeat(1, beam_size, 1), dec_states[1].repeat(1, beam_size, 1) ] # print'========== After ==================' # print "dec_states:", dec_states[0].size() 
# print "dec_states:", dec_states[1].size() # print '==================================' # exit() drop_prob = 0.0 beam_size = beam_size batch_size = 1 beam = [ Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size) ] batch_idx = list(range(batch_size)) remaining_sents = batch_size for i in range(seq_len): input = t.stack([ b.get_current_state() for b in beam if not b.done ]).t().contiguous().view(1, -1) trg_emb = self.embedding_2.word_embed( Variable(input).transpose(1, 0)) # print trg_emb.size() # print seed.size() trg_h, dec_states = self.decoder.only_decoder_beam( trg_emb, seed, drop_prob, dec_states) # trg_h, (trg_h_t, trg_c_t) = self.model.decoder(trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)), context ) # print trg_h.size() # print trg_h_t.size() # print trg_c_t.size() # dec_states = (trg_h_t, trg_c_t) # print 'State dimension ----------->' # print State[0].size() # print State[1].size() # print '=======================================' # print "dec_states:", dec_states[0].size() # print "dec_states:", dec_states[1].size() # print '========== Things successful ===========' # exit() dec_out = trg_h.squeeze(1) # print "dec_out:", dec_out.size() out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0) word_lk = out.view(beam_size, remaining_sents, -1).transpose(0, 1).contiguous() active = [] for b in range(batch_size): if beam[b].done: continue idx = batch_idx[b] if not beam[b].advance(word_lk.data[idx]): active += [b] for dec_state in dec_states: # iterate over h, c # layers x beam*sent x dim sent_states = dec_state.view(-1, beam_size, remaining_sents, dec_state.size(2))[:, :, idx] sent_states.data.copy_( sent_states.data.index_select( 1, beam[b].get_current_origin())) if not active: break # in this section, the sentences that are still active are # compacted so that the decoder is not run on completed sentences active_idx = t.cuda.LongTensor([batch_idx[k] for k in active]) batch_idx = {beam: idx for idx, beam in enumerate(active)} def update_active(t): # select only the remaining active sentences view = t.data.view(-1, remaining_sents, self.params.decoder_rnn_size) new_size = list(t.size()) new_size[-2] = new_size[-2] * len(active_idx) \ // remaining_sents return Variable( view.index_select(1, active_idx).view(*new_size)) dec_states = (update_active(dec_states[0]), update_active(dec_states[1])) dec_out = update_active(dec_out) # context = update_active(context) remaining_sents = len(active) # (4) package everything up allHyp, allScores = [], [] for b in range(batch_size): scores, ks = beam[b].sort_best() # print scores # print ks allScores += [scores[:n_best]] hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]) # print hyps # print "------------------" allHyp += [hyps] # print '==== Complete =========' return allHyp, allScores
class RVAE(nn.Module): def __init__(self, params, params_2, path): super(RVAE, self).__init__() self.params = params self.params_2 = params_2 # Encoder-2 parameters self.embedding = Embedding(self.params, path) self.embedding_2 = Embedding(self.params_2, path, True) self.encoder_original = Encoder(self.params) if self.params.hrvae: self.encoder_paraphrase = EncoderHR(self.params_2) else: self.encoder_paraphrase = Encoder(self.params_2) # self.embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5") self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) if self.params.res_model: self.decoder = DecoderResidual(self.params_2) else: self.decoder = Decoder(self.params_2) def forward( self, unk_idx, drop_prob, encoder_word_input=None, encoder_character_input=None, encoder_word_input_2=None, encoder_character_input_2=None, decoder_word_input_2=None, decoder_character_input_2=None, z=None, initial_state=None, ): # Modified the parameters of forward function according to Encoder-2 """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param initial_state: initial state of decoder rnn in order to perform sampling :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] final rnn state with shape of [num_layers, batch_size, decoder_rnn_size] """ assert parameters_allocation_check( self ), "Invalid CUDA options. Parameters should be allocated in the same memory" use_cuda = self.embedding.word_embed.weight.is_cuda assert ( z is None and fold( lambda acc, parameter: acc and parameter is not None, [ encoder_word_input, encoder_character_input, decoder_word_input_2 ], True, ) or (z is not None and decoder_word_input_2 is not None) ), "Invalid input. 
If z is None then encoder and decoder inputs should be passed as arguments" if z is None: """Get context from encoder and sample z ~ N(mu, std)""" # concatenate the word and character embeddings into a single vector [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input, unk_idx, drop_prob) """ ===================================================Doing the same for encoder-2=================================================== """ [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2, unk_idx, drop_prob) """ ================================================================================================================================== """ enc_out_original, context, h_0, c_0, _ = self.encoder_original( encoder_input, None) state_original = (h_0, c_0) # Final state of Encoder-1 (original-sentence encoding) # state_original = context enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase( encoder_input_2, state_original) # Encoder_2 for Ques_2 (then encode the paraphrase sentence) state_paraphrase = (h_0, c_0) # Final state of Encoder-2 (paraphrase-sentence encoding) # state_paraphrase = context_2 if context_ is not None: mu_ = [] logvar_ = [] for entry in context_: mu_.append(self.context_to_mu(entry)) logvar_.append(self.context_to_logvar(entry)) z_sampled = self.sample_gaussian(batch_size) if use_cuda: z_sampled = z_sampled.cuda() mu = t.stack(mu_) logvar = t.stack(logvar_) if self.params.wae: z_tilda = self.sample_z_tilda_from_posterior( z_sampled, logvar_[-1], mu_[-1], 1).cuda() p = t.distributions.Normal(mu, t.exp(logvar)) q = t.distributions.Normal(mu, t.ones(logvar.size()).cuda()) kld = t.sum(t.distributions.kl_divergence(p, q)) kld = kld / mu.shape[0] kld = 0 for i in range(len(mu_)): p = t.distributions.Normal(mu_[i], t.exp(logvar_[i])) q = t.distributions.Normal( mu_[i], t.ones(logvar.size()).cuda()) kld += t.sum(t.distributions.kl_divergence(p, q)) kld = kld / len(mu_) wasserstein_loss = self.imq_kernel( z_sampled, z_tilda, self.params.latent_variable_size) kld = 0.01 * kld + 10 * wasserstein_loss else: z_tilda = self.sample_z_tilda_from_posterior( z_sampled, logvar_[-1], mu_[-1], 0.5).cuda() kld = 0 for i in range(len(mu_)): kld += (-0.5 * t.sum( logvar_[i] - t.pow(mu_[i], 2) - t.exp(logvar_[i]) + 1, 1)).mean().squeeze() kld = kld / len(mu_) else: mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) z_sampled = self.sample_gaussian(batch_size) if use_cuda: z_sampled = z_sampled.cuda() if self.params.wae: z_tilda = self.sample_z_tilda_from_posterior( z_sampled, logvar, mu, 1).cuda() p = t.distributions.Normal(mu, t.exp(logvar)) q = t.distributions.Normal(mu, t.ones(logvar.size()).cuda()) kld = t.sum(t.distributions.kl_divergence(p, q)) wasserstein_loss = self.imq_kernel( z_sampled, z_tilda, self.params.latent_variable_size) kld = 0.01 * kld + 10 * wasserstein_loss else: z_tilda = self.sample_z_tilda_from_posterior( z_sampled, logvar, mu, 0.5).cuda() kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() else: kld = None mu = None std = None # What to do with this decoder input ?
--> Slightly resolved decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2) # if context_ is not None: # decoder_input_2 = t.ones(decoder_input_2.size()).cuda() out, final_state = self.decoder(decoder_input_2, z_tilda, drop_prob, enc_out_paraphrase, state_original) return out, final_state, kld, mu, None def sample_z_tilda_from_posterior(self, z_sampled, z_log_sigma, z_mean, z_temperature=0.5): """(Differentiably!) draw sample from Gaussian with given shape, subject to random noise epsilon""" return z_sampled * t.exp( z_log_sigma * z_temperature) + z_mean # N(mu, I * sigma**2) def sample_gaussian(self, batch_size): """(Differentiably!) draw sample from Gaussian with given shape, subject to random noise epsilon""" return Variable(t.randn([batch_size, self.params.latent_variable_size ])) # Dimension [batch_size x latent_dim] def imq_kernel(self, sample_qz: t.Tensor, sample_pz: t.Tensor, h_dim: int): batch_size = sample_pz.size(0) norms_pz = sample_pz.pow(2).sum(1, keepdim=True) # batch_size x 1 prods_pz = t.mm(sample_pz, sample_pz.t()) # batch_size x batch_size dists_pz = norms_pz + norms_pz.t() - 2 * prods_pz norms_qz = sample_qz.pow(2).sum(1, keepdim=True) # batch_size x 1 prods_qz = t.mm(sample_qz, sample_qz.t()) # batch_size x batch_size dists_qz = norms_qz + norms_qz.t() - 2 * prods_qz dotprods = t.mm(sample_qz, sample_pz.t()) distances = norms_qz + norms_pz.t() - 2 * dotprods stats = 0 Cbase = 2.0 * h_dim * 2.0 * 1.0 for scale in [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]: C = Cbase * scale res1 = C / (C + dists_qz) res1 += C / (C + dists_pz) if t.cuda.is_available(): res1 = (1 - t.eye(batch_size).cuda()) * res1 else: res1 = (1 - t.eye(batch_size)) * res1 res1 = res1.sum() / (batch_size * batch_size - batch_size) res2 = C / (C + distances) res2 = res2.sum() * 2.0 / (batch_size * batch_size) stats += res1 - res2 return stats def learnable_parameters(self): # word_embedding is a fixed (constant) parameter, thus it must be dropped from the list of parameters given to the optimizer return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader, batch_loader_2): def train(coef, batch_size, use_cuda, dropout, start_index): input = batch_loader.next_batch(batch_size, "train", start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] # This batch comes from data/train.txt, converted to embeddings and padded: # encoder_word_input, encoder_character_input are the original sentence xo reversed, with padding tokens prepended, # decoder_word_input, decoder_character_input are the original sentence xo with a start token, padded at the end, # target is the original sentence xo with an end token appended, followed by padding tokens [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target, _, ] = input """ =================================================== Input for Encoder-2 ======================================================== """ input_2 = batch_loader_2.next_batch(batch_size, "train", start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] # This batch comes from data/super/train.txt, converted to embeddings and padded: # encoder_word_input, encoder_character_input are the paraphrase sentence xp reversed, with padding tokens prepended, # decoder_word_input, decoder_character_input are the paraphrase sentence xp with a start token, padded at the end, # target is the paraphrase sentence xp with an end token appended, followed by padding tokens [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target, _, ] = input_2 unk_idx = None """
================================================================================================================================ """ # here encoder_input is the original sentence xo (reversed), encoder_input_2 is the paraphrase sentence xp (reversed), and decoder_input is the paraphrase sentence with a start token prepended logits, _, kld, _, _ = self( unk_idx, dropout, encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None, ) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) # logits hold the probabilities over the whole vocabulary at every step; target holds the word index at every step and does not need to be one-hot, F.cross_entropy converts it internally cross_entropy = F.cross_entropy(logits, target) if self.params.wae: loss = 1 * cross_entropy + coef * kld # the 79 weight appears to be an arbitrary choice by the original author elif self.params.hrvae: loss = 79 * cross_entropy + coef * kld # the 79 weight appears to be an arbitrary choice by the original author else: loss = 79 * cross_entropy + coef * kld # the 79 weight appears to be an arbitrary choice by the original author optimizer.zero_grad() # standard usage: compute the loss, then zero the gradients, loss.backward() # then backpropagate optimizer.step() # then update the parameters from the gradients return cross_entropy, kld, coef # cross entropy, KL divergence, and the KL coefficient return train def validater(self, batch_loader, batch_loader_2): def validate(batch_size, use_cuda, start_index): input = batch_loader.next_batch(batch_size, "valid", start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input """ ==================================================== Input for Encoder-2 ======================================================== """ input_2 = batch_loader_2.next_batch(batch_size, "valid", start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target, ] = input_2 """ ================================================================================================================================== """ unk_idx = batch_loader_2.word_to_idx[batch_loader_2.unk_token] logits, _, kld, _, _ = self( unk_idx, 0.0, encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None, ) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader, seq_len, seed, use_cuda, State): # seed = Variable(t.from_numpy(seed).float()) # seed = Variable(t.randn([1, parameters.latent_variable_size])) if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() result = "" initial_state = State for i in range(seq_len): logits, initial_state, _, _, _ = self(0.0, None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) # forward(self, drop_prob, # encoder_word_input=None, encoder_character_input=None, # encoder_word_input_2=None, encoder_character_input_2=None, # decoder_word_input_2=None, decoder_character_input_2=None, # z=None, initial_state=None): # logits =
logits.view(-1, self.params.word_vocab_size) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) # print '---------------------------------------' # print 'Printing logits' # print logits # print '------------------------------------------' prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution( prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += " " + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]] ]) decoder_character_input_np = np.array( [[batch_loader.encode_characters(word)]]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() return result def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i, beam_size, n_best): input = batch_loader.next_batch(1, "valid", i) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target, _ ] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) encoder_output, _, h0, c0, _ = self.encoder_original( encoder_input, None) State = (h0, c0) # print '----------------------' # print 'Printing h0 ---------->' # print h0 # print '----------------------' # State = None results, scores = self.sample_beam(batch_loader_2, seq_len, seed, use_cuda, State, beam_size, n_best, encoder_output) return results, scores def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State, beam_size, n_best, encoder_output): # seed = Variable(t.from_numpy(seed).float()) if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() dec_states = State # print '========= Before ================' # print "dec_states:", dec_states[0].size() # print "dec_states:", dec_states[1].size() # print '==================================' # dec_states = [ # Variable(dec_states[0].repeat(1, beam_size, 1)), # Variable(dec_states[1].repeat(1, beam_size, 1)) # ] dec_states = [ dec_states[0].repeat(1, beam_size, 1), dec_states[1].repeat(1, beam_size, 1) ] # print'========== After ==================' # print "dec_states:", dec_states[0].size() # print "dec_states:", dec_states[1].size() # print '==================================' # exit() drop_prob = 0.0 beam_size = beam_size batch_size = 1 beam = [ Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size) ] batch_idx = list(range(batch_size)) remaining_sents = batch_size for i in range(seq_len): input = t.stack([ b.get_current_state() for b in beam if not b.done ]).t().contiguous().view(1, -1) trg_emb = self.embedding_2.word_embed( Variable(input).transpose(1, 0)) trg_h, dec_states = self.decoder.only_decoder_beam( trg_emb, seed, drop_prob, encoder_output, dec_states) # trg_h, (trg_h_t, trg_c_t) = self.model.decoder(trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)), context ) # print trg_h.size() # print 
trg_h_t.size() # print trg_c_t.size() # dec_states = (trg_h_t, trg_c_t) # print 'State dimension ----------->' # print State[0].size() # print State[1].size() # print '=======================================' # print "dec_states:", dec_states[0].size() # print "dec_states:", dec_states[1].size() # print '========== Things successful ===========' # exit() dec_out = trg_h.squeeze(1) # print "dec_out:", dec_out.size() out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0) word_lk = out.view(beam_size, remaining_sents, -1).transpose(0, 1).contiguous() active = [] for b in range(batch_size): if beam[b].done: continue idx = batch_idx[b] if not beam[b].advance(word_lk.data[idx]): active += [b] for dec_state in dec_states: # iterate over h, c # layers x beam*sent x dim sent_states = dec_state.view(-1, beam_size, remaining_sents, dec_state.size(2))[:, :, idx] sent_states.data.copy_( sent_states.data.index_select( 1, beam[b].get_current_origin())) if not active: break # in this section, the sentences that are still active are # compacted so that the decoder is not run on completed sentences active_idx = t.cuda.LongTensor([batch_idx[k] for k in active]) batch_idx = {beam: idx for idx, beam in enumerate(active)} def update_active(t): # select only the remaining active sentences view = t.data.view(-1, remaining_sents, self.params.decoder_rnn_size) new_size = list(t.size()) new_size[-2] = new_size[-2] * len( active_idx) // remaining_sents return Variable( view.index_select(1, active_idx).view(*new_size)) dec_states = (update_active(dec_states[0]), update_active(dec_states[1])) dec_out = update_active(dec_out) # context = update_active(context) remaining_sents = len(active) # (4) package everything up allHyp, allScores = [], [] for b in range(batch_size): scores, ks = beam[b].sort_best() # print scores # print ks allScores += [scores[:n_best]] hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]) # print hyps # print "------------------" allHyp += [hyps] # print '==== Complete =========' return allHyp, allScores
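# Standalone math check (a sketch, not part of the model code): the closed-form KL term
# used throughout these classes,
#     KL(N(mu, sigma^2) || N(0, I)) = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)),
# can be cross-checked against torch.distributions.
import torch as t

mu = t.randn(4, 8)
logvar = t.randn(4, 8)

closed_form = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean()

p = t.distributions.Normal(mu, t.exp(0.5 * logvar))             # q(z|x), scale is the std
q = t.distributions.Normal(t.zeros_like(mu), t.ones_like(mu))   # p(z) = N(0, I)
via_distributions = t.distributions.kl_divergence(p, q).sum(1).mean()

print(closed_form.item(), via_distributions.item())  # the two values agree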
class RVAE(nn.Module): def __init__(self, params,params_2): super(RVAE, self).__init__() self.params = params self.params_2 = params_2 #Encoder-2 parameters self.embedding = Embedding(self.params, '') self.embedding_2 = Embedding(self.params_2, '') self.encoder = Encoder(self.params) self.encoder_2 = Encoder(self.params_2) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.encoder_3 = Encoder(self.params) self.decoder = Decoder(self.params_2) #change this to params_2 def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, encoder_word_input_2=None, encoder_character_input_2=None, decoder_word_input_2=None, decoder_character_input_2=None, z=None, initial_state=None): #Modified the parameters of forward function according to Encoder-2 """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param initial_state: initial state of decoder rnn in order to perform sampling :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] final rnn state with shape of [num_layers, batch_size, decoder_rnn_size] """ assert parameters_allocation_check(self), \ 'Invalid CUDA options. Parameters should be allocated in the same memory' use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input_2], True) \ or (z is not None and decoder_word_input_2 is not None), \ "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments" if z is None: ''' Get context from encoder and sample z ~ N(mu, std) ''' [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) ''' ===================================================Doing the same for encoder-2=================================================== ''' [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2) ''' ================================================================================================================================== ''' context , h_0 , c_0 = self.encoder(encoder_input, None) State = (h_0,c_0) #Final state of Encoder-1 context_2 , _ , _ = self.encoder_2( encoder_input_2, State ) #Encoder_2 for Ques_2 mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) std = t.exp(0.5 * logvar) z = Variable(t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() encoder_input = self.embedding(encoder_word_input, encoder_character_input) _ , h_0 , c_0 = self.encoder_3(encoder_input, None) initial_state = (h_0,c_0) #Final state of Encoder-1 else: kld = None decoder_input_2 = self.embedding.word_embed(decoder_word_input_2) # What to do with this decoder input ? 
--> Slightly resolved out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state) # Take a look at the decoder return out, final_state, kld def learnable_parameters(self): # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader, batch_loader_2): def train(i, batch_size, use_cuda, dropout, start_index): input = batch_loader.next_batch(batch_size, 'train', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input ''' =================================================== Input for Encoder-2 ======================================================== ''' input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target] = input_2 ''' ================================================================================================================================ ''' # exit() logits, _, kld = self(dropout, encoder_word_input, encoder_character_input, encoder_word_input_2,encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) loss = 79 * cross_entropy + kld_coef(i) * kld optimizer.zero_grad() loss.backward() optimizer.step() return cross_entropy, kld, kld_coef(i) return train def validater(self, batch_loader,batch_loader_2): def validate(batch_size, use_cuda, start_index): input = batch_loader.next_batch(batch_size, 'valid', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input ''' ==================================================== Input for Encoder-2 ======================================================== ''' input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target] = input_2 ''' ================================================================================================================================== ''' logits, _, kld = self(0., encoder_word_input, encoder_character_input, encoder_word_input_2,encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader, seq_len, seed, use_cuda, State): seed = Variable(t.from_numpy(seed).float()) if use_cuda: seed = seed.cuda() 
decoder_word_input_np, decoder_character_input_np = batch_loader.go_input(1) decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda() result = '' initial_state = State for i in range(seq_len): logits, initial_state, _ = self(0., None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) # forward(self, drop_prob, # encoder_word_input=None, encoder_character_input=None, # encoder_word_input_2=None, encoder_character_input_2=None, # decoder_word_input_2=None, decoder_character_input_2=None, # z=None, initial_state=None): # logits = logits.view(-1, self.params.word_vocab_size) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) # print '---------------------------------------' # print 'Printing logits' # print logits # print '------------------------------------------' prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution(prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += ' ' + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]]]) decoder_character_input_np = np.array([[batch_loader.encode_characters(word)]]) decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable(t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda(), decoder_character_input.cuda() return result def sampler(self, batch_loader, seq_len, seed, use_cuda): input = batch_loader.next_batch(1, 'valid', 1) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) _ , h0 , c0 = self.encoder_3(encoder_input, None) State = (h0,c0) # print '----------------------' # print 'Printing h0 ---------->' # print h0 # print '----------------------' # State = None result = self.sample(batch_loader, seq_len, seed, use_cuda, State) return result
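kld_coef(i) in the trainer above is imported from elsewhere in the project and is not shown here. The sketch below is a typical sigmoid KL-annealing schedule of that kind (as in Bowman et al., "Generating Sentences from a Continuous Space"); the midpoint and steepness constants are illustrative assumptions, not the repository's values.

import math

def kld_coef_sketch(i, midpoint=3500, steepness=0.0025):
    """Anneal the KL weight from ~0 towards 1 as the training step i grows (sigmoid warm-up)."""
    # Illustrative constants only: midpoint and steepness are assumptions for this sketch.
    return 1.0 / (1.0 + math.exp(-steepness * (i - midpoint)))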
class RVAE(nn.Module): def __init__(self, params, params_2): super(RVAE, self).__init__() self.params = params self.params_2 = params_2 # Encoder-2 parameters self.embedding = Embedding(self.params, '') self.embedding_2 = Embedding(self.params_2, '', True) self.encoder = Encoder(self.params) self.encoder_2 = Encoder(self.params_2) # self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) # self.encoder_3 = Encoder(self.params) self.decoder = Decoder(self.params_2) # change this to params_2 def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, encoder_word_input_2=None, encoder_character_input_2=None, decoder_word_input_2=None, decoder_character_input_2=None, z=None, initial_state=None): assert parameters_allocation_check(self) use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input_2], True) \ or (z is not None and decoder_word_input_2 is not None) if z is None: [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2) context, h_0, c_0 = self.encoder(encoder_input, None) State = (h_0, c_0) context_2, _, _ = self.encoder_2(encoder_input_2, State) mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() # encoder_input = self.embedding(encoder_word_input, encoder_character_input) # _ , h_0 , c_0 = self.encoder_3(encoder_input, None) initial_state = State else: kld = None mu = None std = None decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2) out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state) return out, final_state, kld, mu, std def learnable_parameters(self): return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader, batch_loader_2): def train(i, batch_size, use_cuda, dropout, start_index): input = batch_loader.next_batch(batch_size, 'train', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 logits, _, kld, _, _ = self(dropout, encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) loss = 79 * cross_entropy + kld_coef(i) * 
kld optimizer.zero_grad() loss.backward() optimizer.step() return cross_entropy, kld, kld_coef(i) return train def validater(self, batch_loader, batch_loader_2): def validate(batch_size, use_cuda, start_index): input = batch_loader.next_batch(batch_size, 'valid', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 logits, _, kld, _, _ = self(0., encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader, seq_len, seed, use_cuda, State): if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() result = '' initial_state = State for i in range(seq_len): logits, initial_state, _, _, _ = self(0., None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) # forward(self, drop_prob, # encoder_word_input=None, encoder_character_input=None, # encoder_word_input_2=None, encoder_character_input_2=None, # decoder_word_input_2=None, decoder_character_input_2=None, # z=None, initial_state=None): # logits = logits.view(-1, self.params.word_vocab_size) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) # print '---------------------------------------' # print 'Printing logits' # print logits # print '------------------------------------------' prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution( prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += ' ' + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]] ]) decoder_character_input_np = np.array( [[batch_loader.encode_characters(word)]]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() return result def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i, beam_size, n_best): input = batch_loader.next_batch(1, 'valid', i) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) _, h0, c0 = 
self.encoder(encoder_input, None) State = (h0, c0) results, scores = self.sample_beam(batch_loader_2, seq_len, seed, use_cuda, State, beam_size, n_best) return results, scores def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State, beam_size, n_best): # seed = Variable(t.from_numpy(seed).float()) if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() dec_states = State dec_states = [ dec_states[0].repeat(1, beam_size, 1), dec_states[1].repeat(1, beam_size, 1) ] drop_prob = 0.0 beam_size = beam_size batch_size = 1 beam = [ Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size) ] batch_idx = list(range(batch_size)) remaining_sents = batch_size for i in range(seq_len): input = t.stack([ b.get_current_state() for b in beam if not b.done ]).t().contiguous().view(1, -1) trg_emb = self.embedding_2.word_embed( Variable(input).transpose(1, 0)) # print trg_emb.size() # print seed.size() trg_h, dec_states = self.decoder.only_decoder_beam( trg_emb, seed, drop_prob, dec_states) dec_out = trg_h.squeeze(1) # print "dec_out:", dec_out.size() out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0) word_lk = out.view(beam_size, remaining_sents, -1).transpose(0, 1).contiguous() active = [] for b in range(batch_size): if beam[b].done: continue idx = batch_idx[b] if not beam[b].advance(word_lk.data[idx]): active += [b] for dec_state in dec_states: # iterate over h, c # layers x beam*sent x dim sent_states = dec_state.view(-1, beam_size, remaining_sents, dec_state.size(2))[:, :, idx] sent_states.data.copy_( sent_states.data.index_select( 1, beam[b].get_current_origin())) if not active: break active_idx = t.cuda.LongTensor([batch_idx[k] for k in active]) batch_idx = {beam: idx for idx, beam in enumerate(active)} def update_active(t): view = t.data.view(-1, remaining_sents, self.params.decoder_rnn_size) new_size = list(t.size()) new_size[-2] = new_size[-2] * len(active_idx) \ // remaining_sents return Variable( view.index_select(1, active_idx).view(*new_size)) dec_states = (update_active(dec_states[0]), update_active(dec_states[1])) dec_out = update_active(dec_out) remaining_sents = len(active) allHyp, allScores = [], [] for b in range(batch_size): scores, ks = beam[b].sort_best() allScores += [scores[:n_best]] hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]) allHyp += [hyps] return allHyp, allScores
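Every forward pass above samples z with the same reparameterisation trick and the closed-form KL divergence between N(mu, std) and the standard normal prior. Below is a self-contained sketch of that step, written against current PyTorch rather than the older Variable API used in these classes.

import torch

def reparameterize(mu, logvar):
    """Return z = mu + std * eps with eps ~ N(0, I), plus the analytic KL(N(mu, std) || N(0, 1))."""
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    z = mu + std * eps
    # Summed over latent dimensions, averaged over the batch, matching the kld term above.
    kld = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)).mean()
    return z, kld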
class RVAE(nn.Module): def __init__(self, params, params_2): super(RVAE, self).__init__() self.params = params self.params_2 = params_2 self.embedding = Embedding(self.params, '') self.embedding_2 = Embedding(self.params_2, '') self.encoder = Encoder(self.params) self.encoder_2 = Encoder(self.params_2) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.encoder_3 = Encoder(self.params) self.decoder = Decoder(self.params_2) def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, encoder_word_input_2=None, encoder_character_input_2=None, decoder_word_input_2=None, decoder_character_input_2=None, z=None, initial_state=None): assert parameters_allocation_check(self) use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input_2], True) \ or (z is not None and decoder_word_input_2 is not None) if z is None: [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2) context, h_0, c_0 = self.encoder(encoder_input, None) State = (h_0, c_0) context_2, _, _ = self.encoder_2(encoder_input_2, State) mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() encoder_input = self.embedding(encoder_word_input, encoder_character_input) _, h_0, c_0 = self.encoder_3(encoder_input, None) initial_state = (h_0, c_0) else: kld = None decoder_input_2 = self.embedding.word_embed(decoder_word_input_2) out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state) return out, final_state, kld def learnable_parameters(self): return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader, batch_loader_2): def train(i, batch_size, use_cuda, dropout, start_index): input = batch_loader.next_batch(batch_size, 'train', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 logits, _, kld = self(dropout, encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) loss = 79 * cross_entropy + kld_coef(i) * kld optimizer.zero_grad() loss.backward() optimizer.step() return cross_entropy, kld, kld_coef(i) return train def validater(self, batch_loader, 
batch_loader_2): def validate(batch_size, use_cuda, start_index): input = batch_loader.next_batch(batch_size, 'valid', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 logits, _, kld = self(0., encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader, seq_len, seed, use_cuda, State): seed = Variable(t.from_numpy(seed).float()) if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() result = '' initial_state = State for i in range(seq_len): logits, initial_state, _ = self(0., None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) logits = logits.view(-1, self.params_2.word_vocab_size) prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution( prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += ' ' + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]] ]) decoder_character_input_np = np.array( [[batch_loader.encode_characters(word)]]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() return result def sampler(self, batch_loader, seq_len, seed, use_cuda): input = batch_loader.next_batch(1, 'valid', 1) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) _, h0, c0 = self.encoder_3(encoder_input, None) State = (h0, c0) result = self.sample(batch_loader, seq_len, seed, use_cuda, State) return result
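A rough usage sketch of the trainer/validater closures defined above. How the batch loaders are constructed and how start_index advances through the dataset are assumptions standing in for the project's real driver script.

import torch as t
from torch.optim import Adam

def train_loop_sketch(rvae, batch_loader, batch_loader_2,
                      num_iterations=10000, batch_size=32, dropout=0.3):
    use_cuda = t.cuda.is_available()
    optimizer = Adam(rvae.learnable_parameters(), lr=5e-5)
    train_step = rvae.trainer(optimizer, batch_loader, batch_loader_2)
    validate_step = rvae.validater(batch_loader, batch_loader_2)

    for i in range(num_iterations):
        start_index = i * batch_size  # assumed indexing scheme for next_batch
        cross_entropy, kld, coef = train_step(i, batch_size, use_cuda, dropout, start_index)
        if i % 100 == 0:
            valid_ce, valid_kld = validate_step(batch_size, use_cuda, 0)
            print('iter', i, 'train CE', cross_entropy, 'KLD', kld,
                  'coef', coef, 'valid CE', valid_ce)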
class RVAE(nn.Module): def __init__(self, params: object, params_2: object, path: str) -> None: """ [summary] initializes the RVAE with the correct parameters and data files Args: params (object): [description] parameters for original encoder params_2 (object): [description] parameters for paraphrase encoder path (str): [description] a path to the data files """ super(RVAE, self).__init__() self.params = params self.params_2 = params_2 self.embedding = Embedding(self.params, path) self.embedding_2 = Embedding(self.params_2, path, True) self.encoder_original = Encoder(self.params) if self.params.hrvae: self.encoder_paraphrase = EncoderHR(self.params_2) else: self.encoder_paraphrase = Encoder(self.params_2) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) if self.params.attn_model and self.params.res_model: self.decoder = DecoderResidualAttention(self.params_2) elif self.params.attn_model: self.decoder = DecoderAttention(self.params_2) elif self.params.res_model: self.decoder = DecoderResidual(self.params_2) else: self.decoder = Decoder(self.params_2) def forward(self, unk_idx: int, drop_prob: float, encoder_word_input: object = None, encoder_character_input: object = None, encoder_word_input_2: object = None, encoder_character_input_2: object = None, decoder_word_input_2: object = None, decoder_character_input_2: object = None, z: object = None, initial_state: tuple = None) -> tuple: """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param initial_state: initial state of decoder rnn in order to perform sampling :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] final rnn state with shape of [num_layers, batch_size, decoder_rnn_size] """ assert parameters_allocation_check(self), \ 'Invalid CUDA options. Parameters should be allocated in the same memory' use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input_2], True) \ or (z is not None and decoder_word_input_2 is not None), \ "Invalid input. 
If z is None then encoder and decoder inputs should be passed as arguments" if z is None: ''' Get context from encoder and sample z ~ N(mu, std) ''' [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input, unk_idx, drop_prob) ''' ===================================================Doing the same for encoder-2=================================================== ''' [batch_size_2, _] = encoder_word_input_2.size() encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2, unk_idx, drop_prob) ''' ================================================================================================================================== ''' enc_out_original, context, h_0, c_0, _ = self.encoder_original( encoder_input, None) state_original = (h_0, c_0) # Final state of Encoder-1 enc_out_paraphrase, context_2, h_0, c_0, context_ = self.encoder_paraphrase( encoder_input_2, state_original) # Encoder_2 for Ques_2 state_paraphrase = (h_0, c_0) # Final state of Encoder-2 if context_ is not None: mu_ = [] logvar_ = [] for entry in context_: mu_.append(self.context_to_mu(entry)) logvar_.append(self.context_to_logvar(entry)) std = t.exp(0.5 * logvar_[-1]) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu_[-1] mu = t.stack(mu_) logvar = t.stack(logvar_) kld = -0.5 * t.sum(1 + logvar - mu.pow(2) - logvar.exp()) kld = kld / mu.shape[0] else: mu = self.context_to_mu(context_2) logvar = self.context_to_logvar(context_2) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() else: kld = None mu = None std = None decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2) out, final_state = self.decoder(decoder_input_2, z, drop_prob, enc_out_paraphrase, state_original) return out, final_state, kld, mu, std def learnable_parameters(self) -> list: """ creates a gradients for each parameter in the class to be optimized """ return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer: object, batch_loader: object, batch_loader_2: object) -> object: def train(coef: float, batch_size: int, use_cuda: bool, dropout: float, start_index: int) -> tuple: """ train the encoder/decoder step by step via train() """ input = batch_loader.next_batch(batch_size, 'train', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target, _ ] = input ''' =================================================== Input for Encoder-2 ========================================================''' input_2 = batch_loader_2.next_batch(batch_size, 'train', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target, _ ] = input_2 unk_idx = None ''' ================================================================================================================================ ''' logits, _, kld, _, _ = self(unk_idx, dropout, encoder_word_input, encoder_character_input, encoder_word_input_2, 
encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) loss = 79 * cross_entropy + coef * kld # 79 as arbitrary loss weight optimizer.zero_grad() loss.backward() optimizer.step() return cross_entropy, kld, coef return train def validater(self, batch_loader, batch_loader_2): def validate(batch_size, use_cuda, start_index): """ validate the encoder/decoder step by step via validate() """ input = batch_loader.next_batch(batch_size, 'valid', start_index) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target ] = input ''' ==================================================== Input for Encoder-2 ======================================================== ''' input_2 = batch_loader_2.next_batch(batch_size, 'valid', start_index) input_2 = [Variable(t.from_numpy(var)) for var in input_2] input_2 = [var.long() for var in input_2] input_2 = [var.cuda() if use_cuda else var for var in input_2] [ encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, target ] = input_2 ''' ================================================================================================================================== ''' unk_idx = batch_loader_2.word_to_idx[batch_loader_2.unk_token] logits, _, kld, _, _ = self(unk_idx, 0., encoder_word_input, encoder_character_input, encoder_word_input_2, encoder_character_input_2, decoder_word_input_2, decoder_character_input_2, z=None) # logits = logits.view(-1, self.params.word_vocab_size) logits = logits.view(-1, self.params_2.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target) return cross_entropy, kld return validate def sample(self, batch_loader: object, seq_len: int, seed: int, use_cuda: bool, State: object) -> tuple: """ unroll the decoder step by step to obtain a sample, based on the input encoded original and a random seed """ if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() result = '' initial_state = State for i in range(seq_len): logits, initial_state, _, _, _ = self(0., None, None, None, None, decoder_word_input, decoder_character_input, seed, initial_state) logits = logits.view(-1, self.params_2.word_vocab_size) prediction = F.softmax(logits) word = batch_loader.sample_word_from_distribution( prediction.data.cpu().numpy()[-1]) if word == batch_loader.end_token: break result += ' ' + word decoder_word_input_np = np.array([[batch_loader.word_to_idx[word]] ]) decoder_character_input_np = np.array( [[batch_loader.encode_characters(word)]]) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() return result def sampler(self, batch_loader, batch_loader_2, seq_len, seed, use_cuda, i, beam_size, 
n_best): """ sample using a encoded sentence and a beam search over the states of the decoder """ input = batch_loader.next_batch(1, 'valid', i) input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, decoder_character_input, target, _ ] = input encoder_input = self.embedding(encoder_word_input, encoder_character_input) encoder_output, _, h0, c0, _ = self.encoder_original( encoder_input, None) State = (h0, c0) results, scores = self.sample_beam(batch_loader_2, seq_len, seed, use_cuda, State, beam_size, n_best, encoder_output) return results, scores def sample_beam(self, batch_loader, seq_len, seed, use_cuda, State, beam_size, n_best, encoder_output): """ sample and beam search for unrolling every step of the decoder based on a encoded original input sentence """ if use_cuda: seed = seed.cuda() decoder_word_input_np, decoder_character_input_np = batch_loader.go_input( 1) decoder_word_input = Variable( t.from_numpy(decoder_word_input_np).long()) decoder_character_input = Variable( t.from_numpy(decoder_character_input_np).long()) if use_cuda: decoder_word_input, decoder_character_input = decoder_word_input.cuda( ), decoder_character_input.cuda() dec_states = State dec_states = [ dec_states[0].repeat(1, beam_size, 1), dec_states[1].repeat(1, beam_size, 1) ] drop_prob = 0.0 beam_size = beam_size batch_size = 1 beam = [ Beam(beam_size, batch_loader, cuda=True) for k in range(batch_size) ] batch_idx = list(range(batch_size)) remaining_sents = batch_size for i in range(seq_len): input = t.stack([ b.get_current_state() for b in beam if not b.done ]).t().contiguous().view(1, -1) trg_emb = self.embedding_2.word_embed( Variable(input).transpose(1, 0)) trg_h, dec_states = self.decoder.only_decoder_beam( trg_emb, seed, drop_prob, encoder_output, dec_states) dec_out = trg_h.squeeze(1) out = F.softmax(self.decoder.fc(dec_out)).unsqueeze(0) word_lk = out.view(beam_size, remaining_sents, -1).transpose(0, 1).contiguous() active = [] for b in range(batch_size): if beam[b].done: continue idx = batch_idx[b] if not beam[b].advance(word_lk.data[idx]): active += [b] for dec_state in dec_states: # iterate over h, c # layers x beam*sent x dim sent_states = dec_state.view(-1, beam_size, remaining_sents, dec_state.size(2))[:, :, idx] sent_states.data.copy_( sent_states.data.index_select( 1, beam[b].get_current_origin())) if not active: break # in this section, the sentences that are still active are # compacted so that the decoder is not run on completed sentences active_idx = t.cuda.LongTensor([batch_idx[k] for k in active]) batch_idx = {beam: idx for idx, beam in enumerate(active)} def update_active(t): # select only the remaining active sentences view = t.data.view(-1, remaining_sents, self.params.decoder_rnn_size) new_size = list(t.size()) new_size[-2] = new_size[-2] * len(active_idx) \ // remaining_sents return Variable( view.index_select(1, active_idx).view(*new_size)) dec_states = (update_active(dec_states[0]), update_active(dec_states[1])) dec_out = update_active(dec_out) remaining_sents = len(active) allHyp, allScores = [], [] for b in range(batch_size): scores, ks = beam[b].sort_best() allScores += [scores[:n_best]] hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]]) allHyp += [hyps] # print '==== Complete =========' return allHyp, allScores
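The beam bookkeeping in sample_beam tiles the decoder state once per beam with repeat(1, beam_size, 1) and then, via update_active, keeps only the state columns of sentences whose beams are still running. Below is a toy illustration of those two tensor manipulations; the shapes are made up for the example.

import torch

num_layers, batch_size, rnn_size, beam_size = 2, 3, 4, 5
h = torch.randn(num_layers, batch_size, rnn_size)

# Tile the state so every sentence owns beam_size copies,
# as done with dec_states[0].repeat(1, beam_size, 1) above.
h_beam = h.repeat(1, beam_size, 1)        # [num_layers, beam_size * batch_size, rnn_size]

# Keep only the still-active sentences (here sentences 0 and 2), mirroring update_active:
# fold layers and beams into the leading dim, index_select the sentence dim, reshape back.
active_idx = torch.tensor([0, 2])
view = h_beam.view(-1, batch_size, rnn_size)
kept = view.index_select(1, active_idx)
h_beam = kept.view(num_layers, beam_size * len(active_idx), rnn_size)
print(h_beam.size())                      # torch.Size([2, 10, 4])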
class RVAE_dilated(nn.Module): def __init__(self, params, regularised): super(RVAE_dilated, self).__init__() self.params = params self.embedding = Embedding(self.params, '') self.regularised = regularised if self.regularised: print("Highly regularised Encoder") self.encoder = HREncoder(self.params) self.layer_dim = self.params.encoder_num_layers * 2 * self.params.encoder_rnn_size self.context_to_mu = nn.Linear(self.layer_dim * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear( self.layer_dim * 2, self.params.latent_variable_size) elif not self.regularised: print('Classic encoder') self.encoder = Encoder(self.params) self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.context_to_logvar = nn.Linear( self.params.encoder_rnn_size * 2, self.params.latent_variable_size) self.decoder = Decoder(self.params) def forward(self, drop_prob, encoder_word_input=None, encoder_character_input=None, decoder_word_input=None, z=None): """ :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type :param encoder_character_input: An tensor with shape of [batch_size, seq_len, max_word_len] of Long type :param decoder_word_input: An tensor with shape of [batch_size, max_seq_len + 1] of Long type :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout :param z: context if sampling is performing :return: unnormalized logits of sentence words distribution probabilities with shape of [batch_size, seq_len, word_vocab_size] kld loss estimation """ assert parameters_allocation_check(self), \ 'Invalid CUDA options. Parameters should be allocated in the same memory' use_cuda = self.embedding.word_embed.weight.is_cuda assert z is None and fold(lambda acc, parameter: acc and parameter is not None, [encoder_word_input, encoder_character_input, decoder_word_input], True) \ or (z is not None and decoder_word_input is not None), \ "Invalid input. 
If z is None then encoder and decoder inputs should be passed as arguments" if z is None: ''' Get context from encoder and sample z ~ N(mu, std) ''' [batch_size, _] = encoder_word_input.size() encoder_input = self.embedding(encoder_word_input, encoder_character_input) context = self.encoder(encoder_input) mu = self.context_to_mu(context) logvar = self.context_to_logvar(context) std = t.exp(0.5 * logvar) z = Variable( t.randn([batch_size, self.params.latent_variable_size])) if use_cuda: z = z.cuda() z = z * std + mu kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze() else: kld = None decoder_input = self.embedding.word_embed(decoder_word_input) out = self.decoder(decoder_input, z, drop_prob) return out, kld def learnable_parameters(self): # word_embedding is constant parameter thus it must be dropped from list of parameters for optimizer return [p for p in self.parameters() if p.requires_grad] def trainer(self, optimizer, batch_loader): perplexity = Perplexity() def train(i, batch_size, use_cuda, dropout): input = batch_loader.next_batch(batch_size, 'train') input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, _, target ] = input logits, kld = self(dropout, encoder_word_input, encoder_character_input, decoder_word_input, z=None) logits = logits.view(-1, self.params.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target, ignore_index=0, reduction="sum") # since cross entropy is averaged over seq_len, it is necessary to approximate new kld # loss = 79 * cross_entropy + kld #loss = cross_entropy + kld logits = logits.view(batch_size, -1, self.params.word_vocab_size) target = target.view(batch_size, -1) ppl = perplexity(logits, target).mean() optimizer.zero_grad() cross_entropy.backward() optimizer.step() return ppl, kld, cross_entropy return train def validater(self, batch_loader): perplexity = Perplexity() def validate(batch_size, use_cuda): input = batch_loader.next_batch(batch_size, 'valid') input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, _, target ] = input logits, kld = self(0., encoder_word_input, encoder_character_input, decoder_word_input, z=None) ppl = perplexity(logits, target).mean() logits = logits.view(-1, self.params.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target, ignore_index=0, reduction="sum") #loss = cross_entropy + kld return ppl, kld, cross_entropy return validate def tester(self, batch_loader): perplexity = Perplexity() def test(batch_size, use_cuda): input = batch_loader.next_batch(batch_size, 'test') input = [Variable(t.from_numpy(var)) for var in input] input = [var.long() for var in input] input = [var.cuda() if use_cuda else var for var in input] [ encoder_word_input, encoder_character_input, decoder_word_input, _, target ] = input logits, kld = self(0., encoder_word_input, encoder_character_input, decoder_word_input, z=None) ppl = perplexity(logits, target).mean() logits = logits.view(-1, self.params.word_vocab_size) target = target.view(-1) cross_entropy = F.cross_entropy(logits, target, ignore_index=0, reduction="sum") #loss = cross_entropy + kld return ppl, kld, cross_entropy return test def sample(self, batch_loader, seq_len, 
               seed, use_cuda):
        seed = Variable(t.from_numpy(seed).float())
        if use_cuda:
            seed = seed.cuda()

        decoder_word_input_np, _ = batch_loader.go_input(1)
        decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
        if use_cuda:
            decoder_word_input = decoder_word_input.cuda()

        result = ''

        for i in range(seq_len):
            logits, _ = self(0., None, None, decoder_word_input, seed)
            [_, sl, _] = logits.size()
            logits = logits.view(-1, self.params.word_vocab_size)
            prediction = F.softmax(logits)
            prediction = prediction.view(1, sl, -1)

            # take the last word from the prediction and append it to the result
            word = batch_loader.sample_word_from_distribution(
                prediction.data.cpu().numpy()[0, -1])

            if word == batch_loader.end_token:
                break

            result += ' ' + word

            word = np.array([[batch_loader.word_to_idx[word]]])
            decoder_word_input_np = np.append(decoder_word_input_np, word, 1)
            decoder_word_input = Variable(t.from_numpy(decoder_word_input_np).long())
            if use_cuda:
                decoder_word_input = decoder_word_input.cuda()

        return result
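batch_loader.sample_word_from_distribution is defined elsewhere in the project. Below is a minimal sketch of what such a helper typically does, assuming the loader also keeps an idx_to_word list (that name is an assumption for illustration).

import numpy as np

def sample_word_from_distribution_sketch(distribution, idx_to_word):
    """Draw one word index from a softmax distribution over the vocabulary and map it to a word."""
    distribution = np.asarray(distribution, dtype=np.float64)
    distribution = distribution / distribution.sum()  # re-normalise against floating point drift
    idx = np.random.choice(len(distribution), p=distribution)
    return idx_to_word[idx]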