def forward(self, input, hidden, users, return_h=False): emb, mask = embedded_dropout( self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = emb.cuda() emb = self.lockdrop(emb, self.dropouti) if self.use_ind: uemb = [] for ik in range(len(users)): tuemb = [] for qk in range(len(users[ik])): uex, _ = embedded_dropout( self.user_embed, input[ik][qk], dropout=self.dropoute if self.training else 0, mask=mask) tuemb.append(uex) uemb.append(torch.stack(tuemb)) uemb = torch.stack(uemb).cuda() if self.induse == 'cat': raw_output = torch.cat([emb, uemb], 2) elif self.induse == 'sum': raw_output = emb + uemb else: uemb = None raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) hidden = new_hidden output = self.lockdrop(raw_output, self.dropout) outputs.append(output) result = output.view(output.size(0) * output.size(1), output.size(2)) if return_h: return result, hidden, raw_outputs, outputs return result, hidden
def forward(self, inputWords, inputLang, hidden, return_h=False):
    """Joint word + language-id LM forward pass.

    Embeds both streams, optionally concatenates them, runs the RNN stack,
    then decodes a language prediction and a (language-bias-adjusted) word
    prediction from the same basis.

    Returns (result, langResult, hidden) and, with return_h, also the
    per-layer raw and dropped activations.
    """
    wordEmb = embedded_dropout(
        self.word_encoder, inputWords,
        dropout=self.dropoute if self.training else 0)
    wordEmb = self.lockdrop(wordEmb, self.dropouti)
    langEmb = embedded_dropout(
        self.lang_encoder, inputLang,
        dropout=self.dropoute if self.training else 0)
    langEmb = self.lockdrop(langEmb, self.dropouti)
    # Optionally condition the word stream on the language embedding.
    # FIX: idiomatic truthiness test (was `if (self.useLangEncoder == True)`).
    if self.useLangEncoder:
        raw_output = torch.cat((wordEmb, langEmb), 2)
    else:
        raw_output = wordEmb
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    output = self.lockdrop(raw_output, self.dropout)
    outputs.append(output)
    # Flatten to (seq_len*batch, feat) as the shared decoding basis.
    predBasis = output.view(
        output.size(0) * output.size(1), output.size(2))
    langPred = self.langDecoder(predBasis)
    decoded = self.decoder(predBasis)
    # FIX: `x is not None` (was `not self.langDecoderBias is None`).
    if self.langDecoderBias is not None:
        biasTerm = self.langDecoderBias(langPred)
        decoded += biasTerm
    result = decoded.view(output.size(0), output.size(1), decoded.size(1))
    langResult = langPred.view(output.size(0), output.size(1),
                               langPred.size(1))
    if return_h:
        return result, langResult, hidden, raw_outputs, outputs
    return result, langResult, hidden
def forward(self, input, hidden, return_h=False):
    """AWD-LSTM forward: embed -> stacked RNNs -> flattened output.

    return_h (used only during training) additionally yields the per-layer
    raw and dropped activations for AR/TAR regularization.
    """
    token_emb = embedded_dropout(
        self.encoder, input,
        dropout=self.dropoute if self.training else 0)  # dropoute=0.1
    stream = self.lockdrop(token_emb, self.dropouti)  # dropouti=0.65

    updated_hidden = []
    raws, dropped = [], []
    for depth, cell in enumerate(self.rnns):
        stream, h_next = cell(stream, hidden[depth])
        updated_hidden.append(h_next)
        raws.append(stream)
        if depth < self.nlayers - 1:
            stream = self.lockdrop(stream, self.dropouth)  # dropouth=0.3
            dropped.append(stream)
    hidden = updated_hidden

    # output dropout=0.4 (not mentioned in Lookahead paper)
    final = self.lockdrop(stream, self.dropout)
    dropped.append(final)
    flat = final.view(final.size(0) * final.size(1), final.size(2))
    return (flat, hidden, raws, dropped) if return_h else (flat, hidden)
def forward(self, input, hidden, return_h=False, return_prob=False, detach=False):
    """Mixture-of-softmaxes (MoS) forward pass.

    Runs the RNN stack, projects to `n_experts` latent softmaxes, and mixes
    them with a prior over experts. Returns probabilities when return_prob,
    otherwise log-probabilities; shape (seq_len, batch, ntoken).
    detach cuts gradient flow from the heads back into the RNN stack.
    """
    batch_size = input.size(1)
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training else 0)
    emb = self.lockdrop(emb, self.dropouti)
    raw_output = emb
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    output = self.lockdrop(raw_output, self.dropout)
    outputs.append(output)
    if detach:
        output = output.detach()
    latent = self.latent(output)
    latent = self.lockdrop(latent, self.dropoutl)
    logit = self.decoder(latent.view(-1, self.ninp))
    # Mixture weights over the n_experts component softmaxes.
    prior_logit = self.prior(output).contiguous().view(-1, self.n_experts)
    # FIX: pass an explicit dim to softmax — the implicit-dim form is
    # deprecated. dim=-1 equals the implicit choice for these 2-D inputs,
    # so numerical behavior is unchanged.
    prior = nn.functional.softmax(prior_logit, dim=-1)
    prob = nn.functional.softmax(logit.view(-1, self.ntoken), dim=-1).view(
        -1, self.n_experts, self.ntoken)
    # Weighted mixture: sum_k prior_k * softmax_k.
    prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1)
    if return_prob:
        model_output = prob
    else:
        log_prob = torch.log(prob.add_(1e-8))  # epsilon avoids log(0)
        model_output = log_prob
    model_output = model_output.view(-1, batch_size, self.ntoken)
    if return_h:
        return model_output, hidden, raw_outputs, outputs
    return model_output, hidden
def forward(self, input, hidden, v=None, s=None, return_h=False): emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] new_velocity = [] new_scale = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM': raw_output, new_h, new_v = rnn(raw_output, hidden[l], v[l]) elif self.rnn_type == 'ALSTM': raw_output, new_h, new_v, new_s = rnn(raw_output, hidden[l], v[l], s[l]) else: raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM': new_velocity.append(new_v) if self.rnn_type == 'ALSTM': new_velocity.append(new_v) new_scale.append(new_s) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) hidden = new_hidden if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM': v = new_velocity if self.rnn_type == 'ALSTM': v = new_velocity s = new_scale output = self.lockdrop(raw_output, self.dropout) outputs.append(output) result = output.view(output.size(0) * output.size(1), output.size(2)) if return_h: if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM': return result, hidden, v, raw_outputs, outputs elif self.rnn_type == 'ALSTM': return result, hidden, v, s, raw_outputs, outputs else: return result, hidden, raw_outputs, outputs if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM': return result, hidden, v elif self.rnn_type == 'ALSTM': return result, hidden, v, s else: return result, hidden
def forward(self, input, hidden, return_h=False, train=False):
    """Forward pass that returns the decoder weight/bias alongside the
    activations, so the caller performs the final projection itself
    (supports weight tying and the DRILL joint embedding).

    `train` is accepted but unused here; dropout is gated on self.training
    and self.use_dropout instead.
    """
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training and self.use_dropout else 0)
    emb = self.lockdrop(emb, self.dropouti if self.use_dropout else 0)
    raw_output = emb
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        current_input = raw_output
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output,
                                       self.dropouth if self.use_dropout else 0)
            outputs.append(raw_output)
    hidden = new_hidden
    output = self.lockdrop(raw_output,
                           self.dropout if self.use_dropout else 0)
    outputs.append(output)
    result = output.view(output.size(0)*output.size(1), output.size(2))
    # Tied weights (or a joint embedding) reuse the encoder matrix.
    weight = self.encoder.weight if self.tie_weights or self.joint_emb is not None else self.decoder.weight
    # NOTE(review): the bias condition uses `joint_emb is None` while the
    # weight condition uses `is not None` — confirm this asymmetry is intended.
    bias = self.decoder.bias if self.tie_weights or self.joint_emb is None else self.bias.weight
    if self.joint_emb is not None:
        # DRILL: derive result/weight from the joint embedding.
        result, weight = self.apply_drill(output, weight)
    if return_h:
        return result, weight, bias, hidden, raw_outputs, outputs
    return result, weight, bias, hidden
def forward(self, input, hidden, return_h=False): emb = embedded_dropout(self.encoder, input) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output #print rnn raw_output, new_h = self.run_lstmcell(rnn, raw_output, hidden[l]) del (rnn) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) #hidden = new_hidden #hidden = raw_output[-1], new_hidden output = self.lockdrop(raw_output, self.dropout) outputs.append(output) decoded = self.decoder( output.view(output.size(0) * output.size(1), output.size(2))) result = decoded.view(output.size(0), output.size(1), decoded.size(1)) if return_h: return result, raw_outputs, outputs return result
def forward(self, input, hidden, return_h=False):
    """Embed, run the RNN stack, and return the dropped top-layer output
    (NOT flattened) together with the updated hidden states."""
    x = embedded_dropout(self.encoder, input,
                         dropout=self.dropoute if self.training else 0)
    x = self.lockdrop(x, self.dropouti)

    hidden_out, raws, dropped = [], [], []
    top = self.nlayers - 1
    for depth, cell in enumerate(self.rnns):
        x, h_next = cell(x, hidden[depth])
        hidden_out.append(h_next)
        raws.append(x)
        if depth != top:
            x = self.lockdrop(x, self.dropouth)
            dropped.append(x)
    hidden = hidden_out

    final = self.lockdrop(x, self.dropout)
    dropped.append(final)
    if return_h:
        return final, hidden, raws, dropped
    return final, hidden
def forward(self, input, hidden, return_h=False):
    """Forward pass whose embedding is perturbed with Gaussian noise during
    training (a smoothing/regularization scheme).

    NOTE(review): this variant of embedded_dropout takes an all-ones mask
    tensor and an is_training flag and returns (emb, sigma) — confirm
    against the local helper's signature.
    """
    emb, sigma = embedded_dropout(self.encoder,
                                  torch.ones_like(self.encoder.weight),
                                  input,
                                  dropout=self.dropoute if self.training else 0,
                                  is_training=self.training)
    if self.training:
        # Sample standard-normal noise shaped like sigma, scale by 0.2, and
        # add it to the embeddings (training-time only).
        m = torch.distributions.normal.Normal(torch.zeros_like(sigma),
                                              torch.ones_like(sigma) * 1)
        sigma = m.sample() * 0.2
        emb += sigma
    emb = self.lockdrop(emb, self.dropouti)
    raw_output = emb
    new_hidden = []
    #raw_output, hidden = self.rnn(emb, hidden)
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        current_input = raw_output
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    output = self.lockdrop(raw_output, self.dropout)
    outputs.append(output)
    result = output.view(output.size(0)*output.size(1), output.size(2))
    if return_h:
        return result, hidden, raw_outputs, outputs
    return result, hidden
def forward(self, input, hidden, return_h=False):
    """Standard AWD-LSTM forward pass.

    Embeds `input` with embedding dropout, applies locked input dropout,
    runs the stacked RNNs with inter-layer locked dropout, and returns the
    flattened top-layer output plus new hidden states. With return_h, the
    per-layer raw and dropped activations are returned as well.
    """
    embedded = embedded_dropout(
        self.encoder, input,
        dropout=self.dropoute if self.training else 0)
    layer_in = self.lockdrop(embedded, self.dropouti)

    next_hidden = []
    per_layer_raw, per_layer_dropped = [], []
    last = self.nlayers - 1
    for idx, cell in enumerate(self.rnns):
        layer_in, h_new = cell(layer_in, hidden[idx])
        next_hidden.append(h_new)
        per_layer_raw.append(layer_in)
        if idx != last:
            layer_in = self.lockdrop(layer_in, self.dropouth)
            per_layer_dropped.append(layer_in)
    hidden = next_hidden

    final = self.lockdrop(layer_in, self.dropout)
    per_layer_dropped.append(final)
    flat = final.view(final.size(0) * final.size(1), final.size(2))
    if return_h:
        return flat, hidden, per_layer_raw, per_layer_dropped
    return flat, hidden
def forward(self, input, hidden, return_h=False):
    """Embed, run each RNN layer, decode, and reshape the logits back to
    (seq_len, batch, vocab).

    Kept multilayer for parity with the Salesforce codebase; current
    experiments all run with a single layer.
    """
    x = self.lockdrop(
        embedded_dropout(self.encoder, input,
                         dropout=self.dropoute if self.training else 0),
        self.dropouti)

    next_hidden, raws, dropped = [], [], []
    for depth, cell in enumerate(self.rnns):
        x, h_next = cell(x, hidden[depth])
        next_hidden.append(h_next)
        raws.append(x)
        if depth != self.nlayers - 1:
            x = self.lockdrop(x, self.dropouth)
            dropped.append(x)
    hidden = next_hidden

    final = self.lockdrop(x, self.dropout)
    logits = self.decoder(
        final.view(final.size(0) * final.size(1), final.size(2)))
    shaped = logits.view(final.size(0), final.size(1), logits.size(1))
    if return_h:
        return shaped, hidden, raws, dropped
    return shaped, hidden
def forward(self, input, hidden, return_h=False):
    """Forward pass that re-applies weight-drop masks on every call.

    Each layer's hidden-to-hidden weights are masked with self.wdrop and
    its input-to-hidden weights with 0 before the layer runs.
    """
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training else 0)
    emb = self.lockdrop(emb, self.dropouti)
    raw_output = emb
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        current_input = raw_output
        # Refresh DropConnect-style masks for this layer's weights.
        rnn.h2h.mask_weights(self.wdrop)
        rnn.i2h.mask_weights(0)
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    # Note: this model uses a dedicated output-dropout rate (dropouto).
    output = self.lockdrop(raw_output, self.dropouto)
    outputs.append(output)
    decoded = self.decoder(
        output.view(output.size(0) * output.size(1), output.size(2)))
    result = decoded.view(output.size(0), output.size(1), decoded.size(1))
    if return_h:
        return result, hidden, raw_outputs, outputs
    return result, hidden
def forward(self, input, hidden, c_hidden, return_h=False):
    """Forward pass with an attention-gated context injection on the
    second-to-last layer.

    c_hidden feeds the attention (self._att_) which yields span scores, a
    context vector, and an updated context state `ch`.
    NOTE(review): `span_scores`/`ch` are only bound when nlayers >= 2 —
    with a single layer the return of `ch` would raise NameError; confirm
    nlayers >= 2 is an invariant of this model.
    """
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training else 0)
    emb_drop = self.lockdrop(emb, self.dropouti)
    rnn_h = emb_drop
    new_hidden = []
    rnn_hs, dropped_rnn_hs = [], []
    span_scores = None
    for l, rnn in enumerate(self.rnns):
        rnn_h, new_h = rnn(rnn_h, hidden[l])
        rnn_hs.append(rnn_h)
        raw_rnn_h = rnn_h
        new_hidden.append(new_h)
        if l != self.nlayers - 1:
            rnn_h = self.lockdrop(rnn_h, self.dropouth)
            dropped_rnn_hs.append(rnn_h)
            if l == self.nlayers - 2:
                # Attend over c_hidden, then gate between the raw RNN
                # output and the transformed context.
                span_scores, context, ch = self._att_(rnn_h, c_hidden)
                context = self.lockdrop(context, self.dropouth)
                feats = torch.cat([rnn_h, context], dim=2)
                gate = self._hidden_gate_(feats).sigmoid()
                context = self.nonlinearity(self._hidden_layer_(context))
                context = self.lockdrop(context, self.dropouth)
                # Convex mix of raw output and context, then re-dropped.
                rnn_h = raw_rnn_h * gate + context * (1. - gate)
                rnn_h = self.lockdrop(rnn_h, self.dropouth)
    output = self.lockdrop(rnn_h, self.dropout)
    dropped_rnn_hs.append(output)
    assert len(dropped_rnn_hs) == len(rnn_hs)
    result = output.view(output.size(0) * output.size(1), output.size(-1))
    if return_h:
        return result, span_scores, new_hidden, ch, rnn_hs, dropped_rnn_hs
    return result, span_scores, new_hidden, ch
def forward(self, input, hidden, return_h=False):
    """Embed -> stacked RNNs -> flattened output; the canonical AWD-LSTM
    forward. return_h also yields per-layer raw/dropped activations."""
    # (A previous revision flattened RNN parameters here; left out.)
    seq = self.lockdrop(
        embedded_dropout(self.encoder, input,
                         dropout=self.dropoute if self.training else 0),
        self.dropouti)

    states = []
    raw_acts, dropped_acts = [], []
    for layer_idx, layer in enumerate(self.rnns):
        seq, state = layer(seq, hidden[layer_idx])
        states.append(state)
        raw_acts.append(seq)
        if layer_idx + 1 != self.nlayers:
            seq = self.lockdrop(seq, self.dropouth)
            dropped_acts.append(seq)
    hidden = states

    out = self.lockdrop(seq, self.dropout)
    dropped_acts.append(out)
    flattened = out.view(out.size(0) * out.size(1), out.size(2))
    if return_h:
        return flattened, hidden, raw_acts, dropped_acts
    return flattened, hidden
def forward(self, input, hidden, return_h=False):
    """Run the embedded input through the RNN stack and flatten the result.

    Dropout schedule: embedding dropout (dropoute, training only), locked
    input dropout (dropouti), inter-layer locked dropout (dropouth), and
    output locked dropout (dropout).
    """
    h_in = embedded_dropout(
        self.encoder, input,
        dropout=self.dropoute if self.training else 0)
    h_in = self.lockdrop(h_in, self.dropouti)

    fresh_hidden = []
    layer_raw, layer_drop = [], []
    for i, layer in enumerate(self.rnns):
        h_in, state = layer(h_in, hidden[i])
        fresh_hidden.append(state)
        layer_raw.append(h_in)
        if i != self.nlayers - 1:
            h_in = self.lockdrop(h_in, self.dropouth)
            layer_drop.append(h_in)
    hidden = fresh_hidden

    top = self.lockdrop(h_in, self.dropout)
    layer_drop.append(top)
    collapsed = top.view(top.size(0) * top.size(1), top.size(2))
    if return_h:
        return collapsed, hidden, layer_raw, layer_drop
    return collapsed, hidden
def forward(self, input, hidden, return_h=False):
    """Forward pass over variable-length sequences using pack/pad.

    Batches are sorted by length for pack_padded_sequence, processed, and
    raw_outputs/hidden are restored to the original batch order.
    NOTE(review): `output` (and the `outputs` list) remain in SORTED batch
    order — only raw_outputs and hidden are unsorted. Confirm callers
    account for this, or it is a latent ordering bug.
    """
    nlayers = self.nlayers
    # Mask of valid (non-pad) positions; input is (seq, batch) here.
    mask = compute_mask(input.transpose(1, 0))
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training else 0)
    # emb = self.idrop(emb)
    emb = self.lockdrop(emb, self.dropouti)
    lengths = mask.eq(1).long().sum(1)  # bs
    # Sort by descending length (required by pack_padded_sequence).
    lengths_sort, idx_sort = torch.sort(lengths, dim=0, descending=True)  # bs
    _, idx_unsort = torch.sort(idx_sort, dim=0)  # bs
    emb_sort = emb.index_select(1, idx_sort)  # sl * bs * ninp
    hid_sort = [(h[0].index_select(1, idx_sort),
                 h[1].index_select(1, idx_sort)) for h in hidden]
    # raw_output = emb_sort
    new_hidden = []
    raw_outputs = []
    raw_outputs_sorted = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        # Pack -> run layer -> unpack, reusing emb_sort as the stream var.
        emb_sort = torch.nn.utils.rnn.pack_padded_sequence(
            emb_sort, lengths_sort)
        current_input = emb_sort
        emb_sort, new_h = rnn(emb_sort, hid_sort[l])
        emb_sort, _ = torch.nn.utils.rnn.pad_packed_sequence(emb_sort)
        new_hidden.append(new_h)
        raw_outputs.append(emb_sort)
        if l != nlayers - 1:
            emb_sort = self.lockdrop(emb_sort, self.dropouth)
            outputs.append(emb_sort)
    # Restore original batch order for raw outputs and hidden states.
    raw_outputs = [
        raw_output.index_select(1, idx_unsort)
        for raw_output in raw_outputs
    ]
    new_hidden = [(h_sort[0].index_select(1, idx_unsort),
                   h_sort[1].index_select(1, idx_unsort))
                  for h_sort in new_hidden]
    hidden = new_hidden
    output = self.lockdrop(emb_sort, self.dropout)
    outputs.append(output)
    result = output.view(output.size(0) * output.size(1), output.size(2))
    if return_h:
        return result, hidden, raw_outputs, outputs
    return result, hidden
def forward(self, input, hidden, return_h=False): emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) hidden = new_hidden output = self.lockdrop(raw_output, self.dropout) outputs.append(output) #print (output.view(output.size(0), output.size(1), output.size(2)).shape) output_fw = output.view(output.size(0), output.size(1), output.size(2))[:, :, :400] output_bw = output.view(output.size(0), output.size(1), output.size(2))[:, :, 400:] #print (output_fw.view(output_fw.size(0)*output_fw.size(1), output_fw.size(2)).shape) #print (output_bw.view(output_bw.size(0)*output_bw.size(1), output_bw.size(2)).shape) decoded_fw = self.decoder_fw( output_fw.view( output_fw.size(0) * output_fw.size(1), output_fw.size(2))) decoded_bw = self.decoder_bw( output_bw.view( output_bw.size(0) * output_bw.size(1), output_bw.size(2))) result_fw = decoded_fw.view( output_fw.size(0) * output_fw.size(1), decoded_fw.size(1)) result_bw = decoded_bw.view( output_bw.size(0) * output_bw.size(1), decoded_bw.size(1)) #result = decoded.view(output.size(0)*output.size(1), decoded.size(1)) #result = output.view(output.size(0)*output.size(1), output.size(2)) if return_h: #return result, hidden, raw_outputs, outputs #return result, hidden, decode_fw, decode_bw return result_fw, result_bw, hidden, raw_outputs, outputs return result_fw, result_bw, hidden
def evaluate(self, data, eos_tokens=None, dump_hiddens=False):
    """Manually step a single-layer RNN over `data` and accumulate a
    distance-based log-softmax loss per token.

    The candidate next hidden state for EVERY vocabulary word is computed
    at once (W*x precomputed, U*h per step); the observed token's row is
    selected as the next hidden state. eos_tokens resets the hidden state
    at sentence boundaries. Returns (avg_loss, per-token entropies[,
    hiddens]) — hiddens only when dump_hiddens.
    NOTE(review): hard-codes .cuda() and a single RNN layer.
    """
    # get weights and compute WX for all words
    weights_ih, bias_ih = self.rnn.module.weight_ih_l0, self.rnn.module.bias_ih_l0  # only one layer for the moment
    weights_hh, bias_hh = self.rnn.module.weight_hh_l0, self.rnn.module.bias_hh_l0
    all_words = torch.LongTensor([i for i in range(self.ntoken)]).cuda()
    all_words = embedded_dropout(
        self.encoder, all_words,
        dropout=self.dropoute if self.training else 0)
    all_words_times_W = torch.nn.functional.linear(all_words, weights_ih,
                                                   bias_ih)
    # iterate over data set and compute loss
    total_loss, hidden = 0, self.init_hidden(1)
    i = 0
    entropy, hiddens, all_hiddens = [], [], []
    while i < data.size(0):
        # Candidate hidden states for all vocabulary words at this step.
        hidden_times_U = torch.nn.functional.linear(
            hidden[0].repeat(self.ntoken, 1), weights_hh, bias_hh)
        output = self.nonlinearity(all_words_times_W + hidden_times_U)
        if dump_hiddens:
            hiddens.append(output[data[i]].data.cpu().numpy())
        # Score all candidates by distance to the current hidden state.
        distance = self.dist_fn(hidden[0], output, self.bias)
        softmaxed = torch.nn.functional.log_softmax(self.temp *
                                                    distance.view(-1),
                                                    dim=0)
        raw_loss = -softmaxed[data[i]].item()
        total_loss += raw_loss / data.size(0)
        entropy.append(raw_loss)
        if not eos_tokens is None and data[i].data.cpu().numpy(
        )[0] in eos_tokens:
            # Sentence boundary: reset the recurrence.
            hidden = self.init_hidden(1)
            if dump_hiddens:
                all_hiddens.append(hiddens)
                hiddens = []
        else:
            # Advance: adopt the observed token's candidate hidden state.
            hidden = output[data[i]].view(1, 1, -1)
        hidden = repackage_hidden(hidden)
        i = i + 1
    all_hiddens = all_hiddens if not eos_tokens is None else hiddens
    if dump_hiddens:
        return total_loss, np.array(entropy), all_hiddens
    else:
        return total_loss, np.array(entropy)
def forward(self, input, hidden, return_h=False, reset_experience=True): emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] if self.debug: debug_mems = [] for l, rnn in enumerate(self.rnns): current_input = raw_output if 'dnc' in self.rnn_type.lower(): raw_output = raw_output.transpose(0, 1) if self.debug: raw_output, new_h, debug = rnn( raw_output, hidden[l], reset_experience=reset_experience, pass_through_memory=True) debug_mems.append(debug) else: raw_output, new_h = rnn(raw_output, hidden[l], reset_experience=reset_experience) raw_output = raw_output.transpose(0, 1) else: raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) hidden = new_hidden output = self.lockdrop(raw_output, self.dropout).contiguous() outputs.append(output) decoded = self.decoder( output.view(output.size(0) * output.size(1), output.size(2))) result = decoded.view(output.size(0), output.size(1), decoded.size(1)) if return_h: if self.debug: return result, hidden, raw_outputs, outputs, debug_mems return result, hidden, raw_outputs, outputs if self.debug: return result, hidden, debug_mems return result, hidden
def forward(self, input, hidden, return_h=False): emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output # Each rnn is a layer! # each raw_output has shape seq_len x batch_size x nb_hidden # new_h is a tuple of 2 elements, each of size 1 x batch_size x nb_hidden (last h and last c) if (self.rnn_type != 'MYLSTM' and self.rnn_type != 'MYFASTLSTM' and self.rnn_type != 'SIMPLEPLASTICLSTM' and self.rnn_type != 'PLASTICLSTM' and self.rnn_type != 'FASTPLASTICLSTM' and self.rnn_type != 'SPLITLSTM'): raw_output, new_h = rnn(raw_output, hidden[l]) else: single_h = hidden[ l] # actually a tuple, includes the h and the c (and for plastic LTMS, includes Hebb as third element!) singleouts = [] for z in range(raw_output.shape[0]): singleout, single_h = rnn(raw_output[z], single_h) #if z==0: # print("RANDOM NUMBER 1:",float(torch.rand(1))) singleouts.append(singleout) new_h = single_h # the last (h,c[,hebb]) after the sequence is processed raw_output = torch.stack(singleouts) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) # lockdrop will zero out some output units over the whole sequence (separately chosen for each batch, but fixed across sequence) #pdb.set_trace() raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) #pdb.set_trace() hidden = new_hidden #pdb.set_trace() output = self.lockdrop(raw_output, self.dropout) outputs.append(output) result = output.view(output.size(0) * output.size(1), output.size(2)) if return_h: return result, hidden, raw_outputs, outputs return result, hidden
def forward(self, input, hidden):
    """Single-module forward: embed, run self.rnns once, apply output
    dropout, and flatten to (seq_len*batch, feat)."""
    embedded = self.lockdrop(
        embedded_dropout(self.encoder, input,
                         dropout=self.dropoute if self.training else 0),
        self.dropouti)
    rnn_out, h_n = self.rnns(embedded, hidden)
    rnn_out = self.lockdrop(rnn_out, self.dropout)
    flat = rnn_out.view(rnn_out.size(0) * rnn_out.size(1), rnn_out.size(2))
    return flat, h_n
def forward(self, words, stag, mask):
    """
    tokens: Variable of LongTensor, shape (bsize, ntoken,)
    mock_emb: mock embedding for convolution overhead

    Parses a batch: embeds words + supertags, runs a word-level RNN, a
    1-D convolution, and an arc-level RNN, producing per-split distances,
    arc scores, and tag scores.
    """
    bsz, ntoken = words.size()
    emb_words = embedded_dropout(self.encoder, words,
                                 dropout=self.dropoute if self.training else 0)
    emb_words = self.drop(emb_words)
    emb_stags = embedded_dropout(self.tag_encoder, stag,
                                 dropout=self.dropoute if self.training else 0)
    emb_stags = self.drop(emb_stags)

    def run_rnn(input, rnn, lengths):
        # Sort by descending length, pack, run, unpack, restore order.
        sorted_idx = numpy.argsort(lengths)[::-1].tolist()
        rnn_input = pack_padded_sequence(input[sorted_idx],
                                         lengths[sorted_idx],
                                         batch_first=True)
        rnn_out, _ = rnn(rnn_input)  # (bsize, ntoken, hidsize*2)
        rnn_out, _ = pad_packed_sequence(rnn_out, batch_first=True)
        rnn_out = rnn_out[numpy.argsort(sorted_idx).tolist()]
        return rnn_out

    sent_lengths = (mask.sum(dim=1)).data.cpu().numpy().astype('int')
    # Arc-level sequences are one shorter than the sentences.
    dst_lengths = sent_lengths - 1
    emb_plus_tag = torch.cat([emb_words, emb_stags], dim=-1)
    rnn1_out = run_rnn(emb_plus_tag, self.word_rnn, sent_lengths)
    terminal = self.terminal(rnn1_out.view(-1, self.hid_size*2))
    tag = self.arc(terminal)  # (bsize, ndst, tagsize)
    # Convolve over time to get split-point features.
    conv_out = self.conv1(rnn1_out.permute(0, 2, 1)).permute(0, 2, 1)  # (bsize, ndst, hidsize)
    rnn2_out = run_rnn(conv_out, self.arc_rnn, dst_lengths)
    non_terminal = self.non_terminal(rnn2_out.view(-1, self.hid_size*2))
    distance = self.distance(rnn2_out.view(-1, self.hid_size*2)).squeeze(dim=-1)  # (bsize, ndst)
    arc = self.arc(non_terminal)  # (bsize, ndst, arcsize)
    return distance.view(bsz, ntoken - 1), arc.contiguous().view(-1, self.arc_size), tag.view(-1, self.arc_size)
def forward(self, inputs):
    """Embed, run the stacked RNNs, and return
    (logits, last_raw_output, last_dropped_output)."""
    stream = embedded_dropout(
        self.embedding, inputs,
        dropout=self.embed_drop_ratio if self.training else 0)
    stream = self.lockdrop(stream, self.locked_drope)

    final_idx = self.nlayer - 1
    for depth, cell in enumerate(self.rnns):
        stream, _ = cell(stream)
        if depth != final_idx:
            stream = self.lockdrop(stream, self.locked_droph)

    dropped = self.lockdrop(stream, self.locked_dropo)
    logits = self.out(dropped)
    return logits, stream, dropped
def forward(self, input, hidden):
    """Embed (S, N) token ids, run the RNN stack once, and return the
    flattened, dropout-applied output plus the final hidden state.

    Shapes: emb (S, N, emsize); hidden/h_n (nlayers, N, nhid);
    returned output (S*N, nhid).
    """
    embedded = embedded_dropout(
        self.input_embedding, input,
        dropout=self.dropoute if self.training else 0)
    embedded = self.lockdrop(embedded, self.dropouti)
    rnn_out, h_n = self.rnns(embedded, hidden)
    rnn_out = self.lockdrop(rnn_out, self.dropout)
    flat = rnn_out.view(rnn_out.size(0) * rnn_out.size(1), rnn_out.size(2))
    return flat, h_n
def forward(self, input, hidden, return_h=False, return_prob=False): batch_size = input.size(1) # usedp = False if we are at normal eval emb = embedded_dropout(self.encoder, input, dropout=self.dropoute, usedp=(self.training and self.use_dropout)) # emb = self.idrop(emb) emb = self.lockdrop(emb, dropout=self.dropouti if self.use_dropout else 0) raw_output = emb new_hidden = [] # raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] for l, rnn in enumerate(self.rnns): current_input = raw_output raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: # self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, dropout=self.dropouth if self.use_dropout else 0) outputs.append(raw_output) hidden = new_hidden output = self.lockdrop(raw_output, dropout=self.dropout if self.use_dropout else 0) outputs.append(output) # this i G latent = self.latent(output) # this is H (tanh(W1 * G) latent = self.lockdrop(latent, dropout=self.dropoutl if self.use_dropout else 0) logit = self.decoder(latent.view(-1, self.ninp)) # this is the logit = W2 * H prior_logit = self.prior(output).contiguous().view(-1, self.n_experts) # W3 * G prior = nn.functional.softmax(prior_logit, -1) # softmax ( W3 * G ) prob = nn.functional.softmax(logit.view(-1, self.ntoken), -1).view(-1, self.n_experts, self.ntoken) # N x M prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1) if return_prob: model_output = prob else: log_prob = torch.log(prob.add_(1e-8)) model_output = log_prob model_output = model_output.view(-1, batch_size, self.ntoken) if return_h: return model_output, hidden, raw_outputs, outputs return model_output, hidden
def forward(self, input, hidden, return_h=False):
    """Forward pass delegating to a single self.rnn module, returning both
    the flattened hidden features (`result`) and the decoded logits.

    NOTE(review): no locked input dropout (dropouti) is applied here,
    unlike sibling models — confirm intended.
    """
    emb = embedded_dropout(self.encoder, input,
                           dropout=self.dropoute if self.training else 0)
    raw_output, hidden = self.rnn(emb, hidden)
    # self.distance = distances
    output = self.lockdrop(raw_output, self.dropout)
    result = output.view(output.size(0) * output.size(1), output.size(2))
    decoded = self.decoder(result)
    # result = output.view(output.size(0)*output.size(1), output.size(2))
    if return_h:
        return result, hidden, raw_output, decoded
    return result, hidden, decoded
def forward(self, input, hidden, use_dropout=True, return_h=False):
    """Forward pass where dropout can be switched off per call.

    Effective dropout = use_dropout (argument) AND self.use_dropout
    (model flag). QRNN cannot run without dropout here.
    With return_h the embedding is also returned (last element).
    """
    #hiddenstack = torch.stack(hidden[1],0)#ADDED
    #hiddenparam = torch.nn.Parameter(hiddenstack)# ADDED
    if not use_dropout:
        if self.rnn_type == 'QRNN':
            raise NotImplementedError
    use_dropout = use_dropout and self.use_dropout
    emb = embedded_dropout(
        self.encoder, input,
        dropout=self.dropoute if self.training and use_dropout else 0)
    if use_dropout:
        emb = self.lockdrop(emb, self.dropouti)
    raw_output = emb
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        current_input = raw_output
        # if self.use_dropout isn't true, then the rnn doesn't even have a use_dropout param
        if self.use_dropout and self.rnn_type == 'LSTM':
            raw_output, new_h = rnn(raw_output, hidden[l],
                                    use_dropout=use_dropout)
        else:
            raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            if use_dropout:
                raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    if use_dropout:
        output = self.lockdrop(raw_output, self.dropout)
    else:
        output = raw_output
    outputs.append(output)
    result = output.view(output.size(0) * output.size(1), output.size(2))
    if return_h:
        return result, hidden, raw_outputs, outputs, emb
    return result, hidden
def forward(self, input, prev_targets, hidden, return_h=False):
    """Forward pass conditioning on both the current input token and the
    previous target token: the two id streams are embedded jointly and
    their embeddings concatenated along the feature axis.
    """
    # Pair each input id with the previous target id, embed both, then
    # flatten the pair dimension into the feature dimension.
    combined_targets = torch.cat(
        (input.unsqueeze(-1), prev_targets.unsqueeze(-1)), -1)
    emb = embedded_dropout(self.encoder, combined_targets,
                           dropout=self.dropoute if self.training else 0)
    emb = emb.view(input.shape[0], input.shape[1], -1)
    emb = self.lockdrop(emb, self.dropouti)
    combined = emb
    raw_output = combined
    new_hidden = []
    raw_outputs = []
    outputs = []
    for l, rnn in enumerate(self.rnns):
        raw_output, new_h = rnn(raw_output, hidden[l])
        new_hidden.append(new_h)
        raw_outputs.append(raw_output)
        if l != self.nlayers - 1:
            raw_output = self.lockdrop(raw_output, self.dropouth)
            outputs.append(raw_output)
    hidden = new_hidden
    output = self.lockdrop(raw_output, self.dropout)
    outputs.append(output)
    out_size_orig0 = output.size(0)
    out_size_orig1 = output.size(1)
    # Combine features through a tanh layer before decoding.
    output_c = torch.tanh(
        self.combiner(
            output.view(output.size(0)*output.size(1), output.size(2))))
    output_c = output_c.view(output.size(0), output.size(1), -1)
    output_c_dropped = self.lockdrop(output_c, self.dropoutcomb)
    decoded = self.decoder(output_c_dropped)
    result = decoded.view(out_size_orig0, out_size_orig1, decoded.size(2))
    if return_h:
        return result, hidden, raw_outputs, outputs
    return result, hidden
def forward(self, input, hidden, partial_output, return_h=False): emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0) #emb = self.idrop(emb) emb = self.lockdrop(emb, self.dropouti) raw_output = emb new_hidden = [] #raw_output, hidden = self.rnn(emb, hidden) raw_outputs = [] outputs = [] timescale_invgamma = scipy.stats.invgamma.isf(np.linspace(0, 1, 1151), a=0.56, scale=1)[1:] np.save('timescale_invgamma.txt', timescale_invgamma) for l, rnn in enumerate(self.rnns): current_input = raw_output #print('Partial', partial_output) if partial_output: if l == 2: i = partial_output current_input[:, :, (i - 1) * 50:(i) * 50] = torch.tensor( np.zeros(50), dtype=torch.float) #print(np.mean(timescale_invgamma[(i-1)*50:(i)*50])) #print(torch.sum(raw_output[:,:,(i-1)*50:(i)*50] )) #print(torch.sum(raw_output[:,:,(i)*50:] )) #print(i*50) raw_output, new_h = rnn(raw_output, hidden[l]) new_hidden.append(new_h) raw_outputs.append(raw_output) if l != self.nlayers - 1: #self.hdrop(raw_output) raw_output = self.lockdrop(raw_output, self.dropouth) outputs.append(raw_output) hidden = new_hidden output = self.lockdrop(raw_output, self.dropout) outputs.append(output) result = output.view(output.size(0) * output.size(1), output.size(2)) if return_h: return result, hidden, raw_outputs, outputs return result, hidden
def forward(self, input, hidden, return_h=False):
    """Forward through a single self.rnn module that also reports
    per-layer outputs, distances, and neuromodulatory states (nm_hs)."""
    embedded = self.lockdrop(
        embedded_dropout(self.encoder, input,
                         dropout=self.dropoute if self.training else 0),
        self.dropouti)
    raw_out, hidden, raw_outputs, outputs, distances, nm_hs = self.rnn(
        embedded, hidden)
    # Stash distances on the module for external inspection.
    self.distance = distances
    final = self.lockdrop(raw_out, self.dropout)
    flat = final.view(final.size(0) * final.size(1), final.size(2))
    if return_h:
        return flat, hidden, raw_outputs, outputs, nm_hs
    return flat, hidden
def forward(self, X):
    """Embed X (batch-first), run the LSTM stack with per-layer dropout,
    and return (last-timestep scalar prediction, stacked h, stacked c).

    self.standard_dropout selects elementwise F.dropout; otherwise locked
    (variational) dropout is used.
    """
    embedded = embedded_dropout(
        self.embedding, X, dropout=self.edrop if self.training else 0)
    if self.standard_dropout:
        stream = F.dropout(embedded, p=self.idrop, training=self.training)
    else:
        stream = self.lockdrop(embedded, self.idrop)

    h_parts, c_parts = [], []
    for cell in self.rnns:
        stream, (h_last, c_last) = cell(stream)
        # Output dropout is applied after EVERY layer, including the last.
        if self.standard_dropout:
            stream = F.dropout(stream, p=self.odrop, training=self.training)
        else:
            stream = self.lockdrop(stream, self.odrop)
        h_parts.append(h_last)
        c_parts.append(c_last)

    hidden = torch.cat(h_parts, 0)
    cell_state = torch.cat(c_parts, 0)
    predictions = self.output_layer(stream)
    # Batch-first: take the last timestep's first output feature.
    return predictions[:, -1, 0], hidden, cell_state