import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


def forward(self, inputs):
    batch_size = inputs.sequence.size(0)

    # Optionally standardize the raw input features before dropout.
    if self.standardizer is not None:
        inputs_sequence = self.standardizer(inputs)
    else:
        inputs_sequence = inputs.sequence
    inputs_sequence = sequence_dropout(
        inputs_sequence, p=self.dropout, training=self.training,
        batch_first=True)

    # Pack the (standardized, dropped-out) sequence before running the RNN.
    packed_input = nn.utils.rnn.pack_padded_sequence(
        inputs_sequence, inputs.length.data.tolist(), batch_first=True)
    packed_context, _ = self.rnn(packed_input)
    hidden_states, _ = nn.utils.rnn.pad_packed_sequence(
        packed_context, batch_first=True)

    # Two-layer MLP over the RNN hidden states gives one score per step.
    mlp_layer1 = sequence_dropout(
        F.relu(self.linear1(hidden_states)), p=self.dropout,
        training=self.training, batch_first=True)
    mlp_layer2 = self.linear2(mlp_layer1).view(batch_size, -1)
    return mlp_layer2
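# `sequence_dropout` is used throughout this file but not defined here. The
# helper below is only a minimal sketch of what it is assumed to do (drop the
# same feature channels at every time step, i.e. "locked"/variational
# dropout, with inverse scaling); the repo's actual implementation may differ.
import torch
from torch.autograd import Variable


def sequence_dropout(x, p=0.5, training=True, batch_first=False):
    if not training or p == 0.0:
        return x
    if batch_first:
        # x: (batch, seq, features) -> one mask per example, shared over time.
        mask_size = (x.size(0), 1, x.size(2))
    else:
        # x: (seq, batch, features) -> same idea in sequence-first layout.
        mask_size = (1, x.size(1), x.size(2))
    mask = x.data.new(*mask_size).bernoulli_(1 - p).div_(1 - p)
    return x * Variable(mask)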
def forward(self, inputs, mask=None):
    # (seq, batch, emb) layout for the sequence-first RNN encoder.
    input_embeddings = inputs.embedding.transpose(1, 0)
    input_lengths = inputs.length
    sequence_size = input_embeddings.size(0)
    batch_size = input_embeddings.size(1)

    input_embeddings = self.ln_(input_embeddings)
    input_embeddings = sequence_dropout(
        input_embeddings, p=.25, training=self.training)

    context_sequence = self.rnn.encoder_context(
        input_embeddings, length=input_lengths)
    # Treat time steps whose first context feature is exactly zero as padding.
    mask = context_sequence[:, :, 0].eq(0).transpose(1, 0)
    context_sequence = sequence_dropout(
        context_sequence, p=.25, training=self.training)

    context_flat = context_sequence.view(sequence_size * batch_size, -1)
    logits_flat = self.predictor_module(context_flat)
    logits = logits_flat.view(sequence_size, batch_size).transpose(1, 0)
    logits = logits.masked_fill(mask, 0)
    return logits
def get_features(self, inputs):
    input_embeddings = inputs.embedding.transpose(1, 0)
    sequence_size = input_embeddings.size(0)
    batch_size = input_embeddings.size(1)

    input_embeddings = sequence_dropout(
        input_embeddings, p=.25, training=self.training)
    context_sequence = self.rnn.encoder_context(
        input_embeddings, length=inputs.length)
    context_sequence = sequence_dropout(
        context_sequence, p=.25, training=self.training)

    context_flat = context_sequence.view(sequence_size * batch_size, -1)
    features_flat = self.predictor_module(context_flat)
    features = features_flat.view(sequence_size, batch_size, 2)
    return features
def compute_features(self, inputs, inputs_mask=None, targets_mask=None):
    if targets_mask is None:
        targets_mask = inputs.embedding[:, :, 0:1].eq(
            self.mask_value).repeat(1, 1, self.input_mlp.output_size)
    else:
        targets_mask = targets_mask.unsqueeze(2).repeat(
            1, 1, self.input_mlp.output_size)

    inputs_embedding = sequence_dropout(
        inputs.embedding, p=self.input_dropout, training=self.training,
        batch_first=True)

    # Per-component saliences from the principal-component MLPs.
    pc_saliences = []
    for i, pc_mlp in enumerate(self.pc_mlps):
        pc = F.dropout(
            inputs.principal_components[:, i], p=self.pc_dropout,
            training=self.training)
        pc_saliences.append(pc_mlp(pc))
    pc_saliences = torch.cat(pc_saliences, 1)

    batch_size = inputs.embedding.size(0)
    seq_size = inputs.embedding.size(1)
    inputs_embedding_flat = inputs_embedding.view(batch_size * seq_size, -1)
    input_saliences = self.input_mlp(inputs_embedding_flat).view(
        batch_size, seq_size, -1).masked_fill(targets_mask, 0)

    # Coverage is the batched dot product of the per-input saliences with the
    # principal-component saliences.
    coverage = input_saliences.bmm(pc_saliences.view(batch_size, -1, 1))
    coverage = coverage.view(batch_size, -1)
    return coverage
def compute_features(self, inputs, inputs_mask=None, targets_mask=None):
    input_embeddings = sequence_dropout(
        inputs.embedding.transpose(1, 0), p=self.embedding_dropout,
        training=self.training)
    sequence_size = input_embeddings.size(0)
    batch_size = input_embeddings.size(1)

    context_sequence = self.rnn.encoder_context(
        input_embeddings, length=inputs.length)
    context_sequence = sequence_dropout(
        context_sequence, p=self.rnn_dropout, training=self.training)

    context_flat = context_sequence.view(sequence_size * batch_size, -1)
    logits_flat = self.mlp(context_flat)
    logits = logits_flat.view(sequence_size, batch_size).transpose(1, 0)
    return logits
def compute_features(self, inputs, inputs_mask=None, targets_mask=None):
    if inputs_mask is None:
        inputs_mask = inputs.embedding.eq(self.mask_value)
    inputs_embedding = inputs.embedding.masked_fill(inputs_mask, 0)
    inputs_embedding = sequence_dropout(
        inputs_embedding, p=.5, training=self.training, batch_first=True)
    return inputs_embedding
def forward(self, inputs, targets):
    batch_size = inputs.sequence.size(0)

    # Block padding positions at every decoder step, and additionally block
    # each already-extracted input at all subsequent steps.
    inputs_mask = self.make_inputs_mask(inputs)
    inputs_mask = inputs_mask.view(batch_size, 1, -1).repeat(
        1, targets.sequence.size(1) + 1, 1)
    for b in range(batch_size):
        for step, idx in enumerate(targets.sequence.data[b]):
            inputs_mask[b, step + 1:, idx] = 1

    # Make inputs 0 mean, unit variance, since tsne coords, word counts,
    # and salience are all on very different scales.
    if self.input_mod is not None:
        input_sequence = self.input_mod(inputs)
    else:
        input_sequence = inputs.sequence

    inputs_packed = nn.utils.rnn.pack_padded_sequence(
        input_sequence, inputs.length.data.tolist(), batch_first=True)
    packed_context, encoder_state = self.encoder(inputs_packed)
    context, _ = nn.utils.rnn.pad_packed_sequence(
        packed_context, batch_first=True)

    # Append the learned stop state to the attention candidates.
    decoder_stop = self.decoder_stop.repeat(batch_size, 1, 1)
    context_states = torch.cat([context, decoder_stop], 1)
    context_states = sequence_dropout(
        context_states, p=self.context_dropout, training=self.training,
        batch_first=True)

    # In training mode, feed the gold target label sequence to the decoder.
    decoder_input = self.make_decoder_input(input_sequence, targets.sequence)
    decoder_states = self.decoder(
        decoder_input, encoder_state)[0].transpose(1, 0)

    output_logits = self.compute_training_attention_logits(
        context_states, decoder_states)
    output_logits.data.masked_fill_(inputs_mask, float("-inf"))
    return output_logits
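# The nested Python loop above that blocks already-extracted inputs at later
# decoder steps can also be written with one scatter_ per step. This is only
# a sketch of that alternative, not code from this repo: the function name is
# hypothetical, and it assumes `inputs_mask` is a (batch, decoder_steps,
# encoder_positions) ByteTensor and `target_sequence` a LongTensor of indices.
def build_selection_mask(inputs_mask, target_sequence):
    batch_size = inputs_mask.size(0)
    dec_steps = inputs_mask.size(1)
    for step in range(target_sequence.size(1)):
        # Block the input chosen at `step` for every later decoder step.
        idx = target_sequence[:, step].contiguous().view(batch_size, 1, 1)
        idx = idx.repeat(1, dec_steps - step - 1, 1)
        inputs_mask[:, step + 1:].scatter_(2, idx, 1)
    return inputs_mask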
def forward(self, inputs, mask=None):
    input_embeddings = inputs.embedding
    batch_size = input_embeddings.size(0)
    sequence_size = input_embeddings.size(1)

    mask = input_embeddings.eq(-1)
    input_embeddings = sequence_dropout(
        input_embeddings, p=.25, training=self.training, batch_first=True)
    input_embeddings = input_embeddings.masked_fill(mask, 0)

    # Repeat the document-level CNN features at every position and
    # concatenate them with the per-position embeddings.
    feature_maps = self.cnn.encoder_state_output(input_embeddings)
    feature_maps = feature_maps.view(batch_size, 1, -1).repeat(
        1, sequence_size, 1)
    mlp_input = torch.cat([input_embeddings, feature_maps], 2)

    mlp_input_flat = mlp_input.view(batch_size * sequence_size, -1)
    probs_flat = self.predictor_module(mlp_input_flat)
    probs = probs_flat.view(batch_size, sequence_size)
    probs = probs.masked_fill(mask[:, :, 0], 0)
    return probs
def forward(self, inputs, mask=None):
    input_embeddings = inputs.embedding
    batch_size = input_embeddings.size(0)
    orig_mask = input_embeddings.eq(-1)
    orig_seq_size = input_embeddings.size(1)

    # Optionally pad the front of the sequence with learned parameters and
    # the rear with zeros so the convolution keeps one output per original
    # position.
    if self.pad_params is not None:
        zeros = Variable(
            input_embeddings.data.new(
                batch_size, self.rear_pad_size,
                input_embeddings.size(2)).fill_(0))
        input_embeddings = torch.cat(
            [self.pad_params.repeat(batch_size, 1, 1),
             input_embeddings.masked_fill(orig_mask, 0),
             zeros], 1)

    sequence_size = input_embeddings.size(1)
    input_embeddings = self.input_ln_(input_embeddings)
    input_embeddings = sequence_dropout(
        input_embeddings, p=.05, training=self.training, batch_first=True)

    feature_maps = F.relu(
        self.filters_(
            input_embeddings.view(
                batch_size, 1, sequence_size, -1)).squeeze(3).transpose(2, 1))
    feature_maps = self.cnn_ln_(feature_maps)

    feature_maps_flat = feature_maps.view(batch_size * orig_seq_size, -1)
    probs = self.predictor_module(feature_maps_flat).view(
        batch_size, orig_seq_size).masked_fill(orig_mask[:, :, 0], 0)
    return probs
def compute_features2(self, inputs):
    # Mean input embedding per document (ignoring padded positions).
    emb_mask = inputs.embedding.eq(-1)
    input_embeddings_bf = inputs.embedding.masked_fill(emb_mask, 0)
    input_sum = input_embeddings_bf.transpose(2, 1).sum(2, keepdim=True)
    input_mean = Variable(
        (input_sum
         / Variable(inputs.length.view(-1, 1, 1).data.float())).data)

    input_embeddings = inputs.embedding.transpose(1, 0)
    input_lengths = inputs.length
    sequence_size = input_embeddings.size(0)
    batch_size = input_embeddings.size(1)
    input_embeddings = sequence_dropout(
        input_embeddings, p=.25, training=self.training)

    # Salience from the first RNN encoder.
    context_sequence = self.rnn.encoder_context(
        input_embeddings, length=input_lengths)
    mask = context_sequence[:, :, 0].eq(0).transpose(1, 0)
    context_sequence = sequence_dropout(
        context_sequence, p=.25, training=self.training)
    context_flat = context_sequence.view(sequence_size * batch_size, -1)
    salience_flat = self.salience_module(context_flat)
    salience = salience_flat.view(sequence_size, batch_size).transpose(1, 0)
    salience = salience.masked_fill(mask, 0)

    # Coverage from a second RNN encoder: similarity of each context state
    # to the mean input embedding, normalized by document length.
    context_sequence = self.rnn2.encoder_context(
        input_embeddings, length=input_lengths)
    context_sequence = sequence_dropout(
        context_sequence, p=.25, training=self.training)
    coverage_us = context_sequence.transpose(1, 0).bmm(input_mean).view(
        batch_size, -1)
    coverage_us = coverage_us / Variable(
        inputs.length.data.float().view(-1, 1))
    coverage_us_flat = coverage_us.view(-1, 1)
    coverage_flat = self.coverage_module(coverage_us_flat)
    coverage = coverage_flat.view(batch_size, sequence_size)
    coverage = coverage.masked_fill(mask, 0)

    # Learned per-position scores and an affine word-count feature.
    position = self.position_params[:, :sequence_size].repeat(
        batch_size, 1).masked_fill(mask, 0)
    wc = (self.wc_weight * inputs.word_count + self.wc_bias).view(
        batch_size, sequence_size).masked_fill(mask, 0)

    return salience, coverage, position, wc
def forward(self, inputs):
    mask = inputs.sequence[:, :, 0:1].ne(-1).float()
    batch_size = inputs.sequence.size(0)
    seq_size = inputs.sequence.size(1)

    if self.standardizer is not None:
        inputs_sequence = self.standardizer(inputs)
    else:
        inputs_sequence = inputs.sequence
    inputs_sequence = sequence_dropout(
        inputs_sequence, p=self.dropout, training=self.training,
        batch_first=True)

    packed_input = nn.utils.rnn.pack_padded_sequence(
        inputs.sequence, inputs.length.data.tolist(), batch_first=True)
    packed_context, _ = self.rnn(packed_input)
    hidden_states_cat, _ = nn.utils.rnn.pad_packed_sequence(
        packed_context, batch_first=True)
    hidden_states_cat = sequence_dropout(
        hidden_states_cat, p=self.dropout, training=self.training,
        batch_first=True)
    hidden_states = sequence_dropout(
        F.tanh(self.linear1(hidden_states_cat)), p=self.dropout,
        training=self.training, batch_first=True)

    # Mean-pooled document representation, broadcast to every sentence.
    doc_rep = ((hidden_states * mask).sum(1)
               / inputs.length.view(-1, 1).float()).view(
                   batch_size, 1, -1).repeat(1, seq_size, 1)

    # Embed absolute/relative position, word count, and mean tf-idf bins.
    abs_pos = self.apos_emb(
        torch.clamp(inputs.absolute_position, 0, self.absolute_positions))
    abs_pos = sequence_dropout(
        abs_pos, p=self.dropout, training=self.training, batch_first=True)
    rel_pos = self.rpos_emb(
        torch.clamp(inputs.relative_position, 0, self.relative_positions))
    rel_pos = sequence_dropout(
        rel_pos, p=self.dropout, training=self.training, batch_first=True)
    wc = self.wc_emb(torch.clamp(inputs.word_count, 0, self.word_counts))
    wc = sequence_dropout(
        wc, p=self.dropout, training=self.training, batch_first=True)
    tfidf = self.tfidf_emb(torch.clamp(inputs.mean_tfidf, 0, self.tfidfs))
    tfidf = sequence_dropout(
        tfidf, p=self.dropout, training=self.training, batch_first=True)

    pred_input = torch.cat(
        [inputs_sequence, doc_rep, abs_pos, rel_pos, wc, tfidf], 2)
    layer2 = F.relu(self.linear2(pred_input))
    layer2 = sequence_dropout(
        layer2, p=self.dropout, training=self.training, batch_first=True)
    layer3 = F.relu(self.linear3(layer2))
    layer3 = sequence_dropout(
        layer3, p=self.dropout, training=self.training, batch_first=True)
    rouge = F.sigmoid(self.linear4(layer3)).view(batch_size, -1)
    return rouge
def forward(self, inputs):
    batch_size = inputs.sequence.size(0)
    seq_size = inputs.sequence.size(1)

    # Position logits from absolute/relative position embeddings.
    abs_pos = self.apos_emb(
        torch.clamp(inputs.absolute_position, 0, self.absolute_positions))
    rel_pos = self.rpos_emb(
        torch.clamp(inputs.relative_position, 0, self.relative_positions))
    apos_logit = self.apos_layer(abs_pos).view(batch_size, -1)
    rpos_logit = self.rpos_layer(rel_pos).view(batch_size, -1)

    inputs_sequence = sequence_dropout(
        inputs.sequence, p=self.dropout, training=self.training,
        batch_first=True)
    packed_input = nn.utils.rnn.pack_padded_sequence(
        inputs_sequence, inputs.length.data.tolist(), batch_first=True)
    packed_context, _ = self.rnn(packed_input)
    hidden_states, _ = nn.utils.rnn.pad_packed_sequence(
        packed_context, batch_first=True)

    sentence_states = F.relu(self.sent_rep(hidden_states))
    sentence_states = sequence_dropout(
        sentence_states, p=self.dropout, training=self.training,
        batch_first=True)

    # Content logits, and salience logits measured against a document
    # representation built from the mean sentence state.
    content_logits = self.sent_content(sentence_states).view(batch_size, -1)
    avg_sentence = sentence_states.sum(1).div_(
        inputs.length.view(batch_size, 1).float())
    doc_rep = self.doc_rep2(F.tanh(self.doc_rep(avg_sentence)))
    doc_rep = doc_rep.unsqueeze(2)
    salience_logits = sentence_states.bmm(doc_rep).view(batch_size, -1)

    # Left-to-right pass: the running summary representation is a
    # probability-weighted sum of the sentence states selected so far, and
    # each step is penalized for redundancy with it (novelty term).
    sentence_states = sentence_states.split(1, dim=1)
    logits = []
    summary_rep = Variable(sentence_states[0].data.new(
        sentence_states[0].size()).fill_(0))
    for step in range(seq_size):
        squashed_summary = F.tanh(summary_rep.transpose(1, 2))
        novelty_logits = -self.novelty_layer(
            sentence_states[step]).bmm(squashed_summary).view(batch_size)
        logits_step = (content_logits[:, step] + salience_logits[:, step]
                       + novelty_logits + apos_logit[:, step]
                       + rpos_logit[:, step] + self.bias)
        prob = F.sigmoid(logits_step)
        summary_rep = summary_rep + sentence_states[step] * prob.view(
            batch_size, 1, 1)
        logits.append(logits_step.view(batch_size, 1))
    logits = torch.cat(logits, 1)
    return logits
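# Usage sketch for the forward above. Everything here is hypothetical: the
# field names are inferred from the attribute accesses in the code, `model`
# stands for an instance of whichever module defines this forward, and the
# sizes are made up. Lengths must be sorted in decreasing order because the
# sequence is packed with pack_padded_sequence.
from collections import namedtuple

import torch
from torch.autograd import Variable

Batch = namedtuple(
    "Batch",
    ["sequence", "length", "absolute_position", "relative_position"])

batch = Batch(
    sequence=Variable(torch.randn(2, 5, 200)),    # (batch, sents, features)
    length=Variable(torch.LongTensor([5, 3])),    # sentences per document
    absolute_position=Variable(torch.LongTensor(2, 5).random_(0, 25)),
    relative_position=Variable(torch.LongTensor(2, 5).random_(0, 10)))

logits = model(batch)            # (batch, sents) extraction logits
probs = torch.sigmoid(logits)    # per-sentence extraction probabilities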