def __init__(self, encoder: Seq2SeqEncoder):
    super(TimeDistributedEncoder, self).__init__()
    self._input_dim = encoder.get_input_dim()
    self._output_dim = encoder.get_output_dim()
    self._is_bidirectional = (hasattr(encoder, "is_bidirectional")
                              and encoder.is_bidirectional())
    # Wrap the encoder with TimeDistributed so it can run over inputs that carry an
    # extra leading dimension (e.g. one token sequence per answer choice).
    self._encoder = TimeDistributed(encoder)
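# A minimal usage sketch (not from the original repo; the LSTM configuration and all sizes
# are assumed for illustration). It shows the trick the constructor above relies on:
# TimeDistributed merges the first two dimensions, runs the wrapped Seq2SeqEncoder, and
# restores the extra "choice" dimension on the output.
import torch
from allennlp.modules import TimeDistributed
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

lstm_encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(16, 8, batch_first=True, bidirectional=True))
wrapped = TimeDistributed(lstm_encoder)
inputs = torch.randn(2, 3, 5, 16)               # (batch=2, choices=3, tokens=5, dim=16)
mask = torch.ones(2, 3, 5, dtype=torch.long)    # the mask carries the same extra dimension
encoded = wrapped(inputs, mask)                 # -> (2, 3, 5, 16): 8 units * 2 directions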
def embed_encode_and_aggregate_list_text_field_with_feats_only(texts_list: Dict[str, torch.LongTensor],
                                                               text_field_embedder,
                                                               embeddings_dropout,
                                                               encoder: Seq2SeqEncoder,
                                                               aggregation_type,
                                                               token_features=None,
                                                               init_hidden_states=None):
    embedded_texts = text_field_embedder(texts_list)
    embedded_texts = embeddings_dropout(embedded_texts)

    if token_features is not None:
        embedded_texts = torch.cat([embedded_texts, token_features], dim=-1)

    bs, ch_cnt, ch_tkn_cnt, d = tuple(embedded_texts.shape)

    embedded_texts_flattened = embedded_texts.view([bs * ch_cnt, ch_tkn_cnt, -1])

    # masks
    texts_mask_dim_3 = get_text_field_mask(texts_list, num_wrapping_dims=1).float()
    texts_mask_flattened = texts_mask_dim_3.view([-1, ch_tkn_cnt])

    # context encoding
    multiple_texts_init_states = None
    if init_hidden_states is not None:
        if init_hidden_states.shape[0] == bs and init_hidden_states.shape[1] != ch_cnt:
            if init_hidden_states.shape[1] != encoder.get_output_dim():
                raise ValueError("The shape of init_hidden_states is {0} but is expected to be {1} or {2}"
                                 .format(str(init_hidden_states.shape),
                                         str([bs, encoder.get_output_dim()]),
                                         str([bs, ch_cnt, encoder.get_output_dim()])))

            # In this case we were passed only a 2D tensor, the default output of the question encoder.
            multiple_texts_init_states = init_hidden_states.unsqueeze(1) \
                .expand([bs, ch_cnt, encoder.get_output_dim()]).contiguous()

            # Reshape to match the flattened tokens.
            multiple_texts_init_states = multiple_texts_init_states.view([bs * ch_cnt,
                                                                          encoder.get_output_dim()])
        else:
            multiple_texts_init_states = init_hidden_states.view([bs * ch_cnt, encoder.get_output_dim()])

    encoded_texts_flattened = encoder(embedded_texts_flattened, texts_mask_flattened,
                                      hidden_state=multiple_texts_init_states)

    aggregated_choice_flattened = seq2vec_seq_aggregate(encoded_texts_flattened, texts_mask_flattened,
                                                        aggregation_type,
                                                        encoder.is_bidirectional(), 1)  # bs * ch_cnt x d

    aggregated_choice_flattened_reshaped = aggregated_choice_flattened.view([bs, ch_cnt, -1])
    return aggregated_choice_flattened_reshaped
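# A shape-only sketch (plain PyTorch, all sizes assumed; the masked max stands in for
# encoder(...) followed by seq2vec_seq_aggregate(..., "max", ...)) of the flatten/unflatten
# pattern the helper above uses: merge (batch, choices) into one axis, encode and aggregate
# each sequence, then restore the choice axis.
import torch

bs, ch_cnt, ch_tkn_cnt, d = 2, 4, 7, 16
embedded = torch.randn(bs, ch_cnt, ch_tkn_cnt, d)
mask = torch.ones(bs, ch_cnt, ch_tkn_cnt)

flattened = embedded.view(bs * ch_cnt, ch_tkn_cnt, d)            # (8, 7, 16)
mask_flattened = mask.view(-1, ch_tkn_cnt)                       # (8, 7)
masked = flattened.masked_fill(mask_flattened.unsqueeze(-1) == 0, float("-inf"))
aggregated = masked.max(dim=1).values                            # (8, 16), one vector per choice
restored = aggregated.view(bs, ch_cnt, -1)                       # (2, 4, 16)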
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             seq_metrics: Metric,
             attention: Attention,
             beam_size: int = None,
             source_namespace: str = 'source_tokens',
             target_namespace: str = "tokens",
             target_embedding_dim: int = None,
             scheduled_sampling_ratio: float = 0.,
             use_bleu: bool = False,
             encoder_input_dropout: float = 0.0,
             encoder_output_dropout: float = 0.0,
             dropout: float = 0.0,
             feed_output_attention_to_decoder: bool = False,
             keep_decoder_output_dim_same_as_encoder: bool = True,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super(RecombinationSeq2SeqWithCopy, self).__init__(vocab)
    self._source_namespace = source_namespace
    self._target_namespace = target_namespace
    self._scheduled_sampling_ratio = scheduled_sampling_ratio

    # We need the start symbol to provide as the input at the first timestep of decoding, and
    # the end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                 self._target_namespace)  # pylint: disable=protected-access

    # Evaluation metrics
    if use_bleu:
        pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                               self._target_namespace)  # pylint: disable=protected-access
        self._bleu = BLEU(exclude_indices={pad_index, self._end_index, self._start_index})
    else:
        self._bleu = None
    self._seq_metric = seq_metrics

    # At prediction time, we use a beam search to find the most likely sequence of target tokens.
    beam_size = beam_size or 1
    self._max_decoding_steps = max_decoding_steps
    self._beam_search = BeamSearch(self._end_index, max_steps=max_decoding_steps, beam_size=beam_size)

    # Dense embedding of source vocab tokens.
    self._source_embedder = source_embedder

    # Encoder: encodes the sequence of source embeddings into a sequence of hidden states.
    self._encoder = encoder
    self._encoder_output_dim = self._encoder.get_output_dim()

    # Resolve the default target embedding size before it is used to size the decoder input.
    target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()

    # Attention mechanism applied to the encoder output at each decoding step.
    self._attention = attention

    self._feed_output_attention_to_decoder = feed_output_attention_to_decoder
    if self._feed_output_attention_to_decoder:
        # If using attention, a weighted average over encoder outputs is concatenated
        # with the previous target embedding to form the input to the decoder at each time step.
        self._decoder_input_dim = self._encoder_output_dim + target_embedding_dim
    else:
        # Otherwise, the input to the decoder is just the previous target embedding.
        self._decoder_input_dim = target_embedding_dim

    # Decoder: dense embedding of vocab words in the target space.
    num_classes = self.vocab.get_vocab_size(self._target_namespace)
    self._num_classes = num_classes
    self._target_embedder = Embedding(num_classes, target_embedding_dim)

    # TODO: relax this assumption
    # The decoder output dim needs to match the encoder output dim, since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    self._keep_decoder_output_dim_same_as_encoder = keep_decoder_output_dim_same_as_encoder
    if not self._keep_decoder_output_dim_same_as_encoder:
        self._decoder_output_dim = int(self._encoder_output_dim / 2) if encoder.is_bidirectional() \
            else self._encoder_output_dim
    else:
        self._decoder_output_dim = self._encoder_output_dim

    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
    self._transform_decoder_init_state = torch.nn.Sequential(
        torch.nn.Linear(self._encoder_output_dim, self._decoder_output_dim),
        torch.nn.Tanh()
    )

    # Generation score over the target vocabulary.
    self._output_projection_layer = Linear(self._decoder_output_dim + self._encoder_output_dim,
                                           num_classes)

    # Dropout layers
    self._encoder_input_dropout = torch.nn.Dropout(p=encoder_input_dropout)
    self._encoder_output_dropout = torch.nn.Dropout(p=encoder_output_dropout)
    self._output_dropout = torch.nn.Dropout(p=dropout)
    self._embedded_dropout = torch.nn.Dropout(p=dropout)

    initializer(self)
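# A minimal sketch (plain PyTorch, all sizes assumed) of the encoder-to-decoder bridge set
# up above: the final encoder state is passed through Linear + Tanh to match the decoder
# size, and the decoder LSTMCell consumes the previous target embedding concatenated with
# an attention summary when feed_output_attention_to_decoder is enabled.
import torch

batch, encoder_dim, decoder_dim, target_embedding_dim = 2, 16, 8, 8
transform = torch.nn.Sequential(torch.nn.Linear(encoder_dim, decoder_dim), torch.nn.Tanh())
final_encoder_state = torch.randn(batch, encoder_dim)
decoder_hidden = transform(final_encoder_state)                  # (2, 8), squashed into (-1, 1)

decoder_cell = torch.nn.LSTMCell(encoder_dim + target_embedding_dim, decoder_dim)
attended = torch.randn(batch, encoder_dim)       # stand-in for the attention-weighted summary
prev_target = torch.randn(batch, target_embedding_dim)
h, c = decoder_cell(torch.cat([attended, prev_target], dim=-1),
                    (decoder_hidden, torch.zeros_like(decoder_hidden)))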
def __init__(self,
             vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             seq_metrics: Metric,
             input_attention: Attention = None,
             input_attention_function: SimilarityFunction = None,
             beam_size: int = None,
             target_namespace: str = "tokens",
             target_embedding_dim: int = None,
             scheduled_sampling_ratio: float = 0.,
             use_bleu: bool = True,
             encoder_input_dropout: float = 0.0,
             encoder_output_dropout: float = 0.0,
             dropout: float = 0.0,
             output_attention: Attention = None,
             feed_output_attention_to_decoder: bool = False,
             keep_decoder_output_dim_same_as_encoder: bool = True,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab, source_embedder, encoder, max_decoding_steps,
                     input_attention, input_attention_function, beam_size, target_namespace,
                     target_embedding_dim, scheduled_sampling_ratio, use_bleu)
    self._seq_metric = seq_metrics
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                 self._target_namespace)  # pylint: disable=protected-access
    self._output_attention = output_attention

    # Dropout layers
    self._encoder_input_dropout = torch.nn.Dropout(p=encoder_input_dropout)
    self._encoder_output_dropout = torch.nn.Dropout(p=encoder_output_dropout)
    self._output_dropout = torch.nn.Dropout(p=dropout)
    self._embedded_dropout = torch.nn.Dropout(p=dropout)

    self._feed_output_attention_to_decoder = feed_output_attention_to_decoder
    self._keep_decoder_output_dim_same_as_encoder = keep_decoder_output_dim_same_as_encoder
    if not self._keep_decoder_output_dim_same_as_encoder:
        # Use half the encoder size for the decoder when the encoder is bidirectional.
        self._decoder_output_dim = int(self._encoder_output_dim / 2) if encoder.is_bidirectional() \
            else self._encoder_output_dim
        self._transform_decoder_init_state = torch.nn.Sequential(
            torch.nn.Tanh(),
            torch.nn.Linear(self._encoder_output_dim, self._decoder_output_dim)
        )

    if self._feed_output_attention_to_decoder:
        self._decoder_input_dim = target_embedding_dim + self._encoder_output_dim
    # Both branches of the original if/else built the same cell, so construct it once.
    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)

    num_classes = self.vocab.get_vocab_size(self._target_namespace)
    if self._output_attention:
        # self._fuse_decoder_hidden_attention_layout = torch.nn.Sequential(
        #     torch.nn.Tanh(),
        #     Linear(self._decoder_output_dim * 2, self._decoder_output_dim))
        self._output_projection_layer = Linear(self._decoder_output_dim + self._encoder_output_dim,
                                               num_classes)
    else:
        self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)

    initializer(self)
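# A shape sketch (plain PyTorch, sizes assumed) of the output projection chosen above when
# output_attention is set: the decoder hidden state is concatenated with the attention
# context over encoder outputs before projecting to vocabulary logits.
import torch

batch, decoder_dim, encoder_dim, num_classes = 2, 8, 16, 100
decoder_hidden = torch.randn(batch, decoder_dim)
attn_context = torch.randn(batch, encoder_dim)   # weighted average over encoder outputs
projection = torch.nn.Linear(decoder_dim + encoder_dim, num_classes)
logits = projection(torch.cat([decoder_hidden, attn_context], dim=-1))   # (2, 100)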
def __init__(self,
             vocab: Vocabulary,
             text_encoder: Seq2SeqEncoder,
             word_embedder: TextFieldEmbedder,
             enable_training_log: bool = False,
             inp_drop_rate: float = 0.2,
             out_drop_rate: float = 0.2,
             loss_weights: List = (0.2, 0.4, 0.4),
             super_mode: str = 'before',
             backbone: str = 'unet',
             unet_down_channel: int = 256,
             feature_sel: int = 127):
    super(UnifiedFollowUp, self).__init__(vocab)
    self.text_encoder = text_encoder
    self.word_embedder = word_embedder

    # Model architecture choices.
    self.backbone = backbone

    # Input dropout.
    if inp_drop_rate > 0:
        self.var_inp_dropout = InputVariationalDropout(p=inp_drop_rate)
    else:
        self.var_inp_dropout = lambda x: x
    # Output dropout.
    if out_drop_rate > 0:
        self.var_out_dropout = InputVariationalDropout(p=out_drop_rate)
    else:
        self.var_out_dropout = lambda x: x

    self.hidden_size = text_encoder.get_output_dim() // 2 if text_encoder.is_bidirectional() \
        else text_encoder.get_output_dim()
    self.output_size = text_encoder.get_output_dim()

    # Similarity feature choices:
    #   ele      -> element-wise multiply
    #   dot      -> dot product
    #   cos      -> cosine similarity
    #   emb_dot  -> embedding dot product
    #   emb_cos  -> embedding cosine similarity
    #   linear   -> linear similarity
    #   bilinear -> bilinear similarity
    sel_arr = "{0:07b}".format(int(feature_sel))
    nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']
    self.segment_choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']

    # If expand_bidir is set, the forward/backward directions are treated as two channels.
    self.expand_bidir = False

    self.similar_function = ModuleDict({
        'ele': ElementWiseMatrixAttention(),
        'dot': DotProductMatrixAttention(),
        'cos': CosineMatrixAttention(),
        'emb_dot': DotProductMatrixAttention(),
        'emb_cos': CosineMatrixAttention(),
        'bilinear': BilinearMatrixAttention(matrix_1_dim=self.output_size,
                                            matrix_2_dim=self.output_size),
        'linear': LinearMatrixAttention(tensor_1_dim=self.output_size,
                                        tensor_2_dim=self.output_size)
    })

    self.attn_channel = 0
    for choice in self.segment_choices:
        if choice == 'ele':
            self.attn_channel += self.output_size
        elif choice in ['dot', 'cos', 'emb_dot', 'emb_cos', 'bilinear', 'linear']:
            self.attn_channel += 2 if self.expand_bidir else 1

    self.class_mapping: Dict[str, int] = get_class_mapping(super_mode=super_mode)

    # Two backbone choices are planned, an MLP and a UNet; only UNet is supported for now.
    if self.backbone == 'unet':
        self.segmentation_net = AttentionUNet(input_channels=self.attn_channel,
                                              class_number=len(self.class_mapping.keys()),
                                              down_channel=unet_down_channel)
    else:
        raise ValueError("Currently only the 'unet' backbone is supported.")

    class_zero_weight = loss_weights[0]
    class_one_weight = loss_weights[1]
    self.register_buffer('weight_tensor',
                         torch.tensor([class_zero_weight, class_one_weight,
                                       1 - class_zero_weight - class_one_weight]))
    self.loss = nn.CrossEntropyLoss(ignore_index=-1, weight=self.weight_tensor)

    # Initialize metrics.
    self.metrics = {'ROUGE': BatchAverage(),
                    '_ROUGE1': BatchAverage(),
                    '_ROUGE2': BatchAverage(),
                    # TODO: you can speed up the code by disabling BLEU, since
                    # the corpus-based BLEU metric is time-consuming.
                    'BLEU': CorpusBLEUMetric(),
                    'EM': BatchAverage(),
                    'F1': FScoreMetric(prefix="1"),
                    'F2': FScoreMetric(prefix="2"),
                    'F3': FScoreMetric(prefix="3")}

    parameter_num = count_parameters(self)
    print(parameter_num)

    self.min_width = 8
    self.min_height = 8
    self.enable_training_log = enable_training_log
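# A worked example (standalone, values assumed) of the feature_sel bitmask decoded above:
# each of the seven bits, left to right, toggles one similarity feature in the order
# [ele, dot, cos, emb_dot, emb_cos, linear, bilinear].
choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']
for feature_sel in (127, 73, 1):
    bits = "{0:07b}".format(feature_sel)
    picked = [choices[i] for i in range(7) if bits[i] == '1']
    print(feature_sel, bits, picked)
# 127 -> '1111111' -> all seven features
#  73 -> '1001001' -> ['ele', 'emb_dot', 'bilinear']
#   1 -> '0000001' -> ['bilinear']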