def __init__( self, vocab: Vocabulary, span_encoder: Seq2SeqEncoder, reasoning_encoder: Seq2SeqEncoder, input_dropout: float = 0.3, hidden_dim_maxpool: int = 1024, class_embs: bool = True, reasoning_use_obj: bool = True, reasoning_use_answer: bool = True, reasoning_use_question: bool = True, pool_reasoning: bool = True, pool_answer: bool = True, pool_question: bool = False, initializer: InitializerApplicator = InitializerApplicator(), ): super(MultiHopAttentionQABUA, self).__init__(vocab) ################################################################################################### self.obj_downsample = torch.nn.Sequential( torch.nn.Dropout(p=0.1), torch.nn.Linear(2048, 512), torch.nn.ReLU(inplace=True), ) self.rnn_input_dropout = TimeDistributed( InputVariationalDropout( input_dropout)) if input_dropout > 0 else None self.span_encoder = TimeDistributed(span_encoder) self.reasoning_encoder = TimeDistributed(reasoning_encoder) self.span_attention = BilinearMatrixAttention( matrix_1_dim=span_encoder.get_output_dim(), matrix_2_dim=span_encoder.get_output_dim(), ) self.obj_attention = BilinearMatrixAttention( matrix_1_dim=span_encoder.get_output_dim(), matrix_2_dim=512, ) self.reasoning_use_obj = reasoning_use_obj self.reasoning_use_answer = reasoning_use_answer self.reasoning_use_question = reasoning_use_question self.pool_reasoning = pool_reasoning self.pool_answer = pool_answer self.pool_question = pool_question dim = sum([ d for d, to_pool in [( reasoning_encoder.get_output_dim(), self.pool_reasoning ), (span_encoder.get_output_dim(), self.pool_answer ), (span_encoder.get_output_dim(), self.pool_question)] if to_pool ]) self.final_mlp = torch.nn.Sequential( torch.nn.Dropout(input_dropout, inplace=False), torch.nn.Linear(dim, hidden_dim_maxpool), torch.nn.ReLU(inplace=True), torch.nn.Dropout(input_dropout, inplace=False), torch.nn.Linear(hidden_dim_maxpool, 1), ) self._accuracy = CategoricalAccuracy() self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    span_end_encoder: Seq2SeqEncoder,
    dropout: float = 0.2,
    mask_lstms: bool = True,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """Build a BiDAF-style span-extraction reading-comprehension model.

    Parameters
    ----------
    vocab : model vocabulary.
    text_field_embedder : embeds question and passage tokens.
    num_highway_layers : number of highway layers applied on top of the embeddings.
    phrase_layer : contextual encoder over embedded tokens.
    matrix_attention : question/passage similarity function.
    modeling_layer : encoder over the attended passage representation.
    span_end_encoder : extra encoder used to predict span ends.
    dropout : dropout probability; 0 disables dropout entirely.
    mask_lstms : stored flag; whether the forward pass masks LSTM inputs.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.
    """
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    # Highway network applied token-wise (hence TimeDistributed) on the embeddings.
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    # Span-start scores are predicted from the 4x-merged passage encoding
    # concatenated with the modeled passage.
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(
        modeling_layer.get_input_dim(),
        4 * encoding_dim,
        "modeling layer input dim",
        "4 * encoding dim",
    )
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        phrase_layer.get_input_dim(),
        "text field embedder output dim",
        "phrase layer input dim",
    )
    check_dimensions_match(
        span_end_encoder.get_input_dim(),
        4 * encoding_dim + 3 * modeling_dim,
        "span end encoder input dim",
        "4 * encoding dim + 3 * modeling dim",
    )

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so self._dropout(x) is always callable.
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    num_highway_layers: int,
    phrase_layer: Seq2SeqEncoder,
    matrix_attention_layer: MatrixAttention,
    modeling_layer: Seq2SeqEncoder,
    dropout_prob: float = 0.1,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    answering_abilities: List[str] = None,
) -> None:
    """Build a NAQANet-style model for DROP: a QANet encoder plus one
    predictor head per answering ability (span extraction, arithmetic, counting).

    Parameters
    ----------
    vocab : model vocabulary.
    text_field_embedder : embeds question and passage tokens.
    num_highway_layers : highway layers applied after the embedding projection.
    phrase_layer : contextual encoder over projected embeddings.
    matrix_attention_layer : question/passage similarity function.
    modeling_layer : encoder over the attended representation.
    dropout_prob : dropout probability used throughout.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.
    answering_abilities : subset of the four supported abilities;
        None enables all of them.
    """
    super().__init__(vocab, regularizer)

    if answering_abilities is None:
        self.answering_abilities = [
            "passage_span_extraction",
            "question_span_extraction",
            "addition_subtraction",
            "counting",
        ]
    else:
        self.answering_abilities = answering_abilities

    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder
    # Project embeddings into the phrase layer's input space before the highway net.
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    # The attended representation is a 4-way concatenation of encodings.
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim)
    self._modeling_layer = modeling_layer
    # Attention-pooling weights over passage / question tokens.
    self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1)
    self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1)

    # Only needed when there is more than one ability to choose between.
    if len(self.answering_abilities) > 1:
        self._answer_ability_predictor = FeedForward(
            modeling_out_dim + encoding_out_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, len(self.answering_abilities)],
            num_layers=2,
            dropout=dropout_prob,
        )

    if "passage_span_extraction" in self.answering_abilities:
        self._passage_span_extraction_index = self.answering_abilities.index(
            "passage_span_extraction"
        )
        self._passage_span_start_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )
        self._passage_span_end_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )

    if "question_span_extraction" in self.answering_abilities:
        self._question_span_extraction_index = self.answering_abilities.index(
            "question_span_extraction"
        )
        self._question_span_start_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )
        self._question_span_end_predictor = FeedForward(
            modeling_out_dim * 2,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 1],
            num_layers=2,
        )

    if "addition_subtraction" in self.answering_abilities:
        self._addition_subtraction_index = self.answering_abilities.index(
            "addition_subtraction"
        )
        # 3 classes per number: plus / minus / zero sign.
        self._number_sign_predictor = FeedForward(
            modeling_out_dim * 3,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 3],
            num_layers=2,
        )

    if "counting" in self.answering_abilities:
        self._counting_index = self.answering_abilities.index("counting")
        # Count answers are a 10-way classification (counts 0-9).
        self._count_number_predictor = FeedForward(
            modeling_out_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[modeling_out_dim, 10],
            num_layers=2,
        )

    self._drop_metrics = DropEmAndF1()
    self._dropout = torch.nn.Dropout(p=dropout_prob)

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             embedding_dropout: float,
             pre_encode_feedforward: FeedForward,
             encoder: Seq2SeqEncoder,
             integrator: Seq2SeqEncoder,
             integrator_dropout: float,
             output_layer: Union[FeedForward, Maxout],
             elmo: Elmo,
             use_input_elmo: bool = False,
             use_integrator_output_elmo: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a biattentive classification network with optional ELMo.

    ELMo must be supplied through the ``elmo`` argument (never inside
    ``text_field_embedder``) and is mixed in at the input and/or the
    integrator output according to the two ``use_*_elmo`` flags.

    Parameters
    ----------
    vocab : model vocabulary.
    text_field_embedder : embeds input tokens (must NOT contain an "elmo" embedder).
    embedding_dropout : dropout probability applied to embeddings.
    pre_encode_feedforward : feedforward applied before the encoder.
    encoder : sequence encoder over the pre-encoded tokens.
    integrator : encoder over the biattended representation.
    integrator_dropout : dropout probability on the integrator output.
    output_layer : final classification layer; its output dim must equal
        the number of labels.
    elmo : Elmo module, or None when neither flag is set.
    use_input_elmo : concatenate ELMo to the input embeddings.
    use_integrator_output_elmo : concatenate ELMo to the integrator output.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.

    Raises
    ------
    ConfigurationError
        If ELMo flags and the ``elmo`` argument are inconsistent, or if the
        Elmo object's number of output representations does not match the
        number of flags set.
    """
    super(CategoryCrisisELmoClassifier, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    if "elmo" in self._text_field_embedder._token_embedders.keys():  # pylint: disable=protected-access
        raise ConfigurationError(
            "To use ELMo in the BiattentiveClassificationNetwork input, "
            "remove elmo from the text_field_embedder and pass an "
            "Elmo object to the BiattentiveClassificationNetwork and set the "
            "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly."
        )
    self._embedding_dropout = nn.Dropout(embedding_dropout)
    self._num_classes = self.vocab.get_vocab_size("labels")

    self._pre_encode_feedforward = pre_encode_feedforward
    self._encoder = encoder
    self._integrator = integrator
    self._integrator_dropout = nn.Dropout(integrator_dropout)

    self._elmo = elmo
    self._use_input_elmo = use_input_elmo
    self._use_integrator_output_elmo = use_integrator_output_elmo
    self._num_elmo_layers = int(self._use_input_elmo) + int(
        self._use_integrator_output_elmo)
    # Check that, if elmo is None, none of the elmo flags are set.
    if self._elmo is None and self._num_elmo_layers != 0:
        raise ConfigurationError(
            "One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, "
            "but no Elmo object was provided upon construction. Pass in an Elmo "
            "object to use Elmo.")

    if self._elmo is not None:
        # Check that, if elmo is not None, we use it somewhere.
        if self._num_elmo_layers == 0:
            raise ConfigurationError(
                "Elmo object provided upon construction, but both 'use_input_elmo' "
                "and 'use_integrator_output_elmo' are 'False'. Set one of them to "
                "'True' to use Elmo, or do not provide an Elmo object upon construction."
            )
        # Check that the number of flags set is equal to the num_output_representations of the Elmo object
        # pylint: disable=protected-access
        if len(self._elmo._scalar_mixes) != self._num_elmo_layers:
            # BUG FIX: this message previously used "%s" placeholders with
            # str.format(), so the arguments were never substituted into the
            # raised message. Use "{}" placeholders so .format() works.
            raise ConfigurationError(
                "Elmo object has num_output_representations={}, but this does not "
                "match the number of use_*_elmo flags set to true. use_input_elmo "
                "is {}, and use_integrator_output_elmo is {}".format(
                    str(len(self._elmo._scalar_mixes)),
                    str(self._use_input_elmo),
                    str(self._use_integrator_output_elmo)))

    # Calculate combined integrator output dim, taking into account elmo
    if self._use_integrator_output_elmo:
        self._combined_integrator_output_dim = (
            self._integrator.get_output_dim() + self._elmo.get_output_dim())
    else:
        self._combined_integrator_output_dim = self._integrator.get_output_dim(
        )

    self._self_attentive_pooling_projection = nn.Linear(
        self._combined_integrator_output_dim, 1)
    self._output_layer = output_layer

    if self._use_input_elmo:
        check_dimensions_match(
            text_field_embedder.get_output_dim() +
            self._elmo.get_output_dim(),
            self._pre_encode_feedforward.get_input_dim(),
            "text field embedder output dim + ELMo output dim",
            "Pre-encoder feedforward input dim")
    else:
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            self._pre_encode_feedforward.get_input_dim(),
            "text field embedder output dim",
            "Pre-encoder feedforward input dim")
    check_dimensions_match(self._pre_encode_feedforward.get_output_dim(),
                           self._encoder.get_input_dim(),
                           "Pre-encoder feedforward output dim",
                           "Encoder input dim")
    # The integrator consumes a 3-way concatenation of encoder outputs.
    check_dimensions_match(self._encoder.get_output_dim() * 3,
                           self._integrator.get_input_dim(),
                           "Encoder output dim * 3", "Integrator input dim")
    if self._use_integrator_output_elmo:
        check_dimensions_match(
            self._combined_integrator_output_dim * 4,
            self._output_layer.get_input_dim(),
            "(Integrator output dim + ELMo output dim) * 4",
            "Output layer input dim")
    else:
        check_dimensions_match(self._integrator.get_output_dim() * 4,
                               self._output_layer.get_input_dim(),
                               "Integrator output dim * 4",
                               "Output layer input dim")
    check_dimensions_match(self._output_layer.get_output_dim(),
                           self._num_classes, "Output layer output dim",
                           "Number of classes.")

    self.loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             matrix_attention_layer: MatrixAttention,
             modeling_layer: Seq2SeqEncoder,
             dropout_prob: float = 0.1,
             use_semantic_views: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a QANet-style span-extraction QA model that also reports
    text-generation-style metrics (BLEU and ROUGE) over predicted answers.

    Parameters
    ----------
    vocab : model vocabulary.
    text_field_embedder : embeds question and passage tokens.
    num_highway_layers : highway layers applied after the embedding projection.
    phrase_layer : contextual encoder over projected embeddings.
    matrix_attention_layer : question/passage similarity function.
    modeling_layer : encoder over the attended representation.
    dropout_prob : dropout probability; 0 disables dropout.
    use_semantic_views : stored flag consumed by the forward pass.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.
    """
    super().__init__(vocab, regularizer)

    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    # When True, forward() also returns metadata about its outputs.
    self.return_output_metadata = False

    self.use_semantic_views = use_semantic_views

    self._text_field_embedder = text_field_embedder
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                 encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                encoding_in_dim)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    # The attended representation is a 4-way concatenation of encodings.
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                modeling_in_dim)
    self._modeling_layer = modeling_layer
    # Span predictors consume a 2-way concatenation of modeling outputs.
    self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1Custom()
    self._dropout = torch.nn.Dropout(
        p=dropout_prob) if dropout_prob > 0 else lambda x: x

    # evaluation

    # BLEU
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {
        x: Average()
        for x in self._bleu_score_types_to_use
    }

    # ROUGE using pyrouge
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

    # if we have rouge-n as metric we actually get n scores like rouge-1, rouge-2, .., rouge-n
    max_rouge_n = 4
    rouge_n_metrics = []
    if "rouge-n" in self._rouge_score_types_to_use:
        rouge_n_metrics = [
            "rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)
        ]

    rouge_scores_names = rouge_n_metrics + [
        y for y in self._rouge_score_types_to_use if y != 'rouge-n'
    ]
    self._rouge_scores = {x: Average() for x in rouge_scores_names}
    self._rouge_evaluator = rouge.Rouge(
        metrics=self._rouge_score_types_to_use,
        max_n=max_rouge_n,
        limit_length=True,
        length_limit=100,
        length_limit_type='words',
        apply_avg=False,
        apply_best=False,
        alpha=0.5,  # Default F1_score
        weight_factor=1.2,
        stemming=True)

    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    pos_tag_embedding: Embedding = None,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """Build a graph-based dependency parser that scores head-child arcs
    with bilinear attention and labels them with a bilinear classifier.

    Parameters
    ----------
    vocab : model vocabulary ("labels" namespace holds the edge labels).
    text_field_embedder : embeds input tokens.
    encoder : contextual encoder over token representations.
    tag_representation_dim : dim of the head/child tag representations.
    arc_representation_dim : dim of the head/child arc representations.
    tag_feedforward / arc_feedforward : optional custom feedforwards; when
        None a single-layer ELU feedforward is created.
    pos_tag_embedding : optional POS embedding concatenated to the tokens.
    dropout : variational dropout rate used inside the model.
    input_dropout : plain dropout on the encoder input.
    edge_prediction_threshold : sigmoid threshold for predicting an edge;
        must be strictly between 0 and 1.
    initializer : applied to all parameters at the end of construction.

    Raises
    ------
    ConfigurationError
        If ``edge_prediction_threshold`` is outside (0, 1).
    """
    super().__init__(vocab, **kwargs)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(
            f"edge_prediction_threshold must be between "
            f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    encoder_dim = encoder.get_output_dim()

    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim,
        Activation.by_name("elu")())
    # Child feedforward has the same architecture but independent weights.
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)

    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim,
        Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

    # Bilinear scorer producing one score per edge label.
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)

    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()

    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    check_dimensions_match(
        tag_representation_dim,
        self.head_tag_feedforward.get_output_dim(),
        "tag representation dim",
        "tag feedforward output dim",
    )
    check_dimensions_match(
        arc_representation_dim,
        self.head_arc_feedforward.get_output_dim(),
        "arc representation dim",
        "arc feedforward output dim",
    )

    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)
def __init__(self, vocab: Vocabulary, params: Params,
             regularizer: RegularizerApplicator = None):
    """Build the HMTL hierarchical multi-task model.

    Four task heads share one base text field embedder and are chained
    through shortcut connections: NER feeds EMD, and both feed relation
    extraction and coreference.

    Parameters
    ----------
    vocab : model vocabulary shared by every task head.
    params : config with "text_field_embedder", "ner", "emd", "relation"
        and "coref" sections; each section is consumed (popped) here.
    regularizer : passed to the base class and to the CRF taggers.
    """
    super(HMTL, self).__init__(vocab=vocab, regularizer=regularizer)

    # Base text Field Embedder (shared by all task heads)
    text_field_embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(
        vocab=vocab, params=text_field_embedder_params)
    self._text_field_embedder = text_field_embedder

    ############
    # NER Stuffs
    ############
    ner_params = params.pop("ner")

    # Encoder
    encoder_ner_params = ner_params.pop("encoder")
    encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
    self._encoder_ner = encoder_ner

    # Tagger NER - CRF Tagger
    tagger_ner_params = ner_params.pop("tagger")
    tagger_ner = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._text_field_embedder,
        encoder=self._encoder_ner,
        label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
        constraint_type=tagger_ner_params.pop("constraint_type", None),
        dropout=tagger_ner_params.pop("dropout", None),
        regularizer=regularizer)
    self._tagger_ner = tagger_ner

    ############
    # EMD Stuffs
    ############
    emd_params = params.pop("emd")

    # Encoder
    encoder_emd_params = emd_params.pop("encoder")
    encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
    self._encoder_emd = encoder_emd

    # Shortcut connection: EMD sees the base embeddings plus the NER encoder output.
    shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner])
    self._shortcut_text_field_embedder = shortcut_text_field_embedder

    # Tagger: EMD - CRF Tagger
    tagger_emd_params = emd_params.pop("tagger")
    tagger_emd = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder,
        encoder=self._encoder_emd,
        label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
        constraint_type=tagger_emd_params.pop("constraint_type", None),
        # BUG FIX: this previously popped "dropout" from tagger_ner_params
        # (already consumed above), so the configured EMD dropout was
        # silently ignored and None was always used.
        dropout=tagger_emd_params.pop("dropout", None),
        regularizer=regularizer)
    self._tagger_emd = tagger_emd

    ############################
    # Relation Extraction Stuffs
    ############################
    relation_params = params.pop("relation")

    # Encoder
    encoder_relation_params = relation_params.pop("encoder")
    encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params)
    self._encoder_relation = encoder_relation

    # Shortcut connection: relation extraction sees NER and EMD encoder outputs.
    shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner, self._encoder_emd])
    self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation

    # Tagger: Relation
    tagger_relation_params = relation_params.pop("tagger")
    tagger_relation = RelationExtractor(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder_relation,
        context_layer=self._encoder_relation,
        d=tagger_relation_params.pop_int("d"),
        l=tagger_relation_params.pop_int("l"),
        n_classes=tagger_relation_params.pop("n_classes"),
        activation=tagger_relation_params.pop("activation"))
    self._tagger_relation = tagger_relation

    ##############
    # Coref Stuffs
    ##############
    coref_params = params.pop("coref")

    # Encoder
    encoder_coref_params = coref_params.pop("encoder")
    encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params)
    self._encoder_coref = encoder_coref

    # Shortcut connection: coreference also sees NER and EMD encoder outputs.
    shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner, self._encoder_emd])
    self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref

    # Tagger: Coreference
    tagger_coref_params = coref_params.pop("tagger")
    eval_on_gold_mentions = tagger_coref_params.pop_bool(
        "eval_on_gold_mentions", False)
    init_params = tagger_coref_params.pop("initializer", None)
    initializer = (InitializerApplicator.from_params(init_params)
                   if init_params is not None else InitializerApplicator())

    tagger_coref = CoreferenceCustom(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder_coref,
        context_layer=self._encoder_coref,
        mention_feedforward=FeedForward.from_params(
            tagger_coref_params.pop("mention_feedforward")),
        antecedent_feedforward=FeedForward.from_params(
            tagger_coref_params.pop("antecedent_feedforward")),
        feature_size=tagger_coref_params.pop_int("feature_size"),
        max_span_width=tagger_coref_params.pop_int("max_span_width"),
        spans_per_word=tagger_coref_params.pop_float("spans_per_word"),
        max_antecedents=tagger_coref_params.pop_int("max_antecedents"),
        lexical_dropout=tagger_coref_params.pop_float(
            "lexical_dropout", 0.2),
        initializer=initializer,
        regularizer=regularizer,
        eval_on_gold_mentions=eval_on_gold_mentions)
    self._tagger_coref = tagger_coref
    if eval_on_gold_mentions:
        self._tagger_coref._eval_on_gold_mentions = True

    logger.info("Multi-Task Learning Model has been instantiated.")
def __init__(
    self,
    vocab: Vocabulary,
    source_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    attention: Attention,
    beam_size: int,
    max_decoding_steps: int,
    target_embedding_dim: int = 30,
    copy_token: str = "@COPY@",
    source_namespace: str = "source_tokens",
    target_namespace: str = "target_tokens",
    tensor_based_metric: Metric = None,
    token_based_metric: Metric = None,
    initializer: InitializerApplicator = InitializerApplicator(),
) -> None:
    """Build a CopyNet-style seq2seq model that can either generate target
    tokens or copy tokens from the source sequence.

    Parameters
    ----------
    vocab : model vocabulary (separate source/target namespaces).
    source_embedder : embeds source tokens.
    encoder : encoder over the embedded source sequence.
    attention : attention over encoder outputs for the "attentive read".
    beam_size : beam width used at prediction time.
    max_decoding_steps : maximum decoding length for beam search.
    target_embedding_dim : dim of the target-side token embeddings.
    copy_token : special token added to the target namespace for copies.
    source_namespace / target_namespace : vocabulary namespaces.
    tensor_based_metric : defaults to BLEU excluding pad/start/end.
    token_based_metric : optional additional metric on decoded tokens.
    initializer : applied to all parameters at the end of construction.
    """
    super().__init__(vocab)
    self._source_namespace = source_namespace
    self._target_namespace = target_namespace
    # Special-token indices are looked up per-namespace since source and
    # target vocabularies are distinct.
    self._src_start_index = self.vocab.get_token_index(
        START_SYMBOL, self._source_namespace)
    self._src_end_index = self.vocab.get_token_index(
        END_SYMBOL, self._source_namespace)
    self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                   self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                 self._target_namespace)
    self._oov_index = self.vocab.get_token_index(self.vocab._oov_token,
                                                 self._target_namespace)
    self._pad_index = self.vocab.get_token_index(self.vocab._padding_token,
                                                 self._target_namespace)
    self._copy_index = self.vocab.add_token_to_namespace(
        copy_token, self._target_namespace)

    self._tensor_based_metric = tensor_based_metric or BLEU(
        exclude_indices={
            self._pad_index, self._end_index, self._start_index
        })
    self._token_based_metric = token_based_metric

    self._target_vocab_size = self.vocab.get_vocab_size(
        self._target_namespace)

    # Encoding modules.
    self._source_embedder = source_embedder
    self._encoder = encoder

    # Decoder output dim needs to be the same as the encoder output dim since we initialize the
    # hidden state of the decoder with the final hidden state of the encoder.
    # We arbitrarily set the decoder's input dimension to be the same as the output dimension.
    self.encoder_output_dim = self._encoder.get_output_dim()
    self.decoder_output_dim = self.encoder_output_dim
    self.decoder_input_dim = self.decoder_output_dim

    target_vocab_size = self.vocab.get_vocab_size(self._target_namespace)

    # The decoder input will be a function of the embedding of the previous predicted token,
    # an attended encoder hidden state called the "attentive read", and another
    # weighted sum of the encoder hidden state called the "selective read".
    # While the weights for the attentive read are calculated by an `Attention` module,
    # the weights for the selective read are simply the predicted probabilities
    # corresponding to each token in the source sentence that matches the target
    # token from the previous timestep.
    self._target_embedder = Embedding(num_embeddings=target_vocab_size,
                                      embedding_dim=target_embedding_dim)
    self._attention = attention
    self._input_projection_layer = Linear(
        target_embedding_dim + self.encoder_output_dim * 2,
        self.decoder_input_dim)

    # We then run the projected decoder input through an LSTM cell to produce
    # the next hidden state.
    self._decoder_cell = LSTMCell(self.decoder_input_dim,
                                  self.decoder_output_dim)

    # We create a "generation" score for each token in the target vocab
    # with a linear projection of the decoder hidden state.
    self._output_generation_layer = Linear(self.decoder_output_dim,
                                           target_vocab_size)

    # We create a "copying" score for each source token by applying a non-linearity
    # (tanh) to a linear projection of the encoded hidden state for that token,
    # and then taking the dot product of the result with the decoder hidden state.
    self._output_copying_layer = Linear(self.encoder_output_dim,
                                        self.decoder_output_dim)

    # At prediction time, we'll use a beam search to find the best target sequence.
    self._beam_search = BeamSearch(self._end_index,
                                   max_steps=max_decoding_steps,
                                   beam_size=beam_size)

    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             constraint_type: str = None,
             # NOTE(review): this default FeedForward is constructed ONCE at
             # class-definition time, so every instance built with the default
             # shares the same module (and its weights). None is a meaningful
             # value for callers ("no feedforward"), so changing the default
             # needs a sentinel — flagged for a follow-up fix.
             # Also: num_layers=100 with hidden_dims=64 looks suspiciously
             # large — confirm against the intended architecture.
             feedforward: FeedForward = FeedForward(
                 input_dim=66,
                 num_layers=100,
                 hidden_dims=64,
                 activations=torch.nn.ReLU(),
                 dropout=0.5),
             include_start_end_transitions: bool = True,
             dropout: float = None,
             verbose_metrics: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build a CRF sequence tagger.

    Parameters
    ----------
    vocab : model vocabulary; ``label_namespace`` holds the tags.
    text_field_embedder : embeds input tokens.
    encoder : contextual encoder over the embedded tokens.
    label_namespace : vocabulary namespace of the tags.
    constraint_type : label encoding (e.g. "BIO") used both to constrain
        CRF transitions and for span-based F1; None disables constraints.
    feedforward : optional projection between encoder and tag layer; pass
        None to project directly from the encoder output.
    include_start_end_transitions : whether the CRF learns start/end transitions.
    dropout : optional dropout probability; falsy disables dropout.
    verbose_metrics : whether to report per-tag metrics.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward

    # Tag projection sits on top of the feedforward when given, otherwise
    # directly on the encoder output.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))

    if constraint_type is not None:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
    else:
        constraints = None

    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions)

    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace=label_namespace,
                                          label_encoding=constraint_type
                                          or "BIO")

    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim",
                               "feedforward input dim")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    label_namespace: str = "labels",
    label_encoding: Optional[str] = None,
    include_start_end_transitions: bool = True,
    calculate_span_f1: bool = None,
    dropout: Optional[float] = None,
    tcn_level: Optional[int] = None,
    tcn_input_size: Optional[int] = None,
    kernel_size: Optional[int] = None,
    tcn_hidden_size: Optional[int] = None,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """Build a sequence tagger that encodes tokens with a temporal
    convolutional network (TCN) instead of an RNN.

    Parameters
    ----------
    vocab : model vocabulary; ``label_namespace`` holds the tags.
    text_field_embedder : embeds input tokens.
    label_namespace : vocabulary namespace of the tags.
    label_encoding : tag scheme (e.g. "BIO"); required when span F1 is computed.
    include_start_end_transitions : stored flag (CRF-style transitions).
    calculate_span_f1 : defaults to True iff ``label_encoding`` is given.
    dropout : dropout probability, also forwarded to the TCN.
    tcn_level : number of TCN levels (channel list length).
    tcn_input_size : TCN input feature size.
    kernel_size : TCN convolution kernel size.
    tcn_hidden_size : channel count per TCN level.
    verbose_metrics : whether to report per-tag metrics.
    initializer : applied to all parameters at the end of construction.
    regularizer : passed to the base class.

    Raises
    ------
    ConfigurationError
        If ``calculate_span_f1`` is True but no ``label_encoding`` is given.
    """
    super().__init__(vocab, regularizer)

    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None

    self.tcn_level = tcn_level
    self.tcn_input_size = tcn_input_size
    self.kernel_size = kernel_size
    self.tcn_hidden_size = tcn_hidden_size
    # NOTE(review): although tcn_level / tcn_hidden_size / tcn_input_size are
    # typed Optional, the two lines below fail with None — in practice they
    # appear to be required. Confirm and tighten the signature if so.
    self.num_channels = [self.tcn_hidden_size] * self.tcn_level

    self.tag_projection_layer = TimeDistributed(
        Linear(self.tcn_hidden_size, self.num_tags))

    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None

    self.label_encoding = label_encoding
    self.include_start_end_transitions = include_start_end_transitions

    # NOTE(review): dropout is forwarded as-is; if None, TemporalConvNet
    # receives dropout=None — verify it tolerates that.
    self.tcn = tch_layer.TemporalConvNet(self.tcn_input_size,
                                         self.num_channels,
                                         kernel_size,
                                         dropout=dropout)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)

    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    activation=Activation.by_name("tanh")(),
    lemma_tag_embedding: Embedding = None,
    upos_tag_embedding: Embedding = None,
    xpos_tag_embedding: Embedding = None,
    feats_tag_embedding: Embedding = None,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """Build a Kiperwasser & Goldberg-style enhanced-dependencies parser:
    arc and label scores come from small feedforward networks over pairs
    of (head, dependent) encoder states.

    Parameters
    ----------
    vocab : model vocabulary ("deps" namespace holds the edge labels).
    text_field_embedder : embeds input tokens.
    encoder : contextual encoder over the (possibly augmented) tokens.
    tag_representation_dim : dim of head/dep label representations.
    arc_representation_dim : dim of head/dep arc representations.
    activation : non-linearity applied between the two halves of the K&G
        feedforward (stored for use in forward).
    lemma/upos/xpos/feats_tag_embedding : optional extra feature embeddings
        concatenated to the token representation.
    dropout : variational dropout rate used inside the model.
    input_dropout : plain dropout on the encoder input.
    edge_prediction_threshold : sigmoid threshold for predicting an edge;
        must be strictly between 0 and 1.
    initializer : applied to all parameters at the end of construction.

    Raises
    ------
    ConfigurationError
        If ``edge_prediction_threshold`` is outside (0, 1).
    """
    super().__init__(vocab, **kwargs)

    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.activation = activation
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(
            f"edge_prediction_threshold must be between "
            f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

    encoder_dim = encoder.get_output_dim()

    # these two matrices together form the feed forward network which takes the vectors of the two words in question and makes predictions from that
    # this is the trick described by Kiperwasser and Goldberg to make training faster.
    self.edge_head = Linear(encoder_dim, arc_representation_dim)
    self.edge_dep = Linear(
        encoder_dim, arc_representation_dim,
        bias=False)  # bias is already added by edge_head

    self.tag_head = Linear(encoder_dim, tag_representation_dim)
    self.tag_dep = Linear(encoder_dim, tag_representation_dim, bias=False)

    num_labels = self.vocab.get_vocab_size("deps")

    self.arc_out_layer = Linear(
        arc_representation_dim, 1,
        bias=False)  # no bias in output layer of K&G model
    # NOTE(review): tag_head/tag_dep output tag_representation_dim, yet this
    # layer's input is arc_representation_dim — looks like a copy-paste of the
    # arc dim; it only works when the two dims are equal. Verify against the
    # forward pass before changing.
    self.tag_out_layer = Linear(arc_representation_dim, num_labels)

    self._lemma_tag_embedding = lemma_tag_embedding or None
    self._upos_tag_embedding = upos_tag_embedding or None
    self._xpos_tag_embedding = xpos_tag_embedding or None
    self._feats_tag_embedding = feats_tag_embedding or None

    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)

    # add a head sentinel to accommodate for extra root token
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))

    representation_dim = text_field_embedder.get_output_dim()
    if lemma_tag_embedding is not None:
        representation_dim += lemma_tag_embedding.get_output_dim()
    if upos_tag_embedding is not None:
        representation_dim += upos_tag_embedding.get_output_dim()
    if xpos_tag_embedding is not None:
        representation_dim += xpos_tag_embedding.get_output_dim()
    if feats_tag_embedding is not None:
        representation_dim += feats_tag_embedding.get_output_dim()

    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )

    self._enhanced_attachment_scores = EnhancedAttachmentScores()
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    use_attention: bool = False,
    use_positional_encoding: bool = False,
    label_namespace: str = "labels",
    feedforward: Optional[FeedForward] = None,
    label_encoding: Optional[str] = None,
    include_start_end_transitions: bool = True,
    has_mode: bool = False,
    constrain_crf_decoding: bool = None,
    calculate_span_f1: bool = None,
    calculate_relation_f1: bool = False,
    dropout: Optional[float] = None,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    top_k: int = 1,
    max_relation_width: int = 11,
    **kwargs,
) -> None:
    """
    CRF sequence tagger with optional self-attention, positional encoding,
    span-F1 and relation-F1 metrics.

    Parameters
    ----------
    vocab : vocabulary; tag set is read from ``label_namespace``.
    text_field_embedder : embeds the input tokens.
    encoder : contextual encoder over the embedded tokens.
    use_attention : if True, adds a SelfAttentionGRU over the encoder output.
    use_positional_encoding : if True, adds sinusoidal positional encoding.
    feedforward : optional projection applied before the tag projection.
    label_encoding : tagging scheme (e.g. BIO); required when CRF decoding
        is constrained or span F1 is computed.
    include_start_end_transitions : whether the CRF models START/END
        transitions.
    has_mode : forwarded to the relation metric.
    constrain_crf_decoding / calculate_span_f1 : if None, both default to
        ``label_encoding is not None``.
    calculate_relation_f1 : if True, also track a RelationMetric.
    dropout : optional dropout rate applied to embeddings/encodings.
    top_k : number of CRF decodes to return.
    max_relation_width : maximum relation span width for the relation metric.

    Raises
    ------
    ConfigurationError
        If constrained decoding or span F1 is requested without a
        ``label_encoding``.
    """
    super().__init__(vocab, **kwargs)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self.top_k = top_k
    self._verbose_metrics = verbose_metrics
    self.use_attention = use_attention
    self.use_positional_encoding = use_positional_encoding
    # NOTE(review): looks like a scheduled-sampling probability schedule
    # (compounding from 0.1 towards 1.0 at rate 0.99) — confirm where it is
    # consumed.
    self._sample_probability = compounding(0.1, 1.0, 0.99)
    self.has_mode = has_mode
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    # The tag projection consumes the feedforward output if one is given,
    # otherwise the raw encoder output.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
    if self.use_attention:
        self._attention = SelfAttentionGRU(
            output_dim,
            embedding_size=encoder.get_output_dim(),
            rnn_hidden_size=encoder.get_output_dim(),
            bos_index=self.vocab.get_token_index("O", label_namespace)
        )
    if self.use_positional_encoding:
        self.positional_encoding = PositionalEncoding(d_model=encoder.get_output_dim(), dropout=dropout)
    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError(
                "constrain_crf_decoding is True, but no label_encoding was specified."
            )
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None
    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions
    )
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError(
                "calculate_span_f1 is True, but no label_encoding was specified."
            )
        self._f1_metric = SpanBasedF1Measure(
            vocab, tag_namespace=label_namespace, label_encoding=label_encoding
        )
    self.calculate_relation_f1 = calculate_relation_f1
    if calculate_relation_f1:
        self._relation_f1_metric = RelationMetric(
            vocab,
            tag_namespace=label_namespace,
            label_encoding=label_encoding,
            has_mode=has_mode,
            max_relation_width=max_relation_width
        )
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            encoder.get_output_dim(),
            feedforward.get_input_dim(),
            "encoder output dim",
            "feedforward input dim",
        )
    # NOTE(review): purpose of this counter is not visible here — presumably
    # a step/batch counter used elsewhere; confirm.
    self.j = 0
    initializer(self)
hidden_dim=100, num_perspectives=10), aggregator=PytorchSeq2VecWrapper( nn.LSTM(input_size=110, hidden_size=100, bidirectional=True, num_layers=2, batch_first=True, dropout=0.5)), classifier_feedforward=FeedForward( input_dim=400, num_layers=2, hidden_dims=[200, 1], activations=[activ_relu, activ_linear], dropout=[0.5, 0]), initializer=InitializerApplicator(), regularizer=None) """tag处理模块""" tag_ff = TagFF( vocab, word_embeddings, FeedForward(input_dim=300, num_layers=3, hidden_dims=[100, 100, 10], activations=[activ_relu, activ_relu, activ_relu])) """定义模型""" model = OppoLWZ( vocab=vocab, similar_unit=similar_bimpm, tag_feedforward=tag_ff, classifier_feedforward=FeedForward( input_dim=16,
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder_word: Seq2SeqEncoder, attn_word: attention_module.BaseAttention, attn_sent: attention_module.BaseAttention, encoder_sent: Seq2SeqEncoder, thresh: float = 0.5, label_namespace: str = "labels", dropout: float = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, label_indexer: str = "LabelIndicesBiMap") -> None: super(HierAttnNetworkClassifier, self).__init__(vocab, regularizer) # Label Information self.label_namespace = label_namespace self.label_indexer = eval(label_indexer) # FIXME: Implement this self.num_labels = self.label_indexer.get_num_labels() # Prediction thresholds self.thresh = thresh self.log_thresh = np.log(thresh + 1e-5) # Model # Text encoders self.text_field_embedder = text_field_embedder # Sentence and doc encoders self.encoder_word = encoder_word self.encoder_sent = encoder_sent # Attention Modules self.key_dim = attn_sent.get_key_dim() self.attn_word = attn_word self.attn_sent = attn_sent if dropout: self.dropout = Dropout(dropout) else: self.dropout = None # Label prediction self.output_dim = self.attn_sent.get_output_dim() self.logits_layer = Linear(self.output_dim, self.num_labels) self.classification_metric = ClassificationMetrics( self.num_labels, label_indexer) initializer(self) # Some dimension checks check_dimensions_match(text_field_embedder.get_output_dim(), encoder_word.get_input_dim(), "text field embedding dim", "word encoder input dim") check_dimensions_match(encoder_word.get_output_dim(), attn_word.get_input_dim(), "word encoder output", "word attention input") check_dimensions_match(attn_word.get_output_dim(), encoder_sent.get_input_dim(), "word attention output", "sent encoder input") check_dimensions_match(encoder_sent.get_output_dim(), attn_sent.get_input_dim(), "sent encoder output", "sent attn input")
def __init__(self,
             vocab: Vocabulary,
             bow_embedder: TokenEmbedder,
             vae: VAE,
             kl_weight_annealing: str = "constant",
             linear_scaling: float = 1000.0,
             sigmoid_weight_1: float = 0.25,
             sigmoid_weight_2: float = 15,
             reference_counts: str = None,
             reference_vocabulary: str = None,
             background_data_path: str = None,
             update_background_freq: bool = False,
             track_topics: bool = True,
             track_npmi: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    VAMPIRE-style bag-of-words VAE topic model.

    Parameters
    ----------
    vocab : vocabulary; the model reads the "vampire" namespace.
    bow_embedder : embeds documents as bags of words.
    vae : the variational autoencoder module.
    kl_weight_annealing : "linear", "sigmoid", or "constant"; sets the
        initial KLD weight accordingly.
    linear_scaling / sigmoid_weight_1 / sigmoid_weight_2 : annealing
        hyperparameters.
    reference_counts / reference_vocabulary : paths used to precompute NPMI
        statistics when ``reference_vocabulary`` is provided.
    background_data_path : path for background frequency initialization.
    update_background_freq : whether background frequencies are updated.

    Raises
    ------
    ConfigurationError
        If ``kl_weight_annealing`` is not one of the three known modes.
    """
    super().__init__(vocab, regularizer)
    self.metrics = {'nkld': Average(), 'nll': Average()}
    self.vocab = vocab
    self.vae = vae
    self.track_topics = track_topics
    self.track_npmi = track_npmi
    self.vocab_namespace = "vampire"
    self._update_background_freq = update_background_freq
    self._background_freq = self.initialize_bg_from_file(
        file_=background_data_path)
    self._ref_counts = reference_counts
    self._npmi_updated = False
    if reference_vocabulary is not None:
        # Compute data necessary to compute NPMI every epoch
        # NOTE(review): this branch reads self._ref_counts; if
        # reference_vocabulary is set but reference_counts is None,
        # cached_path(None) will fail — confirm both are always set together.
        logger.info("Loading reference vocabulary.")
        self._ref_vocab = read_json(cached_path(reference_vocabulary))
        self._ref_vocab_index = dict(
            zip(self._ref_vocab, range(len(self._ref_vocab))))
        logger.info("Loading reference count matrix.")
        self._ref_count_mat = load_sparse(cached_path(self._ref_counts))
        logger.info("Computing word interaction matrix.")
        self._ref_doc_counts = (self._ref_count_mat > 0).astype(float)
        self._ref_interaction = (self._ref_doc_counts).T.dot(
            self._ref_doc_counts)
        self._ref_doc_sum = np.array(
            self._ref_doc_counts.sum(0).tolist()[0])
        logger.info("Generating npmi matrices.")
        (self._npmi_numerator,
         self._npmi_denominator) = self.generate_npmi_vals(
             self._ref_interaction, self._ref_doc_sum)
        # n_docs is only defined on this path — downstream NPMI code
        # presumably guards on reference_vocabulary; confirm.
        self.n_docs = self._ref_count_mat.shape[0]
    vampire_vocab_size = self.vocab.get_vocab_size(self.vocab_namespace)
    self._bag_of_words_embedder = bow_embedder
    self._kl_weight_annealing = kl_weight_annealing
    self._linear_scaling = float(linear_scaling)
    self._sigmoid_weight_1 = float(sigmoid_weight_1)
    self._sigmoid_weight_2 = float(sigmoid_weight_2)
    # Initial KLD weight according to the chosen annealing schedule.
    if kl_weight_annealing == "linear":
        self._kld_weight = min(1, 1 / self._linear_scaling)
    elif kl_weight_annealing == "sigmoid":
        self._kld_weight = float(
            1 /
            (1 +
             np.exp(-self._sigmoid_weight_1 * (1 - self._sigmoid_weight_2))))
    elif kl_weight_annealing == "constant":
        self._kld_weight = 1.0
    else:
        raise ConfigurationError(
            "anneal type {} not found".format(kl_weight_annealing))
    # setup batchnorm: scale is frozen at 1 so only the running statistics
    # and bias adapt.
    self.bow_bn = torch.nn.BatchNorm1d(vampire_vocab_size,
                                       eps=0.001,
                                       momentum=0.001,
                                       affine=True)
    self.bow_bn.weight.data.copy_(
        torch.ones(vampire_vocab_size, dtype=torch.float64))
    self.bow_bn.weight.requires_grad = False
    # Maintain these states for periodically printing topics and updating KLD
    self._metric_epoch_tracker = 0
    self._kl_epoch_tracker = 0
    self._cur_epoch = 0
    self._cur_npmi = 0.0
    self.batch_num = 0
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    tag_representation_dim: int,
    arc_representation_dim: int,
    tag_feedforward: FeedForward = None,
    arc_feedforward: FeedForward = None,
    lemma_tag_embedding: Embedding = None,
    upos_tag_embedding: Embedding = None,
    xpos_tag_embedding: Embedding = None,
    feats_tag_embedding: Embedding = None,
    dropout: float = 0.0,
    input_dropout: float = 0.0,
    edge_prediction_threshold: float = 0.5,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """
    Biaffine graph parser for enhanced dependency (EUD) graphs: bilinear
    arc scoring plus bilinear label scoring over head/child feed-forward
    representations.

    Parameters
    ----------
    vocab : vocabulary holding the "deps" label namespace.
    text_field_embedder : embeds the input tokens.
    encoder : contextual encoder over embedded tokens.
    tag_representation_dim / arc_representation_dim : sizes of the label
        and arc head/child projections.
    tag_feedforward / arc_feedforward : optional custom projections; a
        1-layer ELU feed-forward is built when omitted.
    lemma_tag_embedding, upos_tag_embedding, xpos_tag_embedding,
    feats_tag_embedding : optional per-token embeddings concatenated to
        the text embedding.
    dropout / input_dropout : variational and plain dropout rates.
    edge_prediction_threshold : probability cutoff for predicting an edge;
        must be strictly between 0 and 1.

    Raises
    ------
    ConfigurationError
        If ``edge_prediction_threshold`` is not in (0, 1).
    """
    super().__init__(vocab, **kwargs)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or FeedForward(
        encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()
    )
    # Child projection mirrors the head projection's architecture (weights
    # are independent after deepcopy).
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(
        arc_representation_dim, arc_representation_dim, use_input_biases=True
    )
    num_labels = self.vocab.get_vocab_size("deps")
    self.head_tag_feedforward = tag_feedforward or FeedForward(
        encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()
    )
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(
        tag_representation_dim, tag_representation_dim, label_dim=num_labels
    )
    # `x or None` keeps a provided Embedding unchanged and maps None -> None.
    self._lemma_tag_embedding = lemma_tag_embedding or None
    self._upos_tag_embedding = upos_tag_embedding or None
    self._xpos_tag_embedding = xpos_tag_embedding or None
    self._feats_tag_embedding = feats_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # add a head sentinel to accommodate for extra root token in EUD graphs
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    # Total token-representation width must match the encoder's input width.
    representation_dim = text_field_embedder.get_output_dim()
    if lemma_tag_embedding is not None:
        representation_dim += lemma_tag_embedding.get_output_dim()
    if upos_tag_embedding is not None:
        representation_dim += upos_tag_embedding.get_output_dim()
    if xpos_tag_embedding is not None:
        representation_dim += xpos_tag_embedding.get_output_dim()
    if feats_tag_embedding is not None:
        representation_dim += feats_tag_embedding.get_output_dim()
    check_dimensions_match(
        representation_dim,
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    check_dimensions_match(
        tag_representation_dim,
        self.head_tag_feedforward.get_output_dim(),
        "tag representation dim",
        "tag feedforward output dim",
    )
    check_dimensions_match(
        arc_representation_dim,
        self.head_arc_feedforward.get_output_dim(),
        "arc representation dim",
        "arc feedforward output dim",
    )
    self._enhanced_attachment_scores = EnhancedAttachmentScores()
    # Per-element losses; reduction is presumably applied downstream — confirm.
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             label_encoding: Optional[str] = None,
             include_start_end_transitions: bool = True,
             constrain_crf_decoding: bool = None,
             calculate_span_f1: bool = None,
             dropout: Optional[float] = None,
             verbose_metrics: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             num_virtual_models: int = 0) -> None:
    """
    CRF tagger with "virtual model" support: orthogonal embedding/hidden
    offsets are precomputed per virtual model, selected via pseudo tokens
    ([pseudo1]..[pseudo9]).

    Parameters
    ----------
    vocab : vocabulary; tag set read from ``label_namespace``.
    text_field_embedder : embeds the input tokens.
    encoder : contextual encoder over the embedded tokens.
    feedforward : optional projection before the tag projection.
    label_encoding : tagging scheme; required for constrained decoding and
        span F1.
    constrain_crf_decoding / calculate_span_f1 : default to
        ``label_encoding is not None`` when left as None.
    num_virtual_models : number of virtual models (rows of the orthogonal
        offset matrices).

    Raises
    ------
    ConfigurationError
        If constrained decoding or span F1 is requested without a
        ``label_encoding``.
    """
    super().__init__(vocab, regularizer)
    self.num_virtual_models = num_virtual_models
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    # Tag projection consumes the feedforward output when one is provided.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))
    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None
    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            encoder.get_output_dim(),
            feedforward.get_input_dim(),
            "encoder output dim",
            "feedforward input dim",
        )
    # Maps each pseudo token to the virtual-model row it selects.
    self.index_dict = {
        "[pseudo1]": 0,
        "[pseudo2]": 1,
        "[pseudo3]": 2,
        "[pseudo4]": 3,
        "[pseudo5]": 4,
        "[pseudo6]": 5,
        "[pseudo7]": 6,
        "[pseudo8]": 7,
        "[pseudo9]": 8
    }
    # NOTE(review): these are plain tensors — not registered as Parameters
    # or buffers — so they are not saved with state_dict and will not follow
    # the module across .to(device); confirm how forward() handles device
    # placement.
    self.orthogonal_embedding_emb = torch.nn.init.orthogonal_(
        torch.empty(self.num_virtual_models,
                    text_field_embedder.get_output_dim(),
                    requires_grad=False)).float()
    self.orthogonal_embedding_hidden = torch.nn.init.orthogonal_(
        torch.empty(self.num_virtual_models,
                    encoder.get_output_dim(),
                    requires_grad=False)).float()
    self.vocab = vocab
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             token_representation_dim: int,
             encoder: Optional[Seq2SeqEncoder] = None,
             decoder: Optional[Union[FeedForward, str]] = None,
             contextualizer: Optional[Contextualizer] = None,
             pretrained_file: Optional[str] = None,
             transfer_contextualizer_from_pretrained_file: bool = False,
             transfer_encoder_from_pretrained_file: bool = False,
             freeze_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Token-level regressor predicting a single real value per token
    (SmoothL1 loss, as in "Neural Models of Factuality", NAACL 2018).

    Parameters
    ----------
    vocab : model vocabulary.
    token_representation_dim : width of the incoming token representations.
    encoder : optional encoder; a PassThroughEncoder is used when omitted.
    decoder : a FeedForward, the string "linear"/"mlp", or None (defaults
        to a 1-layer linear decoder).
    contextualizer : optional contextualizer whose weights may be
        transferred from ``pretrained_file``.
    pretrained_file : archive to load contextualizer/encoder weights from.
    transfer_contextualizer_from_pretrained_file /
    transfer_encoder_from_pretrained_file : which components to transfer.
    freeze_encoder : if True, encoder parameters are frozen.
    """
    super(SelectiveRegressor, self).__init__(vocab, regularizer)
    self._token_representation_dim = token_representation_dim
    self._contextualizer = contextualizer
    if encoder is None:
        encoder = PassThroughEncoder(
            input_dim=self._token_representation_dim)
    self._encoder = encoder
    # Load the contextualizer and encoder weights from the
    # pretrained_file if applicable.  The archive is loaded lazily and
    # reused between the two transfers.
    if pretrained_file:
        archive = None
        if self._contextualizer and transfer_contextualizer_from_pretrained_file:
            logger.info("Attempting to load contextualizer weights from "
                        "pretrained_file at {}".format(pretrained_file))
            archive = load_archive(cached_path(pretrained_file))
            contextualizer_state = archive.model._contextualizer.state_dict(
            )
            # Preserve this model's configured layer num across the
            # state-dict load, then re-apply (or reset) it.
            contextualizer_layer_num = self._contextualizer._layer_num
            self._contextualizer.load_state_dict(contextualizer_state)
            if contextualizer_layer_num is not None:
                logger.info("Setting layer num to {}".format(
                    contextualizer_layer_num))
                self._contextualizer.set_layer_num(
                    contextualizer_layer_num)
            else:
                self._contextualizer.reset_layer_num()
            logger.info("Successfully loaded contextualizer weights!")
        if transfer_encoder_from_pretrained_file:
            logger.info("Attempting to load encoder weights from "
                        "pretrained_file at {}".format(pretrained_file))
            if archive is None:
                archive = load_archive(cached_path(pretrained_file))
            encoder_state = archive.model._encoder.state_dict()
            self._encoder.load_state_dict(encoder_state)
            logger.info("Successfully loaded encoder weights!")
    self._freeze_encoder = freeze_encoder
    for parameter in self._encoder.parameters():
        # If freeze is true, requires_grad should be false and vice versa.
        parameter.requires_grad_(not self._freeze_encoder)
    if decoder is None or decoder == "linear":
        # Create the default decoder (logistic regression) if it is not provided.
        decoder = FeedForward.from_params(
            Params({
                "input_dim": self._encoder.get_output_dim(),
                "num_layers": 1,
                "hidden_dims": 1,
                "activations": "linear"
            }))
        logger.info("No decoder provided to model, using default "
                    "decoder: {}".format(decoder))
    elif decoder == "mlp":
        # Create the MLP decoder
        decoder = FeedForward.from_params(
            Params({
                "input_dim": self._encoder.get_output_dim(),
                "num_layers": 2,
                "hidden_dims": [1024, 1],
                "activations": ["relu", "linear"]
            }))
        logger.info("Using MLP decoder: {}".format(decoder))
    self._decoder = decoder
    check_dimensions_match(self._token_representation_dim,
                           self._encoder.get_input_dim(),
                           "token representation dim", "encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim(),
                           self._decoder.get_input_dim(), "encoder output dim",
                           "decoder input dim")
    check_dimensions_match(self._decoder.get_output_dim(), 1,
                           "decoder output dim",
                           "1, since we're predicting a real value")
    # SmoothL1Loss as described in "Neural Models of Factuality" (NAACL 2018);
    # per-element loss, reduction is presumably applied in forward() — confirm.
    self.loss = torch.nn.SmoothL1Loss(reduction="none")
    self.metrics = {
        "mae": MeanAbsoluteError(),
        "pearson_r": PearsonCorrelation()
    }
    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    logger.info("Applying initializer...")
    initializer(self)
def __init__(
        self,
        vocab: Vocabulary,
        embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        modules,  # TODO(dwadden) Add type.
        feature_size: int,
        max_span_width: int,
        max_trigger_span_width: int,
        target_task: str,
        feedforward_params: Dict[str, Union[int, float]],
        loss_weights: Dict[str, float],
        lexical_dropout: float = 0.2,
        use_attentive_span_extractor: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        module_initializer: InitializerApplicator = InitializerApplicator(
        ),
        regularizer: Optional[RegularizerApplicator] = None,
        display_metrics: List[str] = None) -> None:
    """
    DyGIE multi-task model: builds span extractors and the NER, coref,
    relation, and event submodules over a shared contextual encoder.

    Parameters
    ----------
    vocab : model vocabulary.
    embedder : embeds the input tokens.
    context_layer : shared contextual encoder over embedded tokens.
    modules : per-task submodule configuration (popped per task below).
    feature_size : width of span-width embeddings.
    max_span_width / max_trigger_span_width : maximum widths for generic
        and trigger spans.
    target_task : task whose metrics are displayed.
    feedforward_params : num_layers / hidden_dims / dropout for the shared
        feed-forward factory.
    loss_weights : per-task loss weights.
    lexical_dropout : dropout applied to token embeddings (identity when 0).
    use_attentive_span_extractor : whether to add a self-attentive span
        extractor on top of the endpoint extractors.
    """
    super(DyGIE, self).__init__(vocab, regularizer)
    ####################
    # Create span extractor.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    self._endpoint_trigger_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_trigger_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    ####################
    # Identity function stands in for dropout when it is disabled.
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    if use_attentive_span_extractor:
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=context_layer.get_output_dim())
    else:
        self._attentive_span_extractor = None
    # Set parameters.
    self._embedder = embedder
    self._context_layer = context_layer
    self._loss_weights = loss_weights
    self._max_span_width = max_span_width
    self._max_trigger_span_width = max_trigger_span_width
    self._display_metrics = self._get_display_metrics(target_task)
    # Span embedding widths grow when the attentive extractor is enabled.
    trigger_emb_dim = self._endpoint_trigger_span_extractor.get_output_dim(
    )
    span_emb_dim = self._endpoint_span_extractor.get_output_dim()
    if self._attentive_span_extractor is not None:
        span_emb_dim += self._attentive_span_extractor.get_output_dim()
        trigger_emb_dim += self._attentive_span_extractor.get_output_dim()
    ####################
    # Create submodules.
    modules = Params(modules)

    # Helper function to create feedforward networks.
    def make_feedforward(input_dim):
        return FeedForward(input_dim=input_dim,
                           num_layers=feedforward_params["num_layers"],
                           hidden_dims=feedforward_params["hidden_dims"],
                           activations=torch.nn.ReLU(),
                           dropout=feedforward_params["dropout"])

    # Submodules — each pops its own config from `modules`.
    self._ner = NERTagger.from_params(vocab=vocab,
                                      make_feedforward=make_feedforward,
                                      span_emb_dim=span_emb_dim,
                                      feature_size=feature_size,
                                      params=modules.pop("ner"))
    self._coref = CorefResolver.from_params(
        vocab=vocab,
        make_feedforward=make_feedforward,
        span_emb_dim=span_emb_dim,
        feature_size=feature_size,
        params=modules.pop("coref"))
    self._relation = RelationExtractor.from_params(
        vocab=vocab,
        make_feedforward=make_feedforward,
        span_emb_dim=span_emb_dim,
        feature_size=feature_size,
        params=modules.pop("relation"))
    self._events = EventExtractor.from_params(
        vocab=vocab,
        make_feedforward=make_feedforward,
        text_emb_dim=self._embedder.get_output_dim(),
        trigger_emb_dim=trigger_emb_dim,
        span_emb_dim=span_emb_dim,
        feature_size=feature_size,
        params=modules.pop("events"))
    ####################
    # Initialize text embedder and all submodules
    for module in [self._ner, self._coref, self._relation, self._events]:
        module_initializer(module)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             sh_hierarchy_dir: str,
             text_field_embedder: TextFieldEmbedder,
             abstract_text_encoder: Seq2SeqEncoder,
             attention_encoder: AttentionEncoder,
             local_globel_tradeoff: float = 0.5,
             bce_pos_weight: int = 10,
             use_positional_encoding: bool = False,
             child_parent_index_pair_dir: str = None,
             hv_penalty_lambda: float = 0.1,
             hidden_states_dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    HMCN-style hierarchical multi-label classifier with one attention
    encoder per hierarchy level.

    Parameters
    ----------
    vocab : model vocabulary.
    sh_hierarchy_dir : path to a JSON file mapping each hierarchy level to
        its label set.
    text_field_embedder : embeds the input tokens.
    abstract_text_encoder : encoder over the embedded abstract text.
    attention_encoder : per-level attention encoder; deep-copied once per
        additional hierarchy level.
    local_globel_tradeoff : weighting between local and global predictions.
    bce_pos_weight : positive-class weight for the BCE loss.
    child_parent_index_pair_dir : optional CSV of child,parent label index
        pairs used for the hierarchy-violation penalty.
    hv_penalty_lambda : hierarchy-violation penalty weight.
    hidden_states_dropout : dropout inside the HMCN recurrent module.

    Raises
    ------
    ConfigurationError
        If the embedder output dim does not match the encoder input dim.
    """
    super(EtdHMCNHierarchicalAttention, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    # self.num_classes = self.vocab.get_vocab_size("labels")
    self.abstract_text_encoder = abstract_text_encoder
    # self.attention_encoder = attention_encoder
    self.local_globel_tradeoff = local_globel_tradeoff
    self.use_positional_encoding = use_positional_encoding
    with open(sh_hierarchy_dir, 'r') as f:
        sh_hierarchy = json.load(f)
    # Use same dimension of encoders as HMCN dimension.  One attention
    # encoder per hierarchy level (the first is the provided instance, the
    # rest are deep copies).
    self.num_hierarchy_level = len(sh_hierarchy)
    self.attention_encoders = [attention_encoder]
    for i in range(self.num_hierarchy_level - 1):
        self.attention_encoders.append(deepcopy(attention_encoder))
    self.attention_encoders = torch.nn.ModuleList(self.attention_encoders)
    self.HMCN_recurrent = HMCNRecurrent(
        [len(l) for _, l in sh_hierarchy.items()],
        attention_encoder.get_output_dim(),
        attention_encoder.get_output_dim(),
        hidden_states_dropout=hidden_states_dropout)
    if text_field_embedder.get_output_dim(
    ) != abstract_text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the abstract_text_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   abstract_text_encoder.get_input_dim()))
    self.metrics = {
        # "roc_auc_score": RocAucScore()
        "hit_5": HitAtK(5),
        "hit_10": HitAtK(10)
        # "precision_5": PrecisionAtK(5),
        # "precision_10": PrecisionAtK(10)
        # "hit_100": HitAtK(100),
        # "macro_measure": MacroF1Measure(top_k=5,num_label=self.num_classes)
    }
    if child_parent_index_pair_dir:
        # Each line of the file is "child_idx,parent_idx".
        child_parent_pairs = []
        with open(child_parent_index_pair_dir, 'r') as f:
            for l in f.readlines():
                pair = l.strip().split(',')
                child_parent_pairs.append((int(pair[0]), int(pair[1])))
        childs_idx, parents_idx = map(list, zip(*child_parent_pairs))
        self.loss = HMCNLoss(
            num_classes=[len(l) for _, l in sh_hierarchy.items()],
            bce_pos_weight=bce_pos_weight,
            childs_idx=childs_idx,
            parents_idx=parents_idx,
            penalty_lambda=hv_penalty_lambda)
    else:
        self.loss = HMCNLoss(
            num_classes=[len(l) for _, l in sh_hierarchy.items()],
            bce_pos_weight=bce_pos_weight)
    # self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.ones(self.num_classes)*bce_pos_weight)
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    serialization_dir: str,
    pretrained_model: str,
    tokenizer_wrapper: HFTokenizerWrapper,
    num_labels: int,
    label_namespace: str = "labels",
    transformer_weights_path: str = None,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs,
) -> None:
    """
    Sequence classifier wrapping a HuggingFace
    AutoModelForSequenceClassification, with optional weight transfer from
    an external tar.gz archive.

    Parameters
    ----------
    vocab : model vocabulary (used for label count when ``num_labels`` is
        falsy).
    serialization_dir : directory where the tokenizer is (re)saved.
    pretrained_model : HF model name/path for the classifier backbone.
    tokenizer_wrapper : wrapper around the HF tokenizer; may be reloaded
        from the ``pre_serialization_dir`` environment variable.
    num_labels : number of output labels; falls back to the vocab size of
        ``label_namespace`` when falsy.
    transformer_weights_path : optional tar.gz containing ``weights.th``
        whose transformer weights are copied into the classifier.
    """
    super().__init__(vocab, **kwargs)
    self._tokenizer_wrapper = tokenizer_wrapper
    self._label_namespace = label_namespace
    # Optionally reload a tokenizer state prepared by a previous run.
    pre_serialization_dir = os.environ.get("pre_serialization_dir", None)
    if pre_serialization_dir is not None:
        tokenizer_wrapper.tokenizer = tokenizer_wrapper.load(
            pre_serialization_dir)
    if num_labels:
        self._num_labels = num_labels
    else:
        self._num_labels = vocab.get_vocab_size(
            namespace=self._label_namespace)
    self._accuracy = CategoricalAccuracy()
    self._classifier = AutoModelForSequenceClassification.from_pretrained(
        pretrained_model, num_labels=self._num_labels, return_dict=True)
    # Resize embeddings to account for tokens added to the tokenizer.
    self._classifier.resize_token_embeddings(
        len(tokenizer_wrapper.tokenizer))
    if transformer_weights_path is not None:
        with TemporaryDirectory() as tmpdirname:
            # SECURITY NOTE(review): tarfile.extractall on an arbitrary
            # archive is vulnerable to path traversal for untrusted input;
            # acceptable only for trusted local checkpoints.
            with tarfile.open(transformer_weights_path,
                              mode="r:gz") as input_tar:
                logger.info("Extracting model...")
                input_tar.extractall(tmpdirname)
            model_state = torch.load(
                os.path.join(tmpdirname, "weights.th"),
                map_location=util.device_mapping(-1),
            )
            # Copy weights whose names match after swapping the source
            # prefix for this model's classifier prefix.
            source_prefix = "_transformers_model."
            target_prefix = "_classifier." + self._classifier.base_model_prefix + "."
            for target_name, parameter in self.named_parameters():
                if not target_name.startswith(target_prefix):
                    continue
                source_name = source_prefix + target_name[len(target_prefix
                                                              ):]
                source_weights = model_state[source_name]
                parameter.data.copy_(source_weights.data)
    initializer(self)
    # Persist the (possibly extended) tokenizer and resize embeddings again
    # in case loading changed the vocabulary.
    self._tokenizer_wrapper.tokenizer = self._tokenizer_wrapper.load(
        serialization_dir, pending=True)
    self._tokenizer_wrapper.save(serialization_dir)
    self._classifier.resize_token_embeddings(
        len(tokenizer_wrapper.tokenizer))
def __init__(
    self,
    vocab: Vocabulary,
    span_encoder: Seq2SeqEncoder,
    reasoning_encoder: Seq2SeqEncoder,
    input_dropout: float = 0.3,
    hidden_dim_maxpool: int = 1024,
    class_embs: bool = True,
    reasoning_use_obj: bool = True,
    reasoning_use_answer: bool = True,
    reasoning_use_question: bool = True,
    pool_reasoning: bool = True,
    pool_answer: bool = True,
    pool_question: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
):
    """
    Attention-based VQA model: a detector produces object features, spans
    (question/answer) are encoded and attended against each other and the
    objects, and a reasoning encoder plus max-pool MLP scores each answer.

    Parameters
    ----------
    vocab : model vocabulary.
    span_encoder : encoder applied (time-distributed) to question/answer spans.
    reasoning_encoder : encoder applied to the combined reasoning input.
    input_dropout : dropout used both for span-input variational dropout
        and inside the final MLP.
    hidden_dim_maxpool : hidden width of the final scoring MLP.
    class_embs : whether the detector uses semantic class embeddings.
    reasoning_use_obj / reasoning_use_answer / reasoning_use_question :
        which signals feed the reasoning encoder.
    pool_reasoning / pool_answer / pool_question : which representations
        are pooled into the final MLP input.
    """
    super(AttentionQA, self).__init__(vocab)
    self.detector = SimpleDetector(pretrained=True,
                                   average_pool=True,
                                   semantic=class_embs,
                                   final_dim=512)
    ###################################################################################################
    # Fix: removed leftover debug `print('0')` that polluted stdout on every
    # model construction.
    self.rnn_input_dropout = TimeDistributed(
        InputVariationalDropout(
            input_dropout)) if input_dropout > 0 else None
    self.span_encoder = TimeDistributed(span_encoder)
    self.reasoning_encoder = TimeDistributed(reasoning_encoder)
    # add scene classification visual feature and word embedding feature
    self.span_attention = BilinearMatrixAttention(
        matrix_1_dim=span_encoder.get_output_dim(),
        matrix_2_dim=span_encoder.get_output_dim(),
    )
    self.obj_attention = BilinearMatrixAttention(
        matrix_1_dim=span_encoder.get_output_dim(),
        matrix_2_dim=self.detector.final_dim,
    )
    self.reasoning_use_obj = reasoning_use_obj
    self.reasoning_use_answer = reasoning_use_answer
    self.reasoning_use_question = reasoning_use_question
    self.pool_reasoning = pool_reasoning
    self.pool_answer = pool_answer
    self.pool_question = pool_question
    # Final MLP input width: sum of the dims selected for pooling.
    dim = sum([
        d for d, to_pool in [(
            reasoning_encoder.get_output_dim(), self.pool_reasoning
        ), (span_encoder.get_output_dim(), self.pool_answer
            ), (span_encoder.get_output_dim(), self.pool_question)] if to_pool
    ])
    self.final_mlp = torch.nn.Sequential(
        torch.nn.Dropout(input_dropout, inplace=False),
        torch.nn.Linear(dim, hidden_dim_maxpool),
        torch.nn.ReLU(inplace=True),
        torch.nn.Dropout(input_dropout, inplace=False),
        torch.nn.Linear(hidden_dim_maxpool, 1),
    )
    self._accuracy = CategoricalAccuracy()
    # TODO: consider replacing CrossEntropyLoss with label-smoothing loss
    # (previously experimented with LabelSmoothingLoss(size=4, smoothing=0.1)).
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             tag_representation_dim: int,
             arc_representation_dim: int,
             tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0,
             input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Biaffine dependency parser for Chinese: bilinear arc attention and a
    bilinear label scorer over head/child feed-forward projections, with
    optional MST decoding at validation time.

    Parameters
    ----------
    vocab : vocabulary holding the "head_tags" and "pos" namespaces.
    text_field_embedder : embeds the input tokens.
    encoder : contextual encoder over embedded tokens.
    tag_representation_dim / arc_representation_dim : sizes of the label
        and arc head/child projections.
    tag_feedforward / arc_feedforward : optional custom projections; a
        1-layer ELU feed-forward is built when omitted.
    pos_tag_embedding : optional POS embedding concatenated to the text
        embedding.
    use_mst_decoding_for_validation : decode with MST instead of greedy
        argmax during validation.
    dropout / input_dropout : variational and plain dropout rates.
    """
    super(BiaffineChineseDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1,
                    arc_representation_dim,
                    Activation.by_name("elu")())
    # Child projection mirrors the head projection's architecture (weights
    # are independent after deepcopy).
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1,
                    tag_representation_dim,
                    Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    # `x or None` keeps a provided Embedding unchanged and maps None -> None.
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    # Learned sentinel prepended as the artificial ROOT token representation.
    self._head_sentinel = torch.nn.Parameter(
        torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    # check_dimensions_match(representation_dim, encoder.get_input_dim(),
    #                        "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim,
                           self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim",
                           "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim,
                           self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim",
                           "arc feedforward output dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    # Words whose POS tag is in POS_TO_IGNORE (punctuation) are excluded
    # from attachment-score evaluation.
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {
        tag: index
        for tag, index in tags.items() if tag in POS_TO_IGNORE
    }
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(
        f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
        "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    self._endpoint_span_extractor = EndpointSpanExtractor(
        self.text_field_embedder.get_output_dim(),
        combination="x,y",
        bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=self.text_field_embedder.get_output_dim())
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    label_namespace: str = "labels",
    class_labels: List[str] = None,
    feedforward: Optional[FeedForward] = None,
    label_encoding: Optional[str] = None,
    include_start_end_transitions: bool = True,
    constrain_crf_decoding: bool = None,
    calculate_span_f1: bool = None,
    dropout: Optional[float] = None,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    cached_embeddings: Optional[bool] = None,
) -> None:
    """
    Build a CRF sequence tagger: embedder -> encoder -> (optional feedforward)
    -> per-tag projection -> WeightedCRF, with accuracy, span-F1 and
    per-tag F1 metrics.

    :param label_encoding: Tagging scheme name (e.g. BIO); required when either
        constrained decoding or span-F1 is enabled.
    :param constrain_crf_decoding: Defaults to True iff label_encoding is given.
    :param calculate_span_f1: Defaults to True iff label_encoding is given.
    :param cached_embeddings: Stored flag; consumed elsewhere in the class.
    """
    super().__init__(vocab, regularizer)
    self.cached_embeddings = cached_embeddings
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self._verbose_metrics = verbose_metrics
    # Note: `if dropout:` also treats an explicit 0.0 as "no dropout".
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    # The tag projection consumes whatever layer feeds it: the feedforward
    # when present, otherwise the encoder directly.
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))
    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None
    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError("constrain_crf_decoding is True, but "
                                     "no label_encoding was specified.")
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        # Transition constraints derived from the tagging scheme, so decoding
        # can never emit an invalid tag sequence.
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None
    self.include_start_end_transitions = include_start_end_transitions
    self.crf = WeightedCRF(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
    }
    self.calculate_span_f1 = calculate_span_f1
    if calculate_span_f1:
        if not label_encoding:
            raise ConfigurationError("calculate_span_f1 is True, but "
                                     "no label_encoding was specified.")
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=label_encoding)
    # Per-tag F1 metrics are created unconditionally (unlike span F1).
    self._tag_f1_metric = TagF1(vocab, class_labels=class_labels)
    self._average_f1_metric = AverageTagF1(vocab,
                                           class_labels=class_labels)
    # Fail fast on mismatched layer dimensions.
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    label_namespace: str = "labels",
    feedforward: Optional[FeedForward] = None,
    label_encoding: Optional[str] = None,
    include_start_end_transitions: bool = True,
    constrain_crf_decoding: bool = None,
    dropout: Optional[float] = None,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    top_k: int = 1,
    **kwargs,
) -> None:
    """
    Build a CRF tagger: embedder -> encoder -> (optional feedforward) ->
    per-tag projection -> ConditionalRandomField, tracked by top-1 and
    top-3 categorical accuracy.

    :param label_encoding: Tagging scheme (e.g. BIO); required when
        constrained decoding is enabled.
    :param constrain_crf_decoding: Defaults to True iff label_encoding is given.
    :param top_k: Stored; number of decoded sequences to return elsewhere.
    """
    super().__init__(vocab, **kwargs)

    # Plain attribute wiring.
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    self.top_k = top_k
    self._verbose_metrics = verbose_metrics
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self._feedforward = feedforward

    # The projection layer is fed by the feedforward when one is configured,
    # otherwise directly by the encoder.
    projection_input_dim = (feedforward.get_output_dim()
                            if feedforward is not None
                            else self.encoder.get_output_dim())
    self.tag_projection_layer = TimeDistributed(
        Linear(projection_input_dim, self.num_tags))

    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    self.label_encoding = label_encoding

    crf_constraints = None
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError(
                "constrain_crf_decoding is True, but no label_encoding was specified."
            )
        index_to_label = self.vocab.get_index_to_token_vocabulary(label_namespace)
        crf_constraints = allowed_transitions(label_encoding, index_to_label)

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags,
        crf_constraints,
        include_start_end_transitions=include_start_end_transitions)

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }

    # Fail fast on mismatched layer dimensions.
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        encoder.get_input_dim(),
        "text field embedding dim",
        "encoder input dim",
    )
    if feedforward is not None:
        check_dimensions_match(
            encoder.get_output_dim(),
            feedforward.get_input_dim(),
            "encoder output dim",
            "feedforward input dim",
        )
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             bert_pretrained_model: str,
             dropout_prob: float = 0.1,
             max_count: int = 10,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             answering_abilities: List[str] = None,
             number_rep: str = 'first',
             arithmetic: str = 'base',
             special_numbers: List[int] = None) -> None:
    """
    Build a BERT-based DROP-style reader with a configurable set of answering
    heads (passage span, question span, arithmetic, counting).

    :param bert_pretrained_model: Name/path passed to BertModel/BertTokenizer
        ``from_pretrained``.
    :param max_count: Counting head predicts a class in [0, max_count].
    :param answering_abilities: Subset of the four head names; all four when None.
    :param number_rep: Stored; how numbers are represented — consumed elsewhere.
    :param arithmetic: 'base' uses a sign predictor; any other value calls
        ``init_arithmetic`` instead.
    :param special_numbers: Extra constant numbers available to the arithmetic
        head; treated as empty when None.
    """
    super().__init__(vocab, regularizer)
    if answering_abilities is None:
        self.answering_abilities = ["passage_span_extraction",
                                    "question_span_extraction",
                                    "arithmetic", "counting"]
    else:
        self.answering_abilities = answering_abilities
    self.number_rep = number_rep
    self.BERT = BertModel.from_pretrained(bert_pretrained_model)
    self.tokenizer = BertTokenizer.from_pretrained(bert_pretrained_model)
    bert_dim = self.BERT.pooler.dense.out_features
    self.dropout = dropout_prob
    # One attention-weight predictor per pooled summary the model computes.
    self._passage_weights_predictor = torch.nn.Linear(bert_dim, 1)
    self._question_weights_predictor = torch.nn.Linear(bert_dim, 1)
    self._number_weights_predictor = torch.nn.Linear(bert_dim, 1)
    self._arithmetic_weights_predictor = torch.nn.Linear(bert_dim, 1)
    self._sentence_weights_predictor = torch.nn.Linear(bert_dim, 1)
    # Ability classifier is only needed when there is a choice to make.
    if len(self.answering_abilities) > 1:
        self._answer_ability_predictor = \
            self.ff(2 * bert_dim, bert_dim, len(self.answering_abilities))
    if "passage_span_extraction" in self.answering_abilities:
        self._passage_span_extraction_index = self.answering_abilities.index(
            "passage_span_extraction")
        self._passage_span_start_predictor = torch.nn.Linear(bert_dim, 1)
        self._passage_span_end_predictor = torch.nn.Linear(bert_dim, 1)
    if "question_span_extraction" in self.answering_abilities:
        self._question_span_extraction_index = self.answering_abilities.index(
            "question_span_extraction")
        self._question_span_start_predictor = \
            self.ff(2 * bert_dim, bert_dim, 1)
        self._question_span_end_predictor = \
            self.ff(2 * bert_dim, bert_dim, 1)
    if "arithmetic" in self.answering_abilities:
        self.arithmetic = arithmetic
        self._arithmetic_index = self.answering_abilities.index("arithmetic")
        # FIX: `special_numbers` defaults to None while "arithmetic" is enabled
        # by default, so `len(self.special_numbers)` used to raise TypeError.
        # Treat None as "no special numbers"; explicit lists behave as before.
        self.special_numbers = special_numbers if special_numbers is not None else []
        self.num_special_numbers = len(self.special_numbers)
        self.special_embedding = torch.nn.Embedding(
            self.num_special_numbers, bert_dim)
        if self.arithmetic == "base":
            # Three-way sign prediction per number: {-1, 0, +1}.
            self._number_sign_predictor = \
                self.ff(2 * bert_dim, bert_dim, 3)
        else:
            self.init_arithmetic(bert_dim, bert_dim, bert_dim,
                                 layers=2, dropout=dropout_prob)
    if "counting" in self.answering_abilities:
        self._counting_index = self.answering_abilities.index("counting")
        # Original
        self._count_number_predictor = \
            self.ff(bert_dim, bert_dim, max_count + 1)
        # Regression
        # self._count_number_predictor = \
        #     self.ff(2 * bert_dim, bert_dim, 1)
        # CE: Weighted average
        # self._count_number_predictor = \
        #     self.ff(2 * bert_dim, bert_dim, max_count + 1)
        self.count_classes = torch.arange(max_count + 1).float()
    self._drop_metrics = DropEmAndF1()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             feed_forward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Build a SQuAD reading-comprehension model with highway embedding, phrase
    encoding, (self-)matrix attention with a residual encoder, separate span
    start/end encoders, and a trilinear-style attention parameterized by the
    learned vectors w_x, w_y, w_xy.
    """
    super(ModelSQUAD, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._residual_encoder = residual_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder
    self._feed_forward = feed_forward
    encoding_dim = phrase_layer.get_output_dim()
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    # NOTE(review): span_end_encoding_dim is computed but never used — the
    # span-end predictor below takes encoding_dim. Possibly intentional
    # (matching output dims), possibly a latent bug; confirm against forward().
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    # Second attention instance for passage self-attention, sharing the same
    # similarity function configuration.
    self._self_matrix_attention = MatrixAttention(
        attention_similarity_function)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))
    # Trilinear attention parameters, initialized uniformly with a
    # Xavier-style bound derived from the fan-in (3 * encoding_dim).
    self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
    self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))
    std = math.sqrt(6 / (encoding_dim * 3 + 1))
    self._w_x.data.uniform_(-std, std)
    self._w_y.data.uniform_(-std, std)
    self._w_xy.data.uniform_(-std, std)
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # Identity function stands in for dropout when disabled, so callers can
    # apply self._dropout unconditionally.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self, vocab: Vocabulary,
             context_field_embedder: TextFieldEmbedder,
             context_encoder: Seq2SeqEncoder,
             target_encoding_pooling_function: str = 'mean',
             feedforward: Optional[FeedForward] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             label_name: str = 'target-sentiment-labels',
             loss_weights: Optional[List[float]] = None) -> None:
    # FIX: this description used to sit *after* super().__init__(), making it a
    # no-op string expression; as the first statement it is now a real docstring.
    '''
    :param vocab: A Vocabulary, required in order to compute sizes
                  for input/output projections.
    :param context_field_embedder: Used to embed the text and target text if
                                   target_field_embedder is None but the
                                   target_encoder is NOT None.
    :param context_encoder: Encodes the context sentence/text.
    :param target_encoding_pooling_function: Pooling function to be used to
                                             create a representation for the
                                             target from the encoded context.
                                             This pooled representation will
                                             then be given to the Optional
                                             FeedForward layer. This can be
                                             either `mean` for mean pooling or
                                             `max` for max pooling. If this is
                                             `max` a `relu` function is used
                                             before the pooling (this is to
                                             overcome the padding issue where
                                             some vectors will be zero due to
                                             padding.).
    :param feedforward: An optional feed forward layer to apply after the
                        target encoding average function.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the
                        regularization penalty during training.
    :param dropout: To apply dropout after each layer apart from the last
                    layer. All dropout that is applied to time-based data
                    will be `variational dropout`_; all else will be standard
                    dropout.
    :param label_name: Name of the label name space.
    :param loss_weights: The amount of weight to give the negative, neutral,
                         positive classes respectively. e.g. [0.2, 0.5, 0.3]
                         would weight the negative class by a factor of 0.2,
                         neutral by 0.5 and positive by 0.3. NOTE It assumes
                         the sentiment labels are the following:
                         [negative, neutral, positive].

    This is based on the TD-BERT model by
    `Gao et al. 2019 <https://ieeexplore.ieee.org/abstract/document/8864964>`_
    figure 2. The `target_encoding_pooling_function` when equal to `max` and
    the `context_field_embedder` is BERT will be identical to TD-BERT.
    '''
    super().__init__(vocab, regularizer)
    self.label_name = label_name
    self.context_field_embedder = context_field_embedder
    self.context_encoder = context_encoder
    self.num_classes = self.vocab.get_vocab_size(self.label_name)
    self.feedforward = feedforward
    # Only 'max' and 'mean' pooling are supported; reject anything else early.
    allowed_pooling_functions = ['max', 'mean']
    if target_encoding_pooling_function not in allowed_pooling_functions:
        raise ValueError('Target Encoding Pooling function has to be one '
                         f'of: {allowed_pooling_functions} not: '
                         f'{target_encoding_pooling_function}')
    self.target_encoding_pooling_function = target_encoding_pooling_function
    self.mean_pooler = BagOfEmbeddingsEncoder(
        self.context_encoder.get_output_dim(), averaged=True)
    # Set the loss weights (have to sort them by order of label index in
    # the vocab)
    self.loss_weights = target_sentiment.util.loss_weight_order(
        self, loss_weights, self.label_name)
    # The label projection consumes the feedforward output when one is
    # configured, otherwise the encoder output.
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.context_encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)
    self.metrics = {
        "accuracy": CategoricalAccuracy()
    }
    self.f1_metrics = {}
    # F1 Scores — one per label in the namespace, keyed e.g. 'F1_Negative'.
    label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name)
    for label_index, _label_name in label_index_name.items():
        _label_name = f'F1_{_label_name.capitalize()}'
        self.f1_metrics[_label_name] = F1Measure(label_index)
    # Dropout
    self._variational_dropout = InputVariationalDropout(dropout)
    # Fail fast on mismatched layer dimensions.
    check_dimensions_match(context_field_embedder.get_output_dim(),
                           context_encoder.get_input_dim(),
                           'Embedding', 'Encoder')
    if self.feedforward is not None:
        check_dimensions_match(context_encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               'Encoder', 'FeedForward')
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             judge: Model = None,
             update_judge: bool = False,
             reward_method: str = None,
             detach_value_head: bool = False,
             qa_loss_weight: float = 0.,
             influence_reward: bool = False,
             dataset_name: str = 'squad') -> None:
    """
    Build a BiDAF reading-comprehension model, extended for a debate setup:
    when a ``judge`` model is given, this instance acts as a debater and adds
    a FiLM turn-conditioning layer and a value head on top of the standard
    BiDAF span-prediction stack.

    FIX: the three check_dimensions_match calls (and their comment) were
    duplicated verbatim; the second, identical copy has been removed — the
    checks are pure validations, so behavior is unchanged.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    # Debate roles: no judge means *this* model is the judge.
    self.judge = judge
    self.is_judge = self.judge is None
    self.reward_method = None if self.is_judge else reward_method
    self.update_judge = update_judge and (self.judge is not None)
    self._detach_value_head = detach_value_head
    self._qa_loss_weight = qa_loss_weight
    self.influence_reward = influence_reward
    self.answer_type = 'mc' if dataset_name == 'race' else 'span'
    self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    if not self.is_judge:
        # FiLM generator conditions the modeling layer on the debate turn.
        self._turn_film_gen = torch.nn.Linear(
            1, 2 * modeling_layer.get_input_dim())
        self._film = FiLM()
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder
    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    if not self.is_judge:
        self._value_head = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))
    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # Identity stands in for dropout when disabled so it can be applied
    # unconditionally.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             att_question_to_choice: SimilarityFunction,
             question_encoder: Optional[Seq2SeqEncoder] = None,
             choice_encoder: Optional[Seq2SeqEncoder] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             aggregate_question: Optional[str] = "max",
             aggregate_choice: Optional[str] = "max",
             embeddings_dropout_value: Optional[float] = 0.0) -> None:
    """
    Build a multiple-choice QA model that scores each answer choice via a
    question-to-choice attention over (optionally encoded) aggregated
    representations.

    :param att_question_to_choice: Similarity function used for the
        question-to-choice attention; its tensor dims are validated below.
    :param question_encoder: Optional encoder; when None the raw embedder
        output is used for the question (same scheme for choices).
    :param aggregate_question: Stored aggregation mode name (default "max");
        the aggregation itself happens outside this block.
    """
    super(QAMultiChoiceMaxAttention, self).__init__(vocab)
    self._use_cuda = (torch.cuda.is_available()
                      and torch.cuda.current_device() >= 0)
    self._text_field_embedder = text_field_embedder
    # Identity stands in for dropout when disabled so it can be applied
    # unconditionally.
    if embeddings_dropout_value > 0.0:
        self._embeddings_dropout = torch.nn.Dropout(
            p=embeddings_dropout_value)
    else:
        self._embeddings_dropout = lambda x: x
    self._question_encoder = question_encoder
    # choices encoding
    self._choice_encoder = choice_encoder
    self._question_aggregate = aggregate_question
    self._choice_aggregate = aggregate_choice
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    # Effective dims fall back to the embedder's output when no encoder is
    # configured for that side.
    question_output_dim = self._text_field_embedder.get_output_dim()
    if self._question_encoder is not None:
        question_output_dim = self._question_encoder.get_output_dim()
    choice_output_dim = self._text_field_embedder.get_output_dim()
    if self._choice_encoder is not None:
        choice_output_dim = self._choice_encoder.get_output_dim()
    if question_output_dim != choice_output_dim:
        raise ConfigurationError(
            "Output dimension of the question_encoder (dim: {}) "
            "and choice_encoder (dim: {})"
            "must match! ".format(question_output_dim, choice_output_dim))
    # Check input tensor dimensions for the question to choices attention (similarity function)
    if hasattr(att_question_to_choice, "tensor_1_dim"):
        tensor_1_dim = att_question_to_choice.tensor_1_dim
        if tensor_1_dim != question_output_dim:
            raise ConfigurationError(
                "Output dimension of the question_encoder (dim: {}) "
                "and tensor_1_dim (dim: {}) of att_question_to_choice"
                "must match! ".format(question_output_dim, tensor_1_dim))
    if hasattr(att_question_to_choice, "tensor_2_dim"):
        tensor_2_dim = att_question_to_choice.tensor_2_dim
        if tensor_2_dim != question_output_dim:
            raise ConfigurationError(
                "Output dimension of the choice_encoder (dim: {}) "
                "and tensor_2_dim (dim: {}) of att_question_to_choice"
                "must match! ".format(choice_output_dim, tensor_2_dim))
    self._matrix_attention_question_to_choice = LegacyMatrixAttention(
        att_question_to_choice)
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)