Beispiel #1
0
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.3,
            hidden_dim_maxpool: int = 1024,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        """Assemble the encoders, attention modules and the scoring MLP.

        Parameters
        ----------
        vocab : forwarded to the parent constructor.
        span_encoder : wrapped in ``TimeDistributed``; its output size also
            sizes both bilinear attention modules.
        reasoning_encoder : wrapped in ``TimeDistributed``.
        input_dropout : rate of the variational dropout stored as
            ``rnn_input_dropout`` (``None`` when the rate is not positive)
            and of both dropout layers inside the final MLP.
        hidden_dim_maxpool : hidden width of the final MLP.
        class_embs : accepted but not read by this constructor.
        reasoning_use_obj, reasoning_use_answer, reasoning_use_question :
            stored on the instance unchanged.
        pool_reasoning, pool_answer, pool_question : stored on the instance;
            they also select which encoder output sizes are summed to form
            the input width of the final MLP.
        initializer : applied to the whole module as the last step.
        """
        super(MultiHopAttentionQABUA, self).__init__(vocab)

        # Object features go from 2048 down to 512 dimensions; the smaller
        # size is reused below as the second operand size of obj_attention.
        obj_feature_dim, obj_hidden_dim = 2048, 512
        self.obj_downsample = torch.nn.Sequential(
            torch.nn.Dropout(p=0.1),
            torch.nn.Linear(obj_feature_dim, obj_hidden_dim),
            torch.nn.ReLU(inplace=True),
        )

        if input_dropout > 0:
            self.rnn_input_dropout = TimeDistributed(
                InputVariationalDropout(input_dropout))
        else:
            self.rnn_input_dropout = None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        span_dim = span_encoder.get_output_dim()
        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_dim, matrix_2_dim=span_dim)
        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_dim, matrix_2_dim=obj_hidden_dim)

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question

        # Input width of the final MLP: the output size of every
        # representation whose pooling flag is switched on.
        reasoning_dim = reasoning_encoder.get_output_dim()
        mlp_input_dim = 0
        if self.pool_reasoning:
            mlp_input_dim += reasoning_dim
        if self.pool_answer:
            mlp_input_dim += span_dim
        if self.pool_question:
            mlp_input_dim += span_dim

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(mlp_input_dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Beispiel #2
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        span_end_encoder: Seq2SeqEncoder,
        dropout: float = 0.2,
        mask_lstms: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        """Wire up the embedding, encoding, attention and span-scoring layers.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder : stored; its output size sizes the highway layer.
        num_highway_layers : number of layers handed to ``Highway``.
        phrase_layer, matrix_attention, modeling_layer, span_end_encoder :
            stored on the instance as-is.
        dropout : rate of the dropout module; when not positive an identity
            function is stored instead.
        mask_lstms : stored on the instance.
        initializer : applied to the whole module as the last step.

        The three ``check_dimensions_match`` calls compare the configured
        layer sizes against each other before the metrics are created.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        embedding_dim = text_field_embedder.get_output_dim()
        highway = Highway(embedding_dim, num_highway_layers)
        self._highway_layer = TimeDistributed(highway)
        self._phrase_layer = phrase_layer
        self._matrix_attention = matrix_attention
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        phrase_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        # Four times the phrase-layer output size appears in every size below.
        merged_dim = 4 * phrase_dim

        start_scorer = torch.nn.Linear(merged_dim + modeling_dim, 1)
        self._span_start_predictor = TimeDistributed(start_scorer)

        end_encoding_dim = span_end_encoder.get_output_dim()
        end_scorer = torch.nn.Linear(merged_dim + end_encoding_dim, 1)
        self._span_end_predictor = TimeDistributed(end_scorer)

        # The layer sizes have to line up, and that is hard to see from a
        # configuration file alone, so verify them here.
        check_dimensions_match(
            modeling_layer.get_input_dim(), merged_dim,
            "modeling layer input dim", "4 * encoding dim",
        )
        check_dimensions_match(
            embedding_dim, phrase_layer.get_input_dim(),
            "text field embedder output dim", "phrase layer input dim",
        )
        check_dimensions_match(
            span_end_encoder.get_input_dim(), merged_dim + 3 * modeling_dim,
            "span end encoder input dim", "4 * encoding dim + 3 * modeling dim",
        )

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # Without a positive rate, dropout collapses to the identity function.
        self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
        self._mask_lstms = mask_lstms

        initializer(self)
Beispiel #3
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention_layer: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        dropout_prob: float = 0.1,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        answering_abilities: List[str] = None,
    ) -> None:
        """Create the shared layers and one prediction head per answering ability.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder, phrase_layer, matrix_attention_layer,
        modeling_layer : stored on the instance; their input/output sizes
            determine the sizes of the projection layers and heads.
        num_highway_layers : number of layers handed to ``Highway``.
        dropout_prob : rate of the dropout module and of the ability
            predictor's internal dropout.
        initializer : applied to the whole module as the last step.
        answering_abilities : names of the abilities to enable. ``None``
            enables all four: passage span extraction, question span
            extraction, addition/subtraction and counting.

        For each enabled ability the position of its name in
        ``answering_abilities`` is stored as ``_<ability>_index``. The
        ability predictor itself is only built when more than one ability
        is enabled.
        """
        super().__init__(vocab, regularizer)

        self.answering_abilities = (
            answering_abilities
            if answering_abilities is not None
            else [
                "passage_span_extraction",
                "question_span_extraction",
                "addition_subtraction",
                "counting",
            ]
        )
        abilities = self.answering_abilities

        text_embed_dim = text_field_embedder.get_output_dim()
        encoding_in_dim = phrase_layer.get_input_dim()
        encoding_out_dim = phrase_layer.get_output_dim()
        modeling_in_dim = modeling_layer.get_input_dim()
        modeling_out_dim = modeling_layer.get_output_dim()

        def make_head(input_dim, output_dim, **extra):
            # Every head is a two-layer feed-forward net: a ReLU layer of
            # width `modeling_out_dim` followed by a linear output layer.
            return FeedForward(
                input_dim,
                activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
                hidden_dims=[modeling_out_dim, output_dim],
                num_layers=2,
                **extra,
            )

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
        self._highway_layer = Highway(encoding_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim)
        self._modeling_layer = modeling_layer

        self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1)
        self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1)

        if len(abilities) > 1:
            self._answer_ability_predictor = make_head(
                modeling_out_dim + encoding_out_dim,
                len(abilities),
                dropout=dropout_prob,
            )

        if "passage_span_extraction" in abilities:
            self._passage_span_extraction_index = abilities.index("passage_span_extraction")
            self._passage_span_start_predictor = make_head(modeling_out_dim * 2, 1)
            self._passage_span_end_predictor = make_head(modeling_out_dim * 2, 1)

        if "question_span_extraction" in abilities:
            self._question_span_extraction_index = abilities.index("question_span_extraction")
            self._question_span_start_predictor = make_head(modeling_out_dim * 2, 1)
            self._question_span_end_predictor = make_head(modeling_out_dim * 2, 1)

        if "addition_subtraction" in abilities:
            self._addition_subtraction_index = abilities.index("addition_subtraction")
            self._number_sign_predictor = make_head(modeling_out_dim * 3, 3)

        if "counting" in abilities:
            self._counting_index = abilities.index("counting")
            self._count_number_predictor = make_head(modeling_out_dim, 10)

        self._drop_metrics = DropEmAndF1()
        self._dropout = torch.nn.Dropout(p=dropout_prob)

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 embedding_dropout: float,
                 pre_encode_feedforward: FeedForward,
                 encoder: Seq2SeqEncoder,
                 integrator: Seq2SeqEncoder,
                 integrator_dropout: float,
                 output_layer: Union[FeedForward, Maxout],
                 elmo: Elmo,
                 use_input_elmo: bool = False,
                 use_integrator_output_elmo: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Store the sub-modules and validate that their sizes are compatible.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder : must not itself contain a token embedder
            named ``"elmo"``.
        embedding_dropout, integrator_dropout : rates of the two dropout
            modules.
        pre_encode_feedforward, encoder, integrator, output_layer : stored
            on the instance; their sizes are checked against each other.
        elmo : optional ELMo module (may be ``None``).
        use_input_elmo, use_integrator_output_elmo : where ELMo is used.
            The number of flags set must equal the number of scalar mixes
            of ``elmo``.
        initializer : applied to the whole module as the last step.

        Raises
        ------
        ConfigurationError
            If ELMo is configured inconsistently (embedded in the text
            field embedder, flags set without an ELMo object, an ELMo
            object without flags, or a mismatch in the number of
            representations), or if any dimension check fails.
        """

        super(CategoryCrisisELmoClassifier, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        if "elmo" in self._text_field_embedder._token_embedders.keys():  # pylint: disable=protected-access
            raise ConfigurationError(
                "To use ELMo in the BiattentiveClassificationNetwork input, "
                "remove elmo from the text_field_embedder and pass an "
                "Elmo object to the BiattentiveClassificationNetwork and set the "
                "'use_input_elmo' and 'use_integrator_output_elmo' flags accordingly."
            )
        self._embedding_dropout = nn.Dropout(embedding_dropout)
        self._num_classes = self.vocab.get_vocab_size("labels")

        self._pre_encode_feedforward = pre_encode_feedforward
        self._encoder = encoder
        self._integrator = integrator
        self._integrator_dropout = nn.Dropout(integrator_dropout)

        self._elmo = elmo
        self._use_input_elmo = use_input_elmo
        self._use_integrator_output_elmo = use_integrator_output_elmo
        self._num_elmo_layers = int(self._use_input_elmo) + int(
            self._use_integrator_output_elmo)
        # Check that, if elmo is None, none of the elmo flags are set.
        if self._elmo is None and self._num_elmo_layers != 0:
            raise ConfigurationError(
                "One of 'use_input_elmo' or 'use_integrator_output_elmo' is True, "
                "but no Elmo object was provided upon construction. Pass in an Elmo "
                "object to use Elmo.")

        if self._elmo is not None:
            # Check that, if elmo is not None, we use it somewhere.
            if self._num_elmo_layers == 0:
                raise ConfigurationError(
                    "Elmo object provided upon construction, but both 'use_input_elmo' "
                    "and 'use_integrator_output_elmo' are 'False'. Set one of them to "
                    "'True' to use Elmo, or do not provide an Elmo object upon construction."
                )
            # Check that the number of flags set is equal to the
            # num_output_representations of the Elmo object.
            # pylint: disable=protected-access
            num_elmo_representations = len(self._elmo._scalar_mixes)
            if num_elmo_representations != self._num_elmo_layers:
                # An f-string is used here: the previous message mixed
                # %-style placeholders with str.format(), so the values
                # were never substituted into the text.
                raise ConfigurationError(
                    f"Elmo object has num_output_representations={num_elmo_representations}, "
                    "but this does not match the number of use_*_elmo flags set to true. "
                    f"use_input_elmo is {self._use_input_elmo}, and "
                    f"use_integrator_output_elmo is {self._use_integrator_output_elmo}")

        # Calculate combined integrator output dim, taking into account elmo
        if self._use_integrator_output_elmo:
            self._combined_integrator_output_dim = (
                self._integrator.get_output_dim() +
                self._elmo.get_output_dim())
        else:
            self._combined_integrator_output_dim = self._integrator.get_output_dim(
            )

        self._self_attentive_pooling_projection = nn.Linear(
            self._combined_integrator_output_dim, 1)
        self._output_layer = output_layer

        if self._use_input_elmo:
            check_dimensions_match(
                text_field_embedder.get_output_dim() +
                self._elmo.get_output_dim(),
                self._pre_encode_feedforward.get_input_dim(),
                "text field embedder output dim + ELMo output dim",
                "Pre-encoder feedforward input dim")
        else:
            check_dimensions_match(
                text_field_embedder.get_output_dim(),
                self._pre_encode_feedforward.get_input_dim(),
                "text field embedder output dim",
                "Pre-encoder feedforward input dim")

        check_dimensions_match(self._pre_encode_feedforward.get_output_dim(),
                               self._encoder.get_input_dim(),
                               "Pre-encoder feedforward output dim",
                               "Encoder input dim")
        check_dimensions_match(self._encoder.get_output_dim() * 3,
                               self._integrator.get_input_dim(),
                               "Encoder output dim * 3",
                               "Integrator input dim")
        if self._use_integrator_output_elmo:
            check_dimensions_match(
                self._combined_integrator_output_dim * 4,
                self._output_layer.get_input_dim(),
                "(Integrator output dim + ELMo output dim) * 4",
                "Output layer input dim")
        else:
            check_dimensions_match(self._integrator.get_output_dim() * 4,
                                   self._output_layer.get_input_dim(),
                                   "Integrator output dim * 4",
                                   "Output layer input dim")

        check_dimensions_match(self._output_layer.get_output_dim(),
                               self._num_classes, "Output layer output dim",
                               "Number of classes.")

        self.loss = torch.nn.BCEWithLogitsLoss()
        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 matrix_attention_layer: MatrixAttention,
                 modeling_layer: Seq2SeqEncoder,
                 dropout_prob: float = 0.1,
                 use_semantic_views=True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the model layers plus the span, SQuAD, BLEU and ROUGE metrics.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder, phrase_layer, matrix_attention_layer,
        modeling_layer : stored on the instance; their input/output sizes
            determine the sizes of the projection and prediction layers.
        num_highway_layers : number of layers handed to ``Highway``.
        dropout_prob : rate of the dropout module; when not positive an
            identity function is stored instead.
        use_semantic_views : stored on the instance.
        initializer : applied to the whole module as the last step.
        """
        super().__init__(vocab, regularizer)

        embed_dim = text_field_embedder.get_output_dim()
        phrase_in_dim = phrase_layer.get_input_dim()
        phrase_out_dim = phrase_layer.get_output_dim()
        model_in_dim = modeling_layer.get_input_dim()
        model_out_dim = modeling_layer.get_output_dim()

        self.return_output_metadata = False
        self.use_semantic_views = use_semantic_views

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(embed_dim, phrase_in_dim)
        self._highway_layer = Highway(phrase_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(phrase_in_dim, phrase_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(phrase_out_dim * 4, model_in_dim)
        self._modeling_layer = modeling_layer

        span_input_dim = model_out_dim * 2
        self._span_start_predictor = torch.nn.Linear(span_input_dim, 1)
        self._span_end_predictor = torch.nn.Linear(span_input_dim, 1)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()

        self._squad_metrics = SquadEmAndF1Custom()

        # Without a positive rate, dropout collapses to the identity function.
        if dropout_prob > 0:
            self._dropout = torch.nn.Dropout(p=dropout_prob)
        else:
            self._dropout = lambda x: x

        # --- Evaluation metrics ---

        # BLEU: one running average per n-gram order.
        self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
        self._bleu_scores = {
            bleu_name: Average() for bleu_name in self._bleu_score_types_to_use
        }

        # ROUGE: the "rouge-n" entry stands for n separate scores
        # (rouge-1 .. rouge-n), so it is expanded when naming the averages.
        self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']
        max_rouge_n = 4

        if "rouge-n" in self._rouge_score_types_to_use:
            expanded_names = [
                "rouge-{0}".format(order) for order in range(1, max_rouge_n + 1)
            ]
        else:
            expanded_names = []
        other_names = [
            name for name in self._rouge_score_types_to_use if name != 'rouge-n'
        ]

        self._rouge_scores = {
            name: Average() for name in expanded_names + other_names
        }
        self._rouge_evaluator = rouge.Rouge(
            metrics=self._rouge_score_types_to_use,
            max_n=max_rouge_n,
            limit_length=True,
            length_limit=100,
            length_limit_type='words',
            apply_avg=False,
            apply_best=False,
            alpha=0.5,  # Default F1_score
            weight_factor=1.2,
            stemming=True,
        )

        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        """Build the arc and tag scoring modules on top of the encoder.

        Parameters
        ----------
        vocab, **kwargs : forwarded to the parent constructor.
        text_field_embedder, encoder : stored on the instance.
        tag_representation_dim, arc_representation_dim : sizes of the tag
            and arc representations fed to the bilinear attentions.
        tag_feedforward, arc_feedforward : optional projections for the
            head side; a single-layer ELU ``FeedForward`` is created when
            none is given. The child side is always a deep copy of the head.
        pos_tag_embedding : optional embedding whose output size is added
            to the embedder's when checking the encoder input size.
        dropout, input_dropout : rates of the two dropout modules.
        edge_prediction_threshold : must lie strictly between 0 and 1.
        initializer : applied to the whole module as the last step.

        Raises
        ------
        ConfigurationError
            If the threshold is out of range or a dimension check fails.
        """
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        if not 0 < edge_prediction_threshold < 1:
            raise ConfigurationError(
                "edge_prediction_threshold must be between 0 and 1 (exclusive) "
                f"but found {edge_prediction_threshold}."
            )

        encoder_dim = encoder.get_output_dim()

        # Arc scoring: head projection (supplied or default), a copied child
        # projection, and a bilinear attention with input biases.
        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")()
        )
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
        self.arc_attention = BilinearMatrixAttention(
            arc_representation_dim, arc_representation_dim, use_input_biases=True
        )

        # Tag scoring mirrors the arc branch, with one score per label.
        num_labels = self.vocab.get_vocab_size("labels")
        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")()
        )
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
        self.tag_bilinear = BilinearMatrixAttention(
            tag_representation_dim, tag_representation_dim, label_dim=num_labels
        )

        self._pos_tag_embedding = pos_tag_embedding or None
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        # The encoder input is the text embedding, plus the POS-tag
        # embedding when one is supplied.
        encoder_input_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            encoder_input_dim += pos_tag_embedding.get_output_dim()

        check_dimensions_match(
            encoder_input_dim, encoder.get_input_dim(),
            "text field embedding dim", "encoder input dim",
        )
        check_dimensions_match(
            tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
            "tag representation dim", "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
            "arc representation dim", "arc feedforward output dim",
        )

        self._unlabelled_f1 = F1Measure(positive_label=1)
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
Beispiel #7
0
    def __init__(self,
                 vocab: Vocabulary,
                 params: Params,
                 regularizer: RegularizerApplicator = None):
        """Build the four task-specific taggers on top of a shared embedder.

        Parameters
        ----------
        vocab : vocabulary shared by all sub-models.
        params : configuration from which the ``"text_field_embedder"``,
            ``"ner"``, ``"emd"``, ``"relation"`` and ``"coref"`` sections
            are popped. Each task section provides an ``"encoder"`` and a
            ``"tagger"`` sub-section.
        regularizer : forwarded to the parent constructor and to the NER,
            EMD and coreference sub-models.

        The EMD embedder shortcut-connects the base embedder with the NER
        encoder; the relation and coreference embedders shortcut-connect it
        with both the NER and EMD encoders.
        """

        super(HMTL, self).__init__(vocab=vocab, regularizer=regularizer)

        # Base text Field Embedder
        text_field_embedder_params = params.pop("text_field_embedder")
        text_field_embedder = BasicTextFieldEmbedder.from_params(
            vocab=vocab, params=text_field_embedder_params)
        self._text_field_embedder = text_field_embedder

        ############
        # NER Stuffs
        ############
        ner_params = params.pop("ner")

        # Encoder
        encoder_ner_params = ner_params.pop("encoder")
        encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
        self._encoder_ner = encoder_ner

        # Tagger NER - CRF Tagger
        tagger_ner_params = ner_params.pop("tagger")
        tagger_ner = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            encoder=self._encoder_ner,
            label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
            constraint_type=tagger_ner_params.pop("constraint_type", None),
            dropout=tagger_ner_params.pop("dropout", None),
            regularizer=regularizer)
        self._tagger_ner = tagger_ner

        ############
        # EMD Stuffs
        ############
        emd_params = params.pop("emd")

        # Encoder
        encoder_emd_params = emd_params.pop("encoder")
        encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
        self._encoder_emd = encoder_emd

        shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner])
        self._shortcut_text_field_embedder = shortcut_text_field_embedder

        # Tagger: EMD - CRF Tagger
        tagger_emd_params = emd_params.pop("tagger")
        tagger_emd = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder,
            encoder=self._encoder_emd,
            label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
            constraint_type=tagger_emd_params.pop("constraint_type", None),
            # Read the dropout from the EMD section. It used to be popped
            # from the NER tagger params, where the key had already been
            # consumed, so the EMD tagger always received None.
            dropout=tagger_emd_params.pop("dropout", None),
            regularizer=regularizer)
        self._tagger_emd = tagger_emd

        ############################
        # Relation Extraction Stuffs
        ############################
        relation_params = params.pop("relation")

        # Encoder
        encoder_relation_params = relation_params.pop("encoder")
        encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params)
        self._encoder_relation = encoder_relation

        shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner, self._encoder_emd])
        self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation

        # Tagger: Relation
        tagger_relation_params = relation_params.pop("tagger")
        tagger_relation = RelationExtractor(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder_relation,
            context_layer=self._encoder_relation,
            d=tagger_relation_params.pop_int("d"),
            l=tagger_relation_params.pop_int("l"),
            n_classes=tagger_relation_params.pop("n_classes"),
            activation=tagger_relation_params.pop("activation"))
        self._tagger_relation = tagger_relation

        ##############
        # Coref Stuffs
        ##############
        coref_params = params.pop("coref")

        # Encoder
        encoder_coref_params = coref_params.pop("encoder")
        encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params)
        self._encoder_coref = encoder_coref

        shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner, self._encoder_emd])
        self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref

        # Tagger: Coreference
        tagger_coref_params = coref_params.pop("tagger")
        eval_on_gold_mentions = tagger_coref_params.pop_bool(
            "eval_on_gold_mentions", False)
        # Use the configured initializer when given, else a default one.
        init_params = tagger_coref_params.pop("initializer", None)
        initializer = (InitializerApplicator.from_params(init_params)
                       if init_params is not None else InitializerApplicator())

        tagger_coref = CoreferenceCustom(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder_coref,
            context_layer=self._encoder_coref,
            mention_feedforward=FeedForward.from_params(
                tagger_coref_params.pop("mention_feedforward")),
            antecedent_feedforward=FeedForward.from_params(
                tagger_coref_params.pop("antecedent_feedforward")),
            feature_size=tagger_coref_params.pop_int("feature_size"),
            max_span_width=tagger_coref_params.pop_int("max_span_width"),
            spans_per_word=tagger_coref_params.pop_float("spans_per_word"),
            max_antecedents=tagger_coref_params.pop_int("max_antecedents"),
            lexical_dropout=tagger_coref_params.pop_float(
                "lexical_dropout", 0.2),
            initializer=initializer,
            regularizer=regularizer,
            eval_on_gold_mentions=eval_on_gold_mentions)
        self._tagger_coref = tagger_coref
        if eval_on_gold_mentions:
            self._tagger_coref._eval_on_gold_mentions = True

        logger.info("Multi-Task Learning Model has been instantiated.")
    def __init__(
        self,
        vocab: Vocabulary,
        source_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        attention: Attention,
        beam_size: int,
        max_decoding_steps: int,
        target_embedding_dim: int = 30,
        copy_token: str = "@COPY@",
        source_namespace: str = "source_tokens",
        target_namespace: str = "target_tokens",
        tensor_based_metric: Metric = None,
        token_based_metric: Metric = None,
        initializer: InitializerApplicator = InitializerApplicator(),
    ) -> None:
        """Resolve the special token indices and build the encoder/decoder modules.

        Parameters
        ----------
        vocab : forwarded to the parent constructor; also used to look up
            the special token indices and the target vocabulary size.
        source_embedder, encoder, attention : stored on the instance.
        beam_size, max_decoding_steps : configuration of the beam search.
        target_embedding_dim : size of the target token embedding.
        copy_token : token added to the target namespace; its index is
            stored as ``_copy_index``.
        source_namespace, target_namespace : vocabulary namespaces.
        tensor_based_metric : defaults to ``BLEU`` that excludes the
            padding, end and start indices.
        token_based_metric : stored on the instance as-is.
        initializer : applied to the whole module as the last step.
        """
        super().__init__(vocab)
        self._source_namespace = source_namespace
        self._target_namespace = target_namespace

        src_ns, tgt_ns = self._source_namespace, self._target_namespace
        token_index = self.vocab.get_token_index

        # Special-token indices in the source and target namespaces.
        self._src_start_index = token_index(START_SYMBOL, src_ns)
        self._src_end_index = token_index(END_SYMBOL, src_ns)
        self._start_index = token_index(START_SYMBOL, tgt_ns)
        self._end_index = token_index(END_SYMBOL, tgt_ns)
        self._oov_index = token_index(self.vocab._oov_token, tgt_ns)
        self._pad_index = token_index(self.vocab._padding_token, tgt_ns)
        self._copy_index = self.vocab.add_token_to_namespace(copy_token, tgt_ns)

        ignored_indices = {self._pad_index, self._end_index, self._start_index}
        self._tensor_based_metric = tensor_based_metric or BLEU(
            exclude_indices=ignored_indices)
        self._token_based_metric = token_based_metric

        # Measured after the copy token has been added to the namespace.
        target_vocab_size = self.vocab.get_vocab_size(tgt_ns)
        self._target_vocab_size = target_vocab_size

        # Encoding modules.
        self._source_embedder = source_embedder
        self._encoder = encoder

        # The decoder's hidden state is initialised from the encoder's final
        # hidden state, so the decoder output size equals the encoder output
        # size. The decoder input size is arbitrarily set to the same value.
        self.encoder_output_dim = self._encoder.get_output_dim()
        self.decoder_output_dim = self.encoder_output_dim
        self.decoder_input_dim = self.decoder_output_dim

        # Decoder input = embedding of the previously predicted token, plus
        # the "attentive read" (encoder states weighted by the `Attention`
        # module) and the "selective read" (encoder states weighted by the
        # predicted probabilities of the source tokens matching the previous
        # target token). Hence one embedding plus two encoder-sized vectors.
        self._target_embedder = Embedding(
            num_embeddings=target_vocab_size,
            embedding_dim=target_embedding_dim,
        )
        self._attention = attention
        projection_in_dim = target_embedding_dim + self.encoder_output_dim * 2
        self._input_projection_layer = Linear(projection_in_dim, self.decoder_input_dim)

        # An LSTM cell turns the projected input into the next hidden state.
        self._decoder_cell = LSTMCell(self.decoder_input_dim, self.decoder_output_dim)

        # "Generation" scores: a linear projection of the decoder hidden
        # state onto the target vocabulary.
        self._output_generation_layer = Linear(self.decoder_output_dim, target_vocab_size)

        # "Copying" scores: each encoded source token is linearly projected
        # (a tanh follows later) and dotted with the decoder hidden state.
        self._output_copying_layer = Linear(self.encoder_output_dim, self.decoder_output_dim)

        # Beam search is used at prediction time to pick the target sequence.
        self._beam_search = BeamSearch(
            self._end_index,
            max_steps=max_decoding_steps,
            beam_size=beam_size,
        )

        initializer(self)
Beispiel #9
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 feedforward: FeedForward = FeedForward(
                     input_dim=66,
                     num_layers=100,
                     hidden_dims=64,
                     activations=torch.nn.ReLU(),
                     dropout=0.5),
                 include_start_end_transitions: bool = True,
                 dropout: float = None,
                 verbose_metrics: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Wire up embedder, encoder, optional feedforward, tag projection and CRF.

        Args:
            vocab: Vocabulary; the size of ``label_namespace`` gives the tag count.
            text_field_embedder: Embedder whose output dim must equal the
                encoder's input dim.
            encoder: Sequence encoder placed after the embedder.
            label_namespace: Vocabulary namespace holding the tag labels.
            constraint_type: Label encoding used to build CRF transition
                constraints; ``None`` leaves the CRF unconstrained. The span
                metric falls back to ``"BIO"`` when this is falsy.
            feedforward: Optional network between encoder and tag projection;
                pass ``None`` to project the encoder output directly.
            include_start_end_transitions: Forwarded to the CRF.
            dropout: Dropout rate; a falsy value disables dropout.
            verbose_metrics: Stored on ``self._verbose_metrics``.
            initializer: Applied to the fully built model as the last step.
            regularizer: Forwarded to the parent constructor.

        Raises:
            Whatever ``check_dimensions_match`` raises when adjacent component
            dimensions disagree.
        """
        # NOTE(review): the default ``feedforward`` is a single module instance
        # created when the ``def`` statement runs, so every model constructed
        # without an explicit ``feedforward`` shares that one object.
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self._feedforward = feedforward

        # The projection reads from the feedforward when there is one and
        # straight from the encoder otherwise.
        has_feedforward = feedforward is not None
        if has_feedforward:
            projection_input_dim = feedforward.get_output_dim()
        else:
            projection_input_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(
            Linear(projection_input_dim, self.num_tags))

        # Transition constraints exist only when an encoding scheme was named.
        constraints = None
        if constraint_type is not None:
            index_to_label = self.vocab.get_index_to_token_vocabulary(
                label_namespace)
            constraints = allowed_transitions(constraint_type, index_to_label)

        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.span_metric = SpanBasedF1Measure(
            vocab,
            tag_namespace=label_namespace,
            label_encoding=constraint_type or "BIO")

        # Verify that neighbouring components agree on their dimensions.
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if has_feedforward:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   "encoder output dim",
                                   "feedforward input dim")
        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        label_namespace: str = "labels",
        label_encoding: Optional[str] = None,
        include_start_end_transitions: bool = True,
        calculate_span_f1: Optional[bool] = None,
        dropout: Optional[float] = None,
        tcn_level: Optional[int] = None,
        tcn_input_size: Optional[int] = None,
        kernel_size: Optional[int] = None,
        tcn_hidden_size: Optional[int] = None,
        verbose_metrics: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        """Build a tagger made of an embedder, a temporal conv net and a tag projection.

        Args:
            vocab: Vocabulary; the size of ``label_namespace`` gives the tag count.
            text_field_embedder: Embedder stored on the model.
            label_namespace: Vocabulary namespace holding the tag labels.
            label_encoding: Encoding scheme handed to the span F1 metric.
            include_start_end_transitions: Stored on the model; not otherwise
                used in this constructor.
            calculate_span_f1: Whether to create the span F1 metric. ``None``
                means "enable exactly when ``label_encoding`` is given".
            dropout: Dropout rate for the model-level dropout (disabled when
                falsy); also forwarded unchanged to ``TemporalConvNet``.
            tcn_level: Number of TCN levels; required.
            tcn_input_size: Input size forwarded to ``TemporalConvNet``.
            kernel_size: Kernel size forwarded to ``TemporalConvNet``.
            tcn_hidden_size: Channel count of every TCN level and input size of
                the tag projection; required.
            verbose_metrics: Stored on ``self._verbose_metrics``.
            initializer: Applied to the fully built model as the last step.
            regularizer: Forwarded to the parent constructor.

        Raises:
            ConfigurationError: If ``tcn_level`` or ``tcn_hidden_size`` is
                ``None``, or if span F1 is requested without a
                ``label_encoding``.
        """
        super().__init__(vocab, regularizer)

        # Both values are used unconditionally below (list repetition and the
        # Linear input size), so leaving them at their ``None`` default used to
        # end in an opaque TypeError. Fail early with a readable message.
        unset = [
            name for name, value in (("tcn_level", tcn_level),
                                     ("tcn_hidden_size", tcn_hidden_size))
            if value is None
        ]
        if unset:
            raise ConfigurationError(
                "TCN hyperparameters must be specified, but got None for: " +
                ", ".join(unset))

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.tcn_level = tcn_level
        self.tcn_input_size = tcn_input_size
        self.kernel_size = kernel_size
        self.tcn_hidden_size = tcn_hidden_size
        # One entry per TCN level, all with the same width.
        self.num_channels = [self.tcn_hidden_size] * self.tcn_level

        self.tag_projection_layer = TimeDistributed(
            Linear(self.tcn_hidden_size, self.num_tags))

        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding

        self.include_start_end_transitions = include_start_end_transitions

        # NOTE(review): ``tcn_input_size``, ``kernel_size`` and ``dropout`` are
        # passed through even when they are None; whether TemporalConvNet
        # accepts that is not visible from here -- confirm.
        self.tcn = tch_layer.TemporalConvNet(self.tcn_input_size,
                                             self.num_channels,
                                             kernel_size,
                                             dropout=dropout)

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3),
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
        initializer(self)
Beispiel #11
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        activation=Activation.by_name("tanh")(),
        lemma_tag_embedding: Embedding = None,
        upos_tag_embedding: Embedding = None,
        xpos_tag_embedding: Embedding = None,
        feats_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        """Create the projections, embeddings and losses of the graph parser.

        Args:
            vocab: Vocabulary; the ``"deps"`` namespace size gives the label count.
            text_field_embedder: Embedder for the text field.
            encoder: Sequence encoder; its input dim must equal the summed
                output dims of the text embedder and all given tag embeddings.
            tag_representation_dim: Output size of the tag head/dep projections.
            arc_representation_dim: Output size of the edge head/dep projections.
            activation: Activation stored on the model.
            lemma_tag_embedding: Optional lemma embedding.
            upos_tag_embedding: Optional UPOS embedding.
            xpos_tag_embedding: Optional XPOS embedding.
            feats_tag_embedding: Optional morphological-feature embedding.
            dropout: Rate for the ``InputVariationalDropout`` layer.
            input_dropout: Rate for the plain ``Dropout`` layer.
            edge_prediction_threshold: Must lie strictly between 0 and 1.
            initializer: Applied to the fully built model as the last step.
            **kwargs: Forwarded to the parent constructor.

        Raises:
            ConfigurationError: If ``edge_prediction_threshold`` is not strictly
                between 0 and 1.
        """
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.activation = activation
        self.edge_prediction_threshold = edge_prediction_threshold
        threshold_in_range = 0 < edge_prediction_threshold < 1
        if not threshold_in_range:
            raise ConfigurationError(
                f"edge_prediction_threshold must be between "
                f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        # Head and dependent projections together act as the first layer of a
        # feed-forward net over a word pair (the Kiperwasser & Goldberg trick
        # for faster training); only the head side carries the bias.
        self.edge_head = Linear(encoder_dim, arc_representation_dim)
        self.edge_dep = Linear(encoder_dim, arc_representation_dim, bias=False)

        self.tag_head = Linear(encoder_dim, tag_representation_dim)
        self.tag_dep = Linear(encoder_dim, tag_representation_dim, bias=False)

        num_labels = self.vocab.get_vocab_size("deps")

        # The K&G output layer for arcs has no bias.
        self.arc_out_layer = Linear(arc_representation_dim, 1, bias=False)
        # NOTE(review): the input size here is arc_representation_dim although
        # tag_head/tag_dep produce tag_representation_dim -- confirm against
        # forward() whether tag_representation_dim was intended.
        self.tag_out_layer = Linear(arc_representation_dim, num_labels)

        self._lemma_tag_embedding = lemma_tag_embedding or None
        self._upos_tag_embedding = upos_tag_embedding or None
        self._xpos_tag_embedding = xpos_tag_embedding or None
        self._feats_tag_embedding = feats_tag_embedding or None

        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        # Learned sentinel vector standing in for the extra root token.
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        # The encoder input is checked against the text embedding plus every
        # tag embedding that was supplied.
        representation_dim = text_field_embedder.get_output_dim()
        optional_embeddings = (lemma_tag_embedding, upos_tag_embedding,
                               xpos_tag_embedding, feats_tag_embedding)
        for tag_embedding in optional_embeddings:
            if tag_embedding is not None:
                representation_dim += tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )

        self._enhanced_attachment_scores = EnhancedAttachmentScores()
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
Beispiel #12
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        use_attention: bool = False,
        use_positional_encoding: bool = False,
        label_namespace: str = "labels",
        feedforward: Optional[FeedForward] = None,
        label_encoding: Optional[str] = None,
        include_start_end_transitions: bool = True,
        has_mode: bool = False,
        constrain_crf_decoding: bool = None,
        calculate_span_f1: bool = None,
        calculate_relation_f1: bool = False,
        dropout: Optional[float] = None,
        verbose_metrics: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        top_k: int = 1,
        max_relation_width:int = 11,
        **kwargs,
    ) -> None:
        """Build a CRF tagger with optional self-attention and positional encoding.

        Args:
            vocab: Vocabulary; the size of ``label_namespace`` gives the tag count.
            text_field_embedder: Embedder whose output dim must equal the
                encoder's input dim.
            encoder: Sequence encoder placed after the embedder.
            use_attention: When true, a ``SelfAttentionGRU`` is created.
            use_positional_encoding: When true, a ``PositionalEncoding`` sized
                to the encoder output is created.
            label_namespace: Vocabulary namespace holding the tag labels.
            feedforward: Optional network between encoder and tag projection.
            label_encoding: Encoding scheme for CRF constraints and metrics.
            include_start_end_transitions: Forwarded to the CRF.
            has_mode: Stored on the model and forwarded to ``RelationMetric``.
            constrain_crf_decoding: ``None`` means "constrain exactly when
                ``label_encoding`` is given".
            calculate_span_f1: ``None`` means "enable exactly when
                ``label_encoding`` is given".
            calculate_relation_f1: Whether to create the relation metric.
            dropout: Dropout rate; a falsy value disables the dropout layer.
            verbose_metrics: Stored on ``self._verbose_metrics``.
            initializer: Applied to the fully built model as the last step.
            top_k: Stored on ``self.top_k``.
            max_relation_width: Forwarded to ``RelationMetric``.
            **kwargs: Forwarded to the parent constructor.

        Raises:
            ConfigurationError: If constrained decoding or span F1 is requested
                without a ``label_encoding``.
        """
        super().__init__(vocab, **kwargs)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.top_k = top_k
        self._verbose_metrics = verbose_metrics
        self.use_attention = use_attention
        self.use_positional_encoding = use_positional_encoding
        self._sample_probability = compounding(0.1, 1.0, 0.99)

        self.has_mode = has_mode
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self._feedforward = feedforward

        # The projection reads from the feedforward when there is one and
        # straight from the encoder otherwise.
        has_feedforward = feedforward is not None
        if has_feedforward:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))

        if self.use_attention:
            # The index of the "O" label is handed over as ``bos_index``.
            self._attention = SelfAttentionGRU(
                output_dim,
                embedding_size=encoder.get_output_dim(),
                rnn_hidden_size=encoder.get_output_dim(),
                bos_index=self.vocab.get_token_index("O", label_namespace),
            )

        if self.use_positional_encoding:
            self.positional_encoding = PositionalEncoding(
                d_model=encoder.get_output_dim(), dropout=dropout)

        # Unset switches follow the presence of a label encoding.
        encoding_given = label_encoding is not None
        if constrain_crf_decoding is None:
            constrain_crf_decoding = encoding_given
        if calculate_span_f1 is None:
            calculate_span_f1 = encoding_given

        self.label_encoding = label_encoding
        constraints = None
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but no label_encoding was specified."
                )
            index_to_label = self.vocab.get_index_to_token_vocabulary(
                label_namespace)
            constraints = allowed_transitions(label_encoding, index_to_label)

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3),
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError(
                    "calculate_span_f1 is True, but no label_encoding was specified."
                )
            self._f1_metric = SpanBasedF1Measure(
                vocab,
                tag_namespace=label_namespace,
                label_encoding=label_encoding,
            )

        self.calculate_relation_f1 = calculate_relation_f1
        if calculate_relation_f1:
            self._relation_f1_metric = RelationMetric(
                vocab,
                tag_namespace=label_namespace,
                label_encoding=label_encoding,
                has_mode=has_mode,
                max_relation_width=max_relation_width,
            )

        # Verify that neighbouring components agree on their dimensions.
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        if has_feedforward:
            check_dimensions_match(
                encoder.get_output_dim(),
                feedforward.get_input_dim(),
                "encoder output dim",
                "feedforward input dim",
            )

        # NOTE(review): ``j`` is only initialised here; its purpose is not
        # visible in this constructor.
        self.j = 0
        initializer(self)
Beispiel #13
0
                                                          hidden_dim=100,
                                                          num_perspectives=10),
                          aggregator=PytorchSeq2VecWrapper(
                              nn.LSTM(input_size=110,
                                      hidden_size=100,
                                      bidirectional=True,
                                      num_layers=2,
                                      batch_first=True,
                                      dropout=0.5)),
                          classifier_feedforward=FeedForward(
                              input_dim=400,
                              num_layers=2,
                              hidden_dims=[200, 1],
                              activations=[activ_relu, activ_linear],
                              dropout=[0.5, 0]),
                          initializer=InitializerApplicator(),
                          regularizer=None)
"""tag处理模块"""
# Tag-processing module: a TagFF built from the vocabulary, the word embeddings
# and a three-layer feed-forward network (300 -> 100 -> 100 -> 10).
# NOTE(review): activ_relu is defined outside this excerpt; presumably a ReLU
# activation -- confirm.
tag_ff = TagFF(
    vocab, word_embeddings,
    FeedForward(input_dim=300,
                num_layers=3,
                hidden_dims=[100, 100, 10],
                activations=[activ_relu, activ_relu, activ_relu]))
"""定义模型"""
model = OppoLWZ(
    vocab=vocab,
    similar_unit=similar_bimpm,
    tag_feedforward=tag_ff,
    classifier_feedforward=FeedForward(
        input_dim=16,
Beispiel #14
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder_word: Seq2SeqEncoder,
                 attn_word: attention_module.BaseAttention,
                 attn_sent: attention_module.BaseAttention,
                 encoder_sent: Seq2SeqEncoder,
                 thresh: float = 0.5,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_indexer: str = "LabelIndicesBiMap") -> None:
        """Set up the word/sentence encoders, attention modules and output layer.

        Args:
            vocab: Vocabulary forwarded to the parent constructor.
            text_field_embedder: Embedder feeding the word-level encoder.
            encoder_word: Word-level sequence encoder.
            attn_word: Attention over the word encoder output.
            attn_sent: Attention over the sentence encoder output; its output
                dim is the input size of the logits layer.
            encoder_sent: Sentence-level sequence encoder.
            thresh: Prediction threshold; its log (after adding 1e-5) is also
                stored.
            label_namespace: Stored on the model.
            dropout: Dropout rate; a falsy value disables dropout.
            initializer: Applied to the model before the dimension checks run.
            regularizer: Forwarded to the parent constructor.
            label_indexer: Name that is evaluated to obtain the label indexer.

        Raises:
            Whatever ``check_dimensions_match`` raises when adjacent component
            dimensions disagree.
        """
        super(HierAttnNetworkClassifier, self).__init__(vocab, regularizer)

        # --- Label information -------------------------------------------
        self.label_namespace = label_namespace
        # SECURITY NOTE(review): ``label_indexer`` is evaluated as Python code.
        # If this string can come from an untrusted configuration it allows
        # arbitrary code execution; consider an explicit lookup instead.
        self.label_indexer = eval(label_indexer)
        # FIXME: Implement this
        self.num_labels = self.label_indexer.get_num_labels()

        # --- Prediction thresholds ---------------------------------------
        self.thresh = thresh
        self.log_thresh = np.log(thresh + 1e-5)

        # --- Model components --------------------------------------------
        self.text_field_embedder = text_field_embedder
        # Word-level and sentence-level encoders.
        self.encoder_word = encoder_word
        self.encoder_sent = encoder_sent
        # Attention modules.
        self.key_dim = attn_sent.get_key_dim()
        self.attn_word = attn_word
        self.attn_sent = attn_sent

        self.dropout = Dropout(dropout) if dropout else None

        # --- Label prediction --------------------------------------------
        self.output_dim = self.attn_sent.get_output_dim()
        self.logits_layer = Linear(self.output_dim, self.num_labels)
        # The metric is given the indexer's name, not the evaluated object.
        self.classification_metric = ClassificationMetrics(
            self.num_labels, label_indexer)
        initializer(self)

        # --- Dimension checks along the pipeline -------------------------
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder_word.get_input_dim(),
            "text field embedding dim",
            "word encoder input dim",
        )
        check_dimensions_match(
            encoder_word.get_output_dim(),
            attn_word.get_input_dim(),
            "word encoder output",
            "word attention input",
        )
        check_dimensions_match(
            attn_word.get_output_dim(),
            encoder_sent.get_input_dim(),
            "word attention output",
            "sent encoder input",
        )
        check_dimensions_match(
            encoder_sent.get_output_dim(),
            attn_sent.get_input_dim(),
            "sent encoder output",
            "sent attn input",
        )
Beispiel #15
0
    def __init__(self,
                 vocab: Vocabulary,
                 bow_embedder: TokenEmbedder,
                 vae: VAE,
                 kl_weight_annealing: str = "constant",
                 linear_scaling: float = 1000.0,
                 sigmoid_weight_1: float = 0.25,
                 sigmoid_weight_2: float = 15,
                 reference_counts: str = None,
                 reference_vocabulary: str = None,
                 background_data_path: str = None,
                 update_background_freq: bool = False,
                 track_topics: bool = True,
                 track_npmi: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the VAE, bag-of-words embedder, KL annealing state and NPMI data.

        Args:
            vocab: Vocabulary; the ``"vampire"`` namespace size gives the
                bag-of-words dimensionality.
            bow_embedder: Bag-of-words token embedder.
            vae: The variational autoencoder module.
            kl_weight_annealing: One of ``"linear"``, ``"sigmoid"`` or
                ``"constant"``; selects the initial KL weight.
            linear_scaling: Divisor used for the ``"linear"`` initial weight.
            sigmoid_weight_1: First coefficient of the ``"sigmoid"`` weight.
            sigmoid_weight_2: Second coefficient of the ``"sigmoid"`` weight.
            reference_counts: Path of the reference count matrix; only read
                when ``reference_vocabulary`` is given.
            reference_vocabulary: Path of the reference vocabulary JSON; when
                given, the NPMI matrices are precomputed.
            background_data_path: Passed to ``initialize_bg_from_file``.
            update_background_freq: Stored on the model.
            track_topics: Stored on the model.
            track_npmi: Stored on the model.
            initializer: Applied to the fully built model as the last step.
            regularizer: Forwarded to the parent constructor.

        Raises:
            ConfigurationError: If ``kl_weight_annealing`` is not a known type.
        """
        super().__init__(vocab, regularizer)

        self.metrics = {'nkld': Average(), 'nll': Average()}

        self.vocab = vocab
        self.vae = vae
        self.track_topics = track_topics
        self.track_npmi = track_npmi
        self.vocab_namespace = "vampire"
        self._update_background_freq = update_background_freq
        self._background_freq = self.initialize_bg_from_file(
            file_=background_data_path)
        self._ref_counts = reference_counts

        self._npmi_updated = False

        if reference_vocabulary is not None:
            # Precompute everything needed to evaluate NPMI every epoch.
            logger.info("Loading reference vocabulary.")
            self._ref_vocab = read_json(cached_path(reference_vocabulary))
            self._ref_vocab_index = {
                token: position
                for position, token in enumerate(self._ref_vocab)
            }
            logger.info("Loading reference count matrix.")
            self._ref_count_mat = load_sparse(cached_path(self._ref_counts))
            logger.info("Computing word interaction matrix.")
            # Binarise the counts: 1.0 wherever the count is positive.
            self._ref_doc_counts = (self._ref_count_mat > 0).astype(float)
            self._ref_interaction = (self._ref_doc_counts).T.dot(
                self._ref_doc_counts)
            self._ref_doc_sum = np.array(
                self._ref_doc_counts.sum(0).tolist()[0])
            logger.info("Generating npmi matrices.")
            self._npmi_numerator, self._npmi_denominator = (
                self.generate_npmi_vals(self._ref_interaction,
                                        self._ref_doc_sum))
            self.n_docs = self._ref_count_mat.shape[0]

        vampire_vocab_size = self.vocab.get_vocab_size(self.vocab_namespace)
        self._bag_of_words_embedder = bow_embedder

        self._kl_weight_annealing = kl_weight_annealing

        self._linear_scaling = float(linear_scaling)
        self._sigmoid_weight_1 = float(sigmoid_weight_1)
        self._sigmoid_weight_2 = float(sigmoid_weight_2)

        # Initial KL weight for the chosen annealing schedule.
        if kl_weight_annealing == "linear":
            self._kld_weight = min(1, 1 / self._linear_scaling)
        elif kl_weight_annealing == "sigmoid":
            exponent = -self._sigmoid_weight_1 * (1 - self._sigmoid_weight_2)
            self._kld_weight = float(1 / (1 + np.exp(exponent)))
        elif kl_weight_annealing == "constant":
            self._kld_weight = 1.0
        else:
            raise ConfigurationError(
                f"anneal type {kl_weight_annealing} not found")

        # Batch norm over the bag-of-words dimension; the scale is set to
        # ones and excluded from gradient updates.
        self.bow_bn = torch.nn.BatchNorm1d(
            vampire_vocab_size,
            eps=0.001,
            momentum=0.001,
            affine=True,
        )
        unit_scale = torch.ones(vampire_vocab_size, dtype=torch.float64)
        self.bow_bn.weight.data.copy_(unit_scale)
        self.bow_bn.weight.requires_grad = False

        # Bookkeeping for periodically printing topics and updating the KLD.
        self._metric_epoch_tracker = 0
        self._kl_epoch_tracker = 0
        self._cur_epoch = 0
        self._cur_npmi = 0.0
        self.batch_num = 0

        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        tag_representation_dim: int,
        arc_representation_dim: int,
        tag_feedforward: FeedForward = None,
        arc_feedforward: FeedForward = None,
        lemma_tag_embedding: Embedding = None,
        upos_tag_embedding: Embedding = None,
        xpos_tag_embedding: Embedding = None,
        feats_tag_embedding: Embedding = None,
        dropout: float = 0.0,
        input_dropout: float = 0.0,
        edge_prediction_threshold: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        """Create the feedforwards, bilinear scorers and losses of the graph parser.

        Args:
            vocab: Vocabulary; the ``"deps"`` namespace size gives the label count.
            text_field_embedder: Embedder for the text field.
            encoder: Sequence encoder; its input dim must equal the summed
                output dims of the text embedder and all given tag embeddings.
            tag_representation_dim: Expected output dim of the tag feedforward.
            arc_representation_dim: Expected output dim of the arc feedforward.
            tag_feedforward: Head tag feedforward; a single-layer ELU network
                is created when this is falsy.
            arc_feedforward: Head arc feedforward; a single-layer ELU network
                is created when this is falsy.
            lemma_tag_embedding: Optional lemma embedding.
            upos_tag_embedding: Optional UPOS embedding.
            xpos_tag_embedding: Optional XPOS embedding.
            feats_tag_embedding: Optional morphological-feature embedding.
            dropout: Rate for the ``InputVariationalDropout`` layer.
            input_dropout: Rate for the plain ``Dropout`` layer.
            edge_prediction_threshold: Must lie strictly between 0 and 1.
            initializer: Applied to the fully built model as the last step.
            **kwargs: Forwarded to the parent constructor.

        Raises:
            ConfigurationError: If ``edge_prediction_threshold`` is not strictly
                between 0 and 1.
        """
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.edge_prediction_threshold = edge_prediction_threshold
        threshold_in_range = 0 < edge_prediction_threshold < 1
        if not threshold_in_range:
            raise ConfigurationError(f"edge_prediction_threshold must be between "
                                     f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")

        encoder_dim = encoder.get_output_dim()

        # Arc scoring: the child feedforward is a deep copy of the head one,
        # followed by a bilinear attention with input biases.
        self.head_arc_feedforward = arc_feedforward or FeedForward(
            encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(
            arc_representation_dim,
            arc_representation_dim,
            use_input_biases=True,
        )

        # Tag scoring: same layout, with one bilinear output per "deps" label.
        num_labels = self.vocab.get_vocab_size("deps")
        self.head_tag_feedforward = tag_feedforward or FeedForward(
            encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = BilinearMatrixAttention(
            tag_representation_dim,
            tag_representation_dim,
            label_dim=num_labels,
        )

        self._lemma_tag_embedding = lemma_tag_embedding or None
        self._upos_tag_embedding = upos_tag_embedding or None
        self._xpos_tag_embedding = xpos_tag_embedding or None
        self._feats_tag_embedding = feats_tag_embedding or None

        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)

        # Learned sentinel vector for the extra root token in EUD graphs.
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder.get_output_dim()]))

        # The encoder input is checked against the text embedding plus every
        # tag embedding that was supplied.
        representation_dim = text_field_embedder.get_output_dim()
        optional_embeddings = (lemma_tag_embedding, upos_tag_embedding,
                               xpos_tag_embedding, feats_tag_embedding)
        for tag_embedding in optional_embeddings:
            if tag_embedding is not None:
                representation_dim += tag_embedding.get_output_dim()

        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        check_dimensions_match(
            tag_representation_dim,
            self.head_tag_feedforward.get_output_dim(),
            "tag representation dim",
            "tag feedforward output dim",
        )
        check_dimensions_match(
            arc_representation_dim,
            self.head_arc_feedforward.get_output_dim(),
            "arc representation dim",
            "arc feedforward output dim",
        )

        self._enhanced_attachment_scores = EnhancedAttachmentScores()
        self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction="none")
        self._tag_loss = torch.nn.CrossEntropyLoss(reduction="none")
        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 num_virtual_models: int = 0) -> None:
        """Build a CRF tagger that also holds per-virtual-model orthogonal vectors.

        Args:
            vocab: Vocabulary; the size of ``label_namespace`` gives the tag count.
            text_field_embedder: Embedder whose output dim must equal the
                encoder's input dim.
            encoder: Sequence encoder placed after the embedder.
            label_namespace: Vocabulary namespace holding the tag labels.
            feedforward: Optional network between encoder and tag projection.
            label_encoding: Encoding scheme for CRF constraints and span F1.
            include_start_end_transitions: Forwarded to the CRF.
            constrain_crf_decoding: ``None`` means "constrain exactly when
                ``label_encoding`` is given".
            calculate_span_f1: ``None`` means "enable exactly when
                ``label_encoding`` is given".
            dropout: Dropout rate; a falsy value disables dropout.
            verbose_metrics: Stored on ``self._verbose_metrics``.
            initializer: Applied to the fully built model as the last step.
            regularizer: Forwarded to the parent constructor.
            num_virtual_models: Number of rows in the two orthogonal matrices
                (one sized to the embedder output, one to the encoder output).
                With the default of 0 both matrices are empty.

        Raises:
            ConfigurationError: If constrained decoding or span F1 is requested
                without a ``label_encoding``.
        """
        super().__init__(vocab, regularizer)

        self.num_virtual_models = num_virtual_models

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        # The projection reads from the feedforward when there is one and
        # straight from the encoder otherwise.
        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))

        # Unset switches follow the presence of a label encoding.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3),
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        if feedforward is not None:
            check_dimensions_match(
                encoder.get_output_dim(),
                feedforward.get_input_dim(),
                "encoder output dim",
                "feedforward input dim",
            )

        # Maps the nine pseudo tokens "[pseudo1]".."[pseudo9]" to 0..8.
        self.index_dict = {f"[pseudo{slot + 1}]": slot for slot in range(9)}

        def _orthogonal_rows(width: int) -> torch.Tensor:
            """Return a (num_virtual_models, width) orthogonally initialised tensor."""
            rows = torch.empty(self.num_virtual_models,
                               width,
                               requires_grad=False)
            # An empty tensor (the default num_virtual_models=0) has nothing to
            # orthogonalise; PyTorch releases without the empty-tensor no-op
            # divide by the zero row count inside orthogonal_, so skip the call.
            if rows.numel() > 0:
                torch.nn.init.orthogonal_(rows)
            return rows.float()

        # NOTE(review): these are plain tensor attributes, not registered
        # buffers, so they are not part of the state dict and do not follow
        # the module across devices.
        self.orthogonal_embedding_emb = _orthogonal_rows(
            text_field_embedder.get_output_dim())
        self.orthogonal_embedding_hidden = _orthogonal_rows(
            encoder.get_output_dim())

        self.vocab = vocab

        initializer(self)
Beispiel #18
0
    def __init__(self,
                 vocab: Vocabulary,
                 token_representation_dim: int,
                 encoder: Optional[Seq2SeqEncoder] = None,
                 decoder: Optional[Union[FeedForward, str]] = None,
                 contextualizer: Optional[Contextualizer] = None,
                 pretrained_file: Optional[str] = None,
                 transfer_contextualizer_from_pretrained_file: bool = False,
                 transfer_encoder_from_pretrained_file: bool = False,
                 freeze_encoder: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """
        Wire up the contextualizer, encoder and decoder of the regressor.

        Parameters
        ----------
        vocab : forwarded to the parent constructor.
        token_representation_dim : size of each token vector; must equal the
            encoder's input dim.
        encoder : optional encoder; a ``PassThroughEncoder`` of width
            ``token_representation_dim`` is used when omitted.
        decoder : a ready ``FeedForward``, the string ``"linear"`` (same as
            ``None``: one linear layer with a single output), or ``"mlp"``
            (a 1024-unit ReLU layer followed by a single linear output).
        contextualizer : optional contextualizer stored on the model.
        pretrained_file : archive from which weights may be transferred.
        transfer_contextualizer_from_pretrained_file : copy the archived
            model's contextualizer weights into ``contextualizer``.
        transfer_encoder_from_pretrained_file : copy the archived model's
            encoder weights into the encoder.
        freeze_encoder : when true, encoder parameters get
            ``requires_grad=False``.
        initializer : applied to the whole model as the last step.
        regularizer : forwarded to the parent constructor.
        """
        super(SelectiveRegressor, self).__init__(vocab, regularizer)

        self._token_representation_dim = token_representation_dim
        self._contextualizer = contextualizer
        self._encoder = (
            PassThroughEncoder(input_dim=self._token_representation_dim)
            if encoder is None else encoder)

        # Optionally pull contextualizer and/or encoder weights out of a
        # previously trained archive. The archive is loaded at most once.
        if pretrained_file:
            pretrained_archive = None
            wants_contextualizer = (
                self._contextualizer
                and transfer_contextualizer_from_pretrained_file)
            if wants_contextualizer:
                logger.info("Attempting to load contextualizer weights from "
                            "pretrained_file at {}".format(pretrained_file))
                pretrained_archive = load_archive(cached_path(pretrained_file))
                source_state = \
                    pretrained_archive.model._contextualizer.state_dict()
                # Remember our own layer selection so it can be re-applied
                # once the foreign weights are in place.
                own_layer_num = self._contextualizer._layer_num
                self._contextualizer.load_state_dict(source_state)
                if own_layer_num is None:
                    self._contextualizer.reset_layer_num()
                else:
                    logger.info(
                        "Setting layer num to {}".format(own_layer_num))
                    self._contextualizer.set_layer_num(own_layer_num)
                logger.info("Successfully loaded contextualizer weights!")

            if transfer_encoder_from_pretrained_file:
                logger.info("Attempting to load encoder weights from "
                            "pretrained_file at {}".format(pretrained_file))
                if pretrained_archive is None:
                    pretrained_archive = load_archive(
                        cached_path(pretrained_file))
                self._encoder.load_state_dict(
                    pretrained_archive.model._encoder.state_dict())
                logger.info("Successfully loaded encoder weights!")

        # Freezing is expressed as the inverse of ``requires_grad``.
        self._freeze_encoder = freeze_encoder
        encoder_trainable = not self._freeze_encoder
        for weight in self._encoder.parameters():
            weight.requires_grad_(encoder_trainable)

        if decoder is None or decoder == "linear":
            # Default decoder: a single linear layer with one output unit.
            linear_config = Params({
                "input_dim": self._encoder.get_output_dim(),
                "num_layers": 1,
                "hidden_dims": 1,
                "activations": "linear"
            })
            decoder = FeedForward.from_params(linear_config)
            logger.info("No decoder provided to model, using default "
                        "decoder: {}".format(decoder))
        elif decoder == "mlp":
            # Two-layer decoder: ReLU hidden layer, then a linear output.
            mlp_config = Params({
                "input_dim": self._encoder.get_output_dim(),
                "num_layers": 2,
                "hidden_dims": [1024, 1],
                "activations": ["relu", "linear"]
            })
            decoder = FeedForward.from_params(mlp_config)
            logger.info("Using MLP decoder: {}".format(decoder))
        self._decoder = decoder

        # Each stage must accept what the previous one produces, and the
        # decoder must end in exactly one value.
        check_dimensions_match(self._token_representation_dim,
                               self._encoder.get_input_dim(),
                               "token representation dim",
                               "encoder input dim")
        check_dimensions_match(self._encoder.get_output_dim(),
                               self._decoder.get_input_dim(),
                               "encoder output dim",
                               "decoder input dim")
        check_dimensions_match(self._decoder.get_output_dim(),
                               1,
                               "decoder output dim",
                               "1, since we're predicting a real value")

        # SmoothL1Loss as described in "Neural Models of Factuality" (NAACL 2018);
        # reduction is left to the caller.
        self.loss = torch.nn.SmoothL1Loss(reduction="none")
        self.metrics = dict(mae=MeanAbsoluteError(),
                            pearson_r=PearsonCorrelation())

        # Error-analysis mode is off by default, see commands.error_analysis.
        self.error_analysis = False
        logger.info("Applying initializer...")
        initializer(self)
Beispiel #19
0
    def __init__(
            self,
            vocab: Vocabulary,
            embedder: TextFieldEmbedder,
            context_layer: Seq2SeqEncoder,
            modules,  # TODO(dwadden) Add type.
            feature_size: int,
            max_span_width: int,
            max_trigger_span_width: int,
            target_task: str,
            feedforward_params: Dict[str, Union[int, float]],
            loss_weights: Dict[str, float],
            lexical_dropout: float = 0.2,
            use_attentive_span_extractor: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
            module_initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            display_metrics: List[str] = None) -> None:
        """
        Build the shared span machinery and the four task heads
        (NER, coreference, relation, events).

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        embedder : text embedder; its output dim is handed to the event head.
        context_layer : contextual encoder whose output dim sizes the span
            extractors.
        modules : per-task configuration with the keys ``"ner"``,
            ``"coref"``, ``"relation"`` and ``"events"``; wrapped in
            ``Params`` here.
        feature_size : width-embedding size and feature size for the heads.
        max_span_width / max_trigger_span_width : number of width embeddings
            for the argument and trigger span extractors respectively.
        target_task : passed to ``self._get_display_metrics``.
        feedforward_params : must provide ``"num_layers"``,
            ``"hidden_dims"`` and ``"dropout"``.
        loss_weights : stored unchanged on the model.
        lexical_dropout : dropout probability; values ``<= 0`` install an
            identity function instead of a dropout layer.
        use_attentive_span_extractor : additionally build a
            ``SelfAttentiveSpanExtractor`` and widen the span embeddings.
        initializer : applied to the whole model at the end.
        module_initializer : applied to each task head before ``initializer``.
        display_metrics : accepted but not read by this constructor.
        """
        super(DyGIE, self).__init__(vocab, regularizer)

        context_dim = context_layer.get_output_dim()

        # Endpoint extractors: one for argument spans, one for trigger spans.
        # They differ only in how many width embeddings they allocate.
        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_dim,
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)
        self._endpoint_trigger_span_extractor = EndpointSpanExtractor(
            context_dim,
            combination="x,y",
            num_width_embeddings=max_trigger_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)

        # Identity stand-in keeps call sites uniform when dropout is disabled.
        self._lexical_dropout = (torch.nn.Dropout(p=lexical_dropout)
                                 if lexical_dropout > 0 else (lambda x: x))

        self._attentive_span_extractor = (
            SelfAttentiveSpanExtractor(input_dim=context_dim)
            if use_attentive_span_extractor else None)

        # Plain configuration / shared components.
        self._embedder = embedder
        self._context_layer = context_layer
        self._loss_weights = loss_weights
        self._max_span_width = max_span_width
        self._max_trigger_span_width = max_trigger_span_width
        self._display_metrics = self._get_display_metrics(target_task)

        # Span embedding sizes; the attentive extractor, when present, is
        # concatenated onto both the argument and the trigger embeddings.
        trigger_emb_dim = \
            self._endpoint_trigger_span_extractor.get_output_dim()
        span_emb_dim = self._endpoint_span_extractor.get_output_dim()
        if self._attentive_span_extractor is not None:
            span_emb_dim += self._attentive_span_extractor.get_output_dim()
            trigger_emb_dim += self._attentive_span_extractor.get_output_dim()

        module_params = Params(modules)

        def make_feedforward(input_dim):
            # Factory handed to every task head so they all share one
            # feedforward recipe.
            return FeedForward(input_dim=input_dim,
                               num_layers=feedforward_params["num_layers"],
                               hidden_dims=feedforward_params["hidden_dims"],
                               activations=torch.nn.ReLU(),
                               dropout=feedforward_params["dropout"])

        # Arguments common to every task head.
        head_kwargs = dict(vocab=vocab,
                           make_feedforward=make_feedforward,
                           span_emb_dim=span_emb_dim,
                           feature_size=feature_size)

        self._ner = NERTagger.from_params(
            params=module_params.pop("ner"), **head_kwargs)
        self._coref = CorefResolver.from_params(
            params=module_params.pop("coref"), **head_kwargs)
        self._relation = RelationExtractor.from_params(
            params=module_params.pop("relation"), **head_kwargs)
        self._events = EventExtractor.from_params(
            params=module_params.pop("events"),
            text_emb_dim=self._embedder.get_output_dim(),
            trigger_emb_dim=trigger_emb_dim,
            **head_kwargs)

        # Heads get their own initializer first; the model-wide one runs last.
        for task_head in (self._ner, self._coref, self._relation,
                          self._events):
            module_initializer(task_head)

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 sh_hierarchy_dir: str,
                 text_field_embedder: TextFieldEmbedder,
                 abstract_text_encoder: Seq2SeqEncoder,
                 attention_encoder: AttentionEncoder,
                 local_globel_tradeoff: float = 0.5,
                 bce_pos_weight: int = 10,
                 use_positional_encoding: bool = False,
                 child_parent_index_pair_dir: str = None,
                 hv_penalty_lambda: float = 0.1,
                 hidden_states_dropout: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """
        Set up one attention encoder per hierarchy level, the recurrent HMCN
        head and the matching loss.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        sh_hierarchy_dir : path of a JSON file; its number of entries is the
            number of hierarchy levels and the length of each value is that
            level's class count.
        text_field_embedder : embedder whose output dim must equal the
            input dim of ``abstract_text_encoder``.
        abstract_text_encoder : encoder stored on the model.
        attention_encoder : used for the first level and deep-copied for
            every further level.
        local_globel_tradeoff, use_positional_encoding : stored unchanged.
        bce_pos_weight : forwarded to ``HMCNLoss``.
        child_parent_index_pair_dir : optional path of a text file with one
            ``child,parent`` integer pair per line; when given, the pairs
            and ``hv_penalty_lambda`` are also forwarded to ``HMCNLoss``.
        hidden_states_dropout : forwarded to ``HMCNRecurrent``.
        initializer : applied to the whole model at the end.

        Raises
        ------
        ConfigurationError
            If embedder output dim and encoder input dim disagree.
        """
        super(EtdHMCNHierarchicalAttention, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.abstract_text_encoder = abstract_text_encoder
        self.local_globel_tradeoff = local_globel_tradeoff
        self.use_positional_encoding = use_positional_encoding

        with open(sh_hierarchy_dir, 'r') as hierarchy_file:
            sh_hierarchy = json.load(hierarchy_file)

        # The given encoder serves the first level; every further level
        # gets its own independent copy.
        self.num_hierarchy_level = len(sh_hierarchy)
        per_level_encoders = [attention_encoder] + [
            deepcopy(attention_encoder)
            for _ in range(self.num_hierarchy_level - 1)
        ]
        self.attention_encoders = torch.nn.ModuleList(per_level_encoders)

        # The HMCN head reuses the attention encoder's width for both of its
        # dimension arguments.
        attention_dim = attention_encoder.get_output_dim()
        self.HMCN_recurrent = HMCNRecurrent(
            [len(level_labels) for level_labels in sh_hierarchy.values()],
            attention_dim,
            attention_dim,
            hidden_states_dropout=hidden_states_dropout)

        embedding_dim = text_field_embedder.get_output_dim()
        encoder_input_dim = abstract_text_encoder.get_input_dim()
        if embedding_dim != encoder_input_dim:
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the abstract_text_encoder. Found {} and {}, "
                "respectively.".format(embedding_dim, encoder_input_dim))

        self.metrics = {
            "hit_5": HitAtK(5),
            "hit_10": HitAtK(10)
        }

        # The loss always needs the per-level class counts; the hierarchy
        # penalty inputs are added only when a pair file is supplied.
        loss_kwargs = dict(
            num_classes=[
                len(level_labels) for level_labels in sh_hierarchy.values()
            ],
            bce_pos_weight=bce_pos_weight)
        if child_parent_index_pair_dir:
            with open(child_parent_index_pair_dir, 'r') as pairs_file:
                child_parent_pairs = [
                    (int(fields[0]), int(fields[1]))
                    for fields in (row.strip().split(',')
                                   for row in pairs_file)
                ]
            # Transpose [(child, parent), ...] into two parallel lists.
            childs_idx, parents_idx = (
                list(column) for column in zip(*child_parent_pairs))
            loss_kwargs.update(childs_idx=childs_idx,
                               parents_idx=parents_idx,
                               penalty_lambda=hv_penalty_lambda)
        self.loss = HMCNLoss(**loss_kwargs)

        initializer(self)
Beispiel #21
0
    def __init__(
        self,
        vocab: Vocabulary,
        serialization_dir: str,
        pretrained_model: str,
        tokenizer_wrapper: HFTokenizerWrapper,
        num_labels: int,
        label_namespace: str = "labels",
        transformer_weights_path: str = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs,
    ) -> None:
        """
        Create a Hugging Face sequence classifier whose embedding table is
        sized to the wrapped tokenizer.

        Parameters
        ----------
        vocab : forwarded to the parent constructor together with ``kwargs``.
        serialization_dir : directory the tokenizer is (re)loaded from with
            ``pending=True`` and then saved to, after initialization.
        pretrained_model : name or path handed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        tokenizer_wrapper : wrapper exposing ``tokenizer``, ``load`` and
            ``save``.
        num_labels : number of output classes; a falsy value falls back to
            the vocabulary size of ``label_namespace``.
        label_namespace : vocabulary namespace of the labels.
        transformer_weights_path : optional ``.tar.gz`` archive containing
            ``weights.th``; base-model weights found under the
            ``_transformers_model.`` prefix are copied into the classifier.
        initializer : applied to the whole model before the final tokenizer
            reload.
        """
        super().__init__(vocab, **kwargs)
        self._tokenizer_wrapper = tokenizer_wrapper
        self._label_namespace = label_namespace

        # An environment variable may point at an earlier run's tokenizer.
        earlier_run_dir = os.environ.get("pre_serialization_dir")
        if earlier_run_dir is not None:
            tokenizer_wrapper.tokenizer = tokenizer_wrapper.load(
                earlier_run_dir)

        self._num_labels = num_labels or vocab.get_vocab_size(
            namespace=self._label_namespace)

        self._accuracy = CategoricalAccuracy()

        self._classifier = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model, num_labels=self._num_labels, return_dict=True)
        self._classifier.resize_token_embeddings(
            len(tokenizer_wrapper.tokenizer))

        if transformer_weights_path is not None:
            with TemporaryDirectory() as tmpdirname:
                # NOTE(review): extractall/torch.load trust the archive's
                # contents; only point this at archives you produced.
                with tarfile.open(transformer_weights_path,
                                  mode="r:gz") as input_tar:
                    logger.info("Extracting model...")
                    input_tar.extractall(tmpdirname)

                weights_file = os.path.join(tmpdirname, "weights.th")
                model_state = torch.load(
                    weights_file,
                    map_location=util.device_mapping(-1),
                )

                # Map "<target_prefix>X" in this model onto
                # "<source_prefix>X" in the archived state.
                source_prefix = "_transformers_model."
                target_prefix = ("_classifier." +
                                 self._classifier.base_model_prefix + ".")
                prefix_length = len(target_prefix)
                for target_name, parameter in self.named_parameters():
                    if target_name.startswith(target_prefix):
                        source_name = (source_prefix +
                                       target_name[prefix_length:])
                        parameter.data.copy_(model_state[source_name].data)

        initializer(self)

        # Reload the tokenizer for this run, persist it, and make sure the
        # embedding table matches its final size.
        self._tokenizer_wrapper.tokenizer = self._tokenizer_wrapper.load(
            serialization_dir, pending=True)
        self._tokenizer_wrapper.save(serialization_dir)
        self._classifier.resize_token_embeddings(
            len(tokenizer_wrapper.tokenizer))
Beispiel #22
0
    def __init__(
            self,
            vocab: Vocabulary,
            span_encoder: Seq2SeqEncoder,
            reasoning_encoder: Seq2SeqEncoder,
            input_dropout: float = 0.3,
            hidden_dim_maxpool: int = 1024,
            class_embs: bool = True,
            reasoning_use_obj: bool = True,
            reasoning_use_answer: bool = True,
            reasoning_use_question: bool = True,
            pool_reasoning: bool = True,
            pool_answer: bool = True,
            pool_question: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        """
        Build the detector, the two sequence encoders, the two bilinear
        attentions and the final scoring MLP.

        Parameters
        ----------
        vocab : forwarded to the parent constructor.
        span_encoder : encoder wrapped in ``TimeDistributed``; its output dim
            sizes both attentions and the answer/question pooled features.
        reasoning_encoder : encoder wrapped in ``TimeDistributed``; its
            output dim sizes the pooled reasoning features.
        input_dropout : probability used for the variational dropout on the
            RNN input (skipped when ``<= 0``) and for both dropout layers of
            the final MLP.
        hidden_dim_maxpool : hidden width of the final MLP.
        class_embs : passed to ``SimpleDetector`` as ``semantic``.
        reasoning_use_obj, reasoning_use_answer, reasoning_use_question :
            stored flags (not otherwise read here).
        pool_reasoning, pool_answer, pool_question : select which feature
            groups contribute to the final MLP's input width.
        initializer : applied to the whole model at the end.
        """
        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True,
                                       average_pool=True,
                                       semantic=class_embs,
                                       final_dim=512)
        ###################################################################################################
        # (A leftover debug ``print('0')`` used to sit here; it wrote to
        # stdout on every construction and has been removed.)
        self.rnn_input_dropout = TimeDistributed(
            InputVariationalDropout(
                input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        # Span-to-span attention: both sides are span-encoder outputs.
        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        # Span-to-object attention: second side is the detector's features.
        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question

        # Input width of the final MLP: sum of the widths of every feature
        # group whose pooling flag is enabled.
        dim = sum([
            d for d, to_pool in [(
                reasoning_encoder.get_output_dim(), self.pool_reasoning
            ), (span_encoder.get_output_dim(), self.pool_answer
                ), (span_encoder.get_output_dim(), self.pool_question)]
            if to_pool
        ])

        # Scores each candidate with a single output unit.
        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self._accuracy = CategoricalAccuracy()

        # TODO: the author intended to swap CrossEntropyLoss for label
        # smoothing (LabelSmoothingLoss(size=4, smoothing=0.1)).
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Beispiel #23
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 tag_representation_dim: int,
                 arc_representation_dim: int,
                 tag_feedforward: FeedForward = None,
                 arc_feedforward: FeedForward = None,
                 pos_tag_embedding: Embedding = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.0,
                 input_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """
        Assemble the biaffine parser: head/child projections for arcs and
        tags, the arc attention, the tag bilinear scorer and span extractors.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder : token embedder; also sizes the span extractors.
        encoder : contextual encoder; its output dim sizes the default
            feedforwards and the head sentinel.
        tag_representation_dim / arc_representation_dim : widths of the tag
            and arc projections; must equal the respective feedforward's
            output dim.
        tag_feedforward / arc_feedforward : optional projections; a
            one-layer ELU ``FeedForward`` is built when one is not supplied.
        pos_tag_embedding : optional POS embedding stored on the model.
        use_mst_decoding_for_validation : stored unchanged.
        dropout : probability for the ``InputVariationalDropout`` layer.
        input_dropout : probability for the plain ``Dropout`` layer.
        initializer : applied to the whole model at the end.
        """
        super(BiaffineChineseDependencyParser, self).__init__(vocab,
                                                              regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder

        encoder_dim = encoder.get_output_dim()

        # Arc scoring: separate head/child projections plus bilinear attention.
        if not arc_feedforward:
            arc_feedforward = FeedForward(encoder_dim, 1,
                                          arc_representation_dim,
                                          Activation.by_name("elu")())
        self.head_arc_feedforward = arc_feedforward
        self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)

        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

        num_labels = self.vocab.get_vocab_size("head_tags")

        # Tag scoring: separate head/child projections plus a bilinear layer
        # with one output per head tag.
        if not tag_feedforward:
            tag_feedforward = FeedForward(encoder_dim, 1,
                                          tag_representation_dim,
                                          Activation.by_name("elu")())
        self.head_tag_feedforward = tag_feedforward
        self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)

        self.tag_bilinear = torch.nn.Bilinear(tag_representation_dim,
                                              tag_representation_dim,
                                              num_labels)

        self._pos_tag_embedding = (pos_tag_embedding
                                   if pos_tag_embedding else None)
        self._dropout = InputVariationalDropout(dropout)
        self._input_dropout = Dropout(input_dropout)
        # Learned vector of encoder width with shape (1, 1, encoder_dim).
        self._head_sentinel = torch.nn.Parameter(
            torch.randn([1, 1, encoder_dim]))

        # Only consumed by the (currently disabled) embedder/encoder
        # dimension check.
        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()

        for declared_dim, projection, declared_name, projection_name in (
            (tag_representation_dim, self.head_tag_feedforward,
             "tag representation dim", "tag feedforward output dim"),
            (arc_representation_dim, self.head_arc_feedforward,
             "arc representation dim", "arc feedforward output dim"),
        ):
            check_dimensions_match(declared_dim, projection.get_output_dim(),
                                   declared_name, projection_name)

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        # Collect the indices of POS tags listed in POS_TO_IGNORE.
        tags = self.vocab.get_token_to_index_vocabulary("pos")
        punctuation_tag_indices = {}
        for tag, index in tags.items():
            if tag in POS_TO_IGNORE:
                punctuation_tag_indices[tag] = index
        self._pos_to_ignore = set(punctuation_tag_indices.values())
        logger.info(
            f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
            "Ignoring words with these POS tags for evaluation.")

        self._attachment_scores = AttachmentScores()

        embedding_dim = self.text_field_embedder.get_output_dim()
        self._endpoint_span_extractor = EndpointSpanExtractor(
            embedding_dim, combination="x,y", bucket_widths=False)
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=self.text_field_embedder.get_output_dim())

        initializer(self)
Beispiel #24
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        class_labels: List[str] = None,
        feedforward: Optional[FeedForward] = None,
        label_encoding: Optional[str] = None,
        include_start_end_transitions: bool = True,
        constrain_crf_decoding: bool = None,
        calculate_span_f1: bool = None,
        dropout: Optional[float] = None,
        verbose_metrics: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        cached_embeddings: Optional[bool] = None,
    ) -> None:
        """
        Build an embedder -> encoder -> (feedforward) -> projection ->
        ``WeightedCRF`` tagger with accuracy and optional span/tag F1 metrics.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        text_field_embedder : its output dim must equal the encoder's input
            dim.
        encoder : sequence encoder.
        label_namespace : vocabulary namespace of the tags.
        class_labels : forwarded to ``TagF1`` and ``AverageTagF1``.
        feedforward : optional layer between encoder and tag projection; its
            input dim must equal the encoder's output dim.
        label_encoding : tag scheme, required whenever constrained decoding
            or span F1 is active.
        include_start_end_transitions : forwarded to the CRF.
        constrain_crf_decoding / calculate_span_f1 : ``None`` means "on iff
            ``label_encoding`` is given".
        dropout : dropout probability; a falsy value disables dropout.
        verbose_metrics, cached_embeddings : stored unchanged.
        initializer : applied to the whole model at the end.

        Raises
        ------
        ConfigurationError
            If constrained decoding or span F1 is requested without a
            ``label_encoding``.
        """
        super().__init__(vocab, regularizer)
        self.cached_embeddings = cached_embeddings

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self._feedforward = feedforward

        # The projection reads from whichever layer comes last.
        projection_input_dim = (self.encoder.get_output_dim()
                                if feedforward is None else
                                feedforward.get_output_dim())
        self.tag_projection_layer = TimeDistributed(
            Linear(projection_input_dim, self.num_tags))

        # Unset switches default to "enabled exactly when a label encoding
        # was supplied".
        has_label_encoding = label_encoding is not None
        if constrain_crf_decoding is None:
            constrain_crf_decoding = has_label_encoding
        if calculate_span_f1 is None:
            calculate_span_f1 = has_label_encoding

        self.label_encoding = label_encoding
        constraints = None
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            index_to_tag = self.vocab.get_index_to_token_vocabulary(
                label_namespace)
            constraints = allowed_transitions(label_encoding, index_to_tag)

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = WeightedCRF(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(
                vocab,
                tag_namespace=label_namespace,
                label_encoding=label_encoding)
            self._tag_f1_metric = TagF1(vocab, class_labels=class_labels)
            self._average_f1_metric = AverageTagF1(
                vocab, class_labels=class_labels)

        # Adjacent stages must agree on their interface widths.
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        if feedforward is not None:
            check_dimensions_match(
                encoder.get_output_dim(),
                feedforward.get_input_dim(),
                "encoder output dim",
                "feedforward input dim",
            )
        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        feedforward: Optional[FeedForward] = None,
        label_encoding: Optional[str] = None,
        include_start_end_transitions: bool = True,
        constrain_crf_decoding: bool = None,
        dropout: Optional[float] = None,
        verbose_metrics: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        top_k: int = 1,
        **kwargs,
    ) -> None:
        """
        Build an embedder -> encoder -> (feedforward) -> projection -> CRF
        tagger that tracks top-1 and top-3 accuracy.

        Parameters
        ----------
        vocab : forwarded to the parent constructor together with ``kwargs``.
        text_field_embedder : its output dim must equal the encoder's input
            dim.
        encoder : sequence encoder.
        label_namespace : vocabulary namespace of the tags.
        feedforward : optional layer between encoder and tag projection; its
            input dim must equal the encoder's output dim.
        label_encoding : tag scheme, required when decoding is constrained.
        include_start_end_transitions : forwarded to the CRF.
        constrain_crf_decoding : ``None`` means "on iff ``label_encoding``
            is given".
        dropout : dropout probability; a falsy value disables dropout.
        verbose_metrics, top_k : stored unchanged.
        initializer : applied to the whole model at the end.

        Raises
        ------
        ConfigurationError
            If constrained decoding is requested without a
            ``label_encoding``.
        """
        super().__init__(vocab, **kwargs)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.top_k = top_k
        self._verbose_metrics = verbose_metrics
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self._feedforward = feedforward

        # The projection consumes the feedforward output when there is one,
        # otherwise the raw encoder output.
        if feedforward is None:
            projection_input_dim = self.encoder.get_output_dim()
        else:
            projection_input_dim = feedforward.get_output_dim()
        self.tag_projection_layer = TimeDistributed(
            Linear(projection_input_dim, self.num_tags))

        # An unset switch defaults to "constrain exactly when a label
        # encoding was supplied".
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None

        self.label_encoding = label_encoding
        constraints = None
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but no label_encoding was specified."
                )
            index_to_tag = self.vocab.get_index_to_token_vocabulary(
                label_namespace)
            constraints = allowed_transitions(label_encoding, index_to_tag)

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3),
        }

        # Adjacent stages must agree on their interface widths.
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim",
                               "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   "encoder output dim",
                                   "feedforward input dim")
        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 bert_pretrained_model: str,
                 dropout_prob: float = 0.1,
                 max_count: int = 10,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 answering_abilities: List[str] = None,
                 number_rep: str = 'first',
                 arithmetic: str = 'base',
                 special_numbers: List[int] = None) -> None:
        """
        Load BERT and create one prediction head per enabled answering
        ability.

        Parameters
        ----------
        vocab, regularizer : forwarded to the parent constructor.
        bert_pretrained_model : name or path given to both
            ``BertModel.from_pretrained`` and
            ``BertTokenizer.from_pretrained``.
        dropout_prob : stored as ``self.dropout`` and passed to
            ``init_arithmetic`` for non-``"base"`` arithmetic.
        max_count : the counting head predicts ``max_count + 1`` classes.
        initializer : applied to the whole model at the end.
        answering_abilities : subset of ``"passage_span_extraction"``,
            ``"question_span_extraction"``, ``"arithmetic"``,
            ``"counting"``; all four when ``None``.
        number_rep : stored unchanged.
        arithmetic : ``"base"`` builds a 3-way sign predictor; any other
            value delegates to ``self.init_arithmetic``.
        special_numbers : extra numbers that each get a learned embedding;
            ``None`` is treated as "no special numbers".
        """
        super().__init__(vocab, regularizer)

        if answering_abilities is None:
            self.answering_abilities = [
                "passage_span_extraction", "question_span_extraction",
                "arithmetic", "counting"
            ]
        else:
            self.answering_abilities = answering_abilities
        self.number_rep = number_rep

        self.BERT = BertModel.from_pretrained(bert_pretrained_model)
        self.tokenizer = BertTokenizer.from_pretrained(bert_pretrained_model)
        # Width of BERT's pooled output; every head is sized from it.
        bert_dim = self.BERT.pooler.dense.out_features

        # Must be set before any ``self.ff`` call below.
        # NOTE(review): presumably ``ff`` reads ``self.dropout`` - confirm.
        self.dropout = dropout_prob

        self._passage_weights_predictor = torch.nn.Linear(bert_dim, 1)
        self._question_weights_predictor = torch.nn.Linear(bert_dim, 1)
        self._number_weights_predictor = torch.nn.Linear(bert_dim, 1)
        self._arithmetic_weights_predictor = torch.nn.Linear(bert_dim, 1)
        self._sentence_weights_predictor = torch.nn.Linear(bert_dim, 1)

        # A selector over abilities is only needed when there is a choice.
        if len(self.answering_abilities) > 1:
            self._answer_ability_predictor = \
                self.ff(2 * bert_dim, bert_dim, len(self.answering_abilities))

        if "passage_span_extraction" in self.answering_abilities:
            self._passage_span_extraction_index = self.answering_abilities.index(
                "passage_span_extraction")
            self._passage_span_start_predictor = torch.nn.Linear(bert_dim, 1)
            self._passage_span_end_predictor = torch.nn.Linear(bert_dim, 1)

        if "question_span_extraction" in self.answering_abilities:
            self._question_span_extraction_index = self.answering_abilities.index(
                "question_span_extraction")
            self._question_span_start_predictor = \
                self.ff(2 * bert_dim, bert_dim, 1)
            self._question_span_end_predictor = \
                self.ff(2 * bert_dim, bert_dim, 1)

        if "arithmetic" in self.answering_abilities:
            self.arithmetic = arithmetic
            self._arithmetic_index = self.answering_abilities.index(
                "arithmetic")
            # The default ``None`` previously crashed in ``len(None)`` even
            # though arithmetic is enabled by default; treat it as empty.
            self.special_numbers = (
                [] if special_numbers is None else special_numbers)
            self.num_special_numbers = len(self.special_numbers)
            self.special_embedding = torch.nn.Embedding(
                self.num_special_numbers, bert_dim)
            if self.arithmetic == "base":
                self._number_sign_predictor = \
                    self.ff(2 * bert_dim, bert_dim, 3)
            else:
                self.init_arithmetic(bert_dim,
                                     bert_dim,
                                     bert_dim,
                                     layers=2,
                                     dropout=dropout_prob)

        if "counting" in self.answering_abilities:
            self._counting_index = self.answering_abilities.index("counting")

            # Classification over the counts 0..max_count.
            self._count_number_predictor = \
                self.ff(bert_dim, bert_dim, max_count + 1)

            # Float tensor [0, 1, ..., max_count]; kept as a plain attribute
            # (not a registered buffer).
            self.count_classes = torch.arange(max_count + 1).float()

        self._drop_metrics = DropEmAndF1()
        initializer(self)
Beispiel #27
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 feed_forward: FeedForward,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Assemble the layers, attention weight vectors and metrics.

        :param vocab: Vocabulary forwarded to the parent constructor.
        :param text_field_embedder: Embedder stored for the input text; its
                                    output dimension sizes the highway layer.
        :param num_highway_layers: Number of layers of the time-distributed
                                   ``Highway`` module.
        :param phrase_layer: Encoder whose output dimension (``encoding_dim``
                             below) sizes every projection and weight vector
                             created in this constructor.
        :param attention_similarity_function: Similarity function wrapped by
                                              both ``MatrixAttention``
                                              instances built here.
        :param residual_encoder: Stored as ``_residual_encoder``.
        :param span_start_encoder: Stored as ``_span_start_encoder``.
        :param span_end_encoder: Stored as ``_span_end_encoder``.
        :param feed_forward: Stored as ``_feed_forward``.
        :param dropout: Dropout probability. A value ``> 0`` builds a
                        ``torch.nn.Dropout``; otherwise an identity function
                        is installed in its place.
        :param mask_lstms: Flag stored as ``_mask_lstms``; it is not read
                           inside this constructor.
        :param initializer: Applied to the fully built model as the last step.
        :param regularizer: Forwarded to the parent constructor.
        """
        super(ModelSQUAD, self).__init__(vocab, regularizer)

        # NOTE: the assignment order of sub-modules is kept stable on purpose,
        # since it determines the order in which parameters are registered.
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._residual_encoder = residual_encoder
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder
        self._feed_forward = feed_forward

        encoding_dim = phrase_layer.get_output_dim()

        # Three per-position scoring heads, each mapping encoding_dim -> 1.
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        self._self_matrix_attention = MatrixAttention(
            attention_similarity_function)
        self._linear_layer = TimeDistributed(
            torch.nn.Linear(4 * encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(
            torch.nn.Linear(3 * encoding_dim, encoding_dim))

        # Free-standing weight vectors, drawn uniformly from [-bound, bound]
        # with bound = sqrt(6 / (3 * encoding_dim + 1)).
        self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
        self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
        self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))
        bound = math.sqrt(6 / (encoding_dim * 3 + 1))
        for weight in (self._w_x, self._w_y, self._w_xy):
            weight.data.uniform_(-bound, bound)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # Fall back to an identity callable so callers can apply
        # self._dropout unconditionally.
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 context_field_embedder: TextFieldEmbedder,
                 context_encoder: Seq2SeqEncoder,
                 target_encoding_pooling_function: str = 'mean',
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels',
                 loss_weights: Optional[List[float]] = None) -> None:
        '''
        Build the layers and metrics of the target sentiment classifier.

        :param vocab: A Vocabulary, required in order to compute sizes
                      for input/output projections.
        :param context_field_embedder: Used to embed the context text. Its
                                       output dimension must match the input
                                       dimension of `context_encoder`.
        :param context_encoder: Encodes the context sentence/text.
        :param target_encoding_pooling_function: Pooling function to be used
                                                 to create a representation
                                                 for the target from the encoded
                                                 context. This pooled
                                                 representation will then be
                                                 given to the Optional
                                                 FeedForward layer. This can be
                                                 either `mean` for mean pooling
                                                 or `max` for max pooling. If
                                                 this is `max` a `relu` function
                                                 is used before the pooling
                                                 (this is to overcome the
                                                 padding issue where some
                                                 vectors will be zero due to
                                                 padding.).
        :param feedforward: An optional feed forward layer to apply after the
                            target encoding pooling function. When given, its
                            input dimension must match the output dimension
                            of `context_encoder`.
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the
                            regularization penalty during training.
        :param dropout: Dropout probability used to build the
                        `InputVariationalDropout` layer.
        :param label_name: Name of the label name space.
        :param loss_weights: The amount of weight to give the negative, neutral,
                             positive classes respectively. e.g. [0.2, 0.5, 0.3]
                             would weight the negative class by a factor of
                             0.2, neutral by 0.5 and positive by 0.3. NOTE It
                             assumes the sentiment labels are the following:
                             [negative, neutral, positive].
        :raises ValueError: If `target_encoding_pooling_function` is not one
                            of `max` or `mean`.

        The embedder/encoder/feedforward dimensions are validated with
        `check_dimensions_match` before the initializer is applied.

        This is based on the TD-BERT model by
        `Gao et al. 2019 <https://ieeexplore.ieee.org/abstract/document/8864964>`_
        figure 2. The `target_encoding_pooling_function` when equal to `max` and the
        `context_field_embedder` is BERT will be identical to TD-BERT.
        '''
        super().__init__(vocab, regularizer)

        self.label_name = label_name
        self.context_field_embedder = context_field_embedder
        self.context_encoder = context_encoder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.feedforward = feedforward

        allowed_pooling_functions = ['max', 'mean']
        if target_encoding_pooling_function not in allowed_pooling_functions:
            raise ValueError('Target Encoding Pooling function has to be one '
                             f'of: {allowed_pooling_functions} not: '
                             f'{target_encoding_pooling_function}')
        self.target_encoding_pooling_function = target_encoding_pooling_function
        # The mean pooler is always built, whichever pooling function is
        # selected.
        self.mean_pooler = BagOfEmbeddingsEncoder(
            self.context_encoder.get_output_dim(), averaged=True)

        # Set the loss weights (have to sort them by order of label index in
        # the vocab)
        self.loss_weights = target_sentiment.util.loss_weight_order(
            self, loss_weights, self.label_name)

        # The label projection reads from the feed forward layer when there
        # is one, otherwise straight from the context encoder.
        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = self.context_encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}

        # One F1 measure per label, keyed as `F1_<Capitalised label>`.
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        self.f1_metrics = {
            f'F1_{name.capitalize()}': F1Measure(label_index)
            for label_index, name in label_index_name.items()
        }

        # Dropout
        self._variational_dropout = InputVariationalDropout(dropout)

        check_dimensions_match(context_field_embedder.get_output_dim(),
                               context_encoder.get_input_dim(), 'Embedding',
                               'Encoder')
        if self.feedforward is not None:
            check_dimensions_match(context_encoder.get_output_dim(),
                                   feedforward.get_input_dim(), 'Encoder',
                                   'FeedForward')
        initializer(self)
Beispiel #29
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 judge: Model = None,
                 update_judge: bool = False,
                 reward_method: str = None,
                 detach_value_head: bool = False,
                 qa_loss_weight: float = 0.,
                 influence_reward: bool = False,
                 dataset_name: str = 'squad') -> None:
        """Build the BiDAF layers plus the optional judge-related heads.

        :param vocab: Vocabulary forwarded to the parent constructor.
        :param text_field_embedder: Embedder for the input text; its output
                                    dimension must equal the input dimension
                                    of ``phrase_layer``.
        :param num_highway_layers: Number of layers of the time-distributed
                                   ``Highway`` module.
        :param phrase_layer: Encoder whose output dimension is the
                             ``encoding_dim`` used in the size checks below.
        :param similarity_function: Similarity function wrapped in a
                                    ``LegacyMatrixAttention``.
        :param modeling_layer: Encoder whose input dimension must equal
                               ``4 * encoding_dim``.
        :param span_end_encoder: Encoder whose input dimension must equal
                                 ``4 * encoding_dim + 3 * modeling_dim``.
        :param dropout: Dropout probability. A value ``> 0`` builds a
                        ``torch.nn.Dropout``; otherwise an identity function
                        is installed in its place.
        :param mask_lstms: Flag stored as ``_mask_lstms``; it is not read
                           inside this constructor.
        :param initializer: Applied to the fully built model as the last step.
        :param regularizer: Forwarded to the parent constructor.
        :param judge: Optional judge model. When it is ``None`` this instance
                      is flagged as the judge itself (``is_judge``) and the
                      FiLM and value-head layers are not created.
        :param update_judge: Only takes effect when a ``judge`` is given.
        :param reward_method: Stored only when a ``judge`` is given; forced to
                              ``None`` otherwise.
        :param detach_value_head: Stored as ``_detach_value_head``.
        :param qa_loss_weight: Stored as ``_qa_loss_weight``.
        :param influence_reward: Stored as ``influence_reward``.
        :param dataset_name: ``'race'`` selects the ``'mc'`` answer type; any
                             other value selects ``'span'``.
        """
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self.judge = judge
        # Without a separate judge model this instance acts as the judge.
        self.is_judge = self.judge is None
        self.reward_method = None if self.is_judge else reward_method
        self.update_judge = update_judge and (self.judge is not None)
        self._detach_value_head = detach_value_head
        self._qa_loss_weight = qa_loss_weight
        self.influence_reward = influence_reward
        self.answer_type = 'mc' if dataset_name == 'race' else 'span'
        self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        if not self.is_judge:
            # Maps a single scalar input to 2 * modeling-input-dim values.
            self._turn_film_gen = torch.nn.Linear(
                1, 2 * modeling_layer.get_input_dim())
            self._film = FiLM()
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        if not self.is_judge:
            self._value_head = TimeDistributed(
                torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # Fall back to an identity callable so callers can apply
        # self._dropout unconditionally.
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
Beispiel #30
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 att_question_to_choice: SimilarityFunction,
                 question_encoder: Optional[Seq2SeqEncoder] = None,
                 choice_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 aggregate_question: Optional[str] = "max",
                 aggregate_choice: Optional[str] = "max",
                 embeddings_dropout_value: Optional[float] = 0.0) -> None:
        """Build the embedder, optional encoders and question-to-choice attention.

        :param vocab: Vocabulary forwarded to the parent constructor; the size
                      of its ``labels`` namespace is stored as ``_num_labels``.
        :param text_field_embedder: Embedder for the input text. Its output
                                    dimension stands in for an encoder output
                                    dimension whenever that encoder is ``None``.
        :param att_question_to_choice: Similarity function wrapped in a
                                       ``LegacyMatrixAttention``. If it exposes
                                       ``tensor_1_dim`` / ``tensor_2_dim`` they
                                       are validated against the question /
                                       choice output dimensions.
        :param question_encoder: Optional encoder for the question.
        :param choice_encoder: Optional encoder for the choices.
        :param initializer: Applied to the fully built model as the last step.
        :param aggregate_question: Stored as ``_question_aggregate``; it is
                                   not interpreted inside this constructor.
        :param aggregate_choice: Stored as ``_choice_aggregate``; it is not
                                 interpreted inside this constructor.
        :param embeddings_dropout_value: Dropout probability for the
                                         embeddings. ``None`` or a value
                                         ``<= 0`` installs an identity
                                         function instead of
                                         ``torch.nn.Dropout``.
        :raises ConfigurationError: If the question and choice output
                                    dimensions differ, or if either does not
                                    match the corresponding input dimension
                                    declared by ``att_question_to_choice``.
        """
        super(QAMultiChoiceMaxAttention, self).__init__(vocab)

        # True when CUDA is available and the current device index is >= 0.
        self._use_cuda = (torch.cuda.is_available()
                          and torch.cuda.current_device() >= 0)

        self._text_field_embedder = text_field_embedder
        # The parameter is annotated Optional, so treat None as "no dropout"
        # rather than failing on the numeric comparison.
        if embeddings_dropout_value is not None and embeddings_dropout_value > 0.0:
            self._embeddings_dropout = torch.nn.Dropout(
                p=embeddings_dropout_value)
        else:
            self._embeddings_dropout = lambda x: x

        self._question_encoder = question_encoder

        # choices encoding
        self._choice_encoder = choice_encoder

        self._question_aggregate = aggregate_question
        self._choice_aggregate = aggregate_choice

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        # Without an encoder, the representation is the raw embedder output.
        question_output_dim = self._text_field_embedder.get_output_dim()
        if self._question_encoder is not None:
            question_output_dim = self._question_encoder.get_output_dim()

        choice_output_dim = self._text_field_embedder.get_output_dim()
        if self._choice_encoder is not None:
            choice_output_dim = self._choice_encoder.get_output_dim()

        if question_output_dim != choice_output_dim:
            raise ConfigurationError(
                "Output dimension of the question_encoder (dim: {}) "
                "and choice_encoder (dim: {}) "
                "must match! ".format(question_output_dim, choice_output_dim))

        # Check input tensor dimensions for the question to choices attention (similarity function)
        if hasattr(att_question_to_choice, "tensor_1_dim"):
            tensor_1_dim = att_question_to_choice.tensor_1_dim
            if tensor_1_dim != question_output_dim:
                raise ConfigurationError(
                    "Output dimension of the question_encoder (dim: {}) "
                    "and tensor_1_dim (dim: {}) of att_question_to_choice "
                    "must match! ".format(question_output_dim, tensor_1_dim))

        if hasattr(att_question_to_choice, "tensor_2_dim"):
            tensor_2_dim = att_question_to_choice.tensor_2_dim
            # Compare against the choice dimension, which is what the message
            # reports (equal to the question dimension at this point).
            if tensor_2_dim != choice_output_dim:
                raise ConfigurationError(
                    "Output dimension of the choice_encoder (dim: {}) "
                    "and tensor_2_dim (dim: {}) of att_question_to_choice "
                    "must match! ".format(choice_output_dim, tensor_2_dim))

        self._matrix_attention_question_to_choice = LegacyMatrixAttention(
            att_question_to_choice)

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)