Example #1
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlowFT, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._action_predictor = torch.nn.Linear(modeling_dim, 4)
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._action_accuracy = CategoricalAccuracy()
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
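
The constructor above relies on check_dimensions_match to fail fast on mismatched layer sizes. A minimal sketch of that helper's behaviour, assuming AllenNLP's allennlp.common.checks module (the dimension values are made up):

# Minimal sketch of check_dimensions_match; the numbers are made-up examples.
from allennlp.common.checks import check_dimensions_match, ConfigurationError

encoding_dim = 50
check_dimensions_match(4 * encoding_dim, 4 * encoding_dim,
                       "modeling layer input dim", "4 * encoding dim")  # passes silently
try:
    check_dimensions_match(100, 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
except ConfigurationError as e:
    print(e)  # names both dimensions and their mismatched values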
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 soft_align_matrix_attention: SoftAlignmentMatrixAttention,
                 self_matrix_attention: BilinearMatrixAttention,
                 passage_modeling_layer: Seq2SeqEncoder,
                 question_modeling_layer: Seq2SeqEncoder,
                 question_encoding_layer: Seq2VecEncoder,
                 passage_similarity_function: SimilarityFunction,
                 question_similarity_function: SimilarityFunction,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(MultiGranuFusion, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = soft_align_matrix_attention
        self._self_matrix_attention = self_matrix_attention
        self._passage_modeling_layer = passage_modeling_layer
        self._question_modeling_layer = question_modeling_layer
        self._question_encoding_layer = question_encoding_layer
        self._passage_similarity_function = passage_similarity_function
        self._question_similarity_function = question_similarity_function

        passage_modeling_output_dim = self._passage_modeling_layer.get_output_dim()
        question_modeling_output_dim = self._question_modeling_layer.get_output_dim()

        encoding_dim = phrase_layer.get_output_dim()
        self._passage_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
        self._question_fusion_weight = nn.Linear(encoding_dim * 4,
                                                 encoding_dim)
        self._fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
        self._span_start_weight = nn.Linear(passage_modeling_output_dim,
                                            question_modeling_output_dim)
        self._span_end_weight = nn.Linear(passage_modeling_output_dim,
                                          question_modeling_output_dim)
        self._span_weight = torch.FloatTensor([0.1, 1])

        self._span_predictor = TimeDistributed(
            torch.nn.Linear(self._passage_modeling_layer.get_output_dim(), 2))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 feed_forward: FeedForward,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ModelV21, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder
        self._feed_forward = feed_forward

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        #span_start_input_dim = encoding_dim * 4 + modeling_dim
        #span_start_input_dim = encoding_dim + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        #span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        #span_end_input_dim = encoding_dim + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        # TODO:
        self._self_matrix_attention = MatrixAttention(
            attention_similarity_function)
        self._linear_layer = TimeDistributed(
            torch.nn.Linear(4 * encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(
            torch.nn.Linear(3 * encoding_dim, encoding_dim))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
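
The constructors above only register metric objects; a minimal sketch, assuming AllenNLP's training.metrics API, of how such metrics are typically updated during forward() and read out in get_metrics() (every tensor, span and answer string below is a made-up placeholder):

# Minimal sketch of driving the metrics registered above; not the example's real forward().
import torch
from allennlp.training.metrics import BooleanAccuracy, CategoricalAccuracy, SquadEmAndF1

span_start_accuracy = CategoricalAccuracy()
span_end_accuracy = CategoricalAccuracy()
span_accuracy = BooleanAccuracy()
squad_metrics = SquadEmAndF1()

span_start_logits = torch.randn(2, 10)              # (batch, passage_length)
span_end_logits = torch.randn(2, 10)
gold_start = torch.tensor([3, 5])
gold_end = torch.tensor([4, 7])

span_start_accuracy(span_start_logits, gold_start)
span_end_accuracy(span_end_logits, gold_end)
span_accuracy(torch.tensor([[3, 4], [5, 6]]),       # predicted (start, end) pairs
              torch.stack([gold_start, gold_end], dim=-1))

squad_metrics("in the 1990s", ["in the 1990s", "the 1990s"])
em, f1 = squad_metrics.get_metric(reset=True)       # what get_metrics() would report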
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 dimension_l: int,
                 dropout: float = 0.2,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder

        self._dimension_l = dimension_l
        self._encoder_block_q = EncoderBlock(
            input_dim=self._text_field_embedder.get_output_dim(),
            hidden_size=self._dimension_l)
        self._encoder_block_d = EncoderBlock(
            input_dim=self._text_field_embedder.get_output_dim(),
            hidden_size=self._dimension_l)
        self._tri_linear_matrix_attention = TriLinearMatrixAttention(
            5 * self._dimension_l)
        self._softmax_d1 = torch.nn.Softmax(dim=1)
        self._linear_layer = LinearLayer(in_features=20 * self._dimension_l,
                                         out_features=self._dimension_l,
                                         bias=True)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        initializer(self)
Example #5
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 question_encoder: Seq2SeqEncoder,
                 passage_encoder: Seq2SeqEncoder,
                 pair_encoder: AttentionEncoder,
                 self_encoder: AttentionEncoder,
                 output_layer: QAOutputLayer,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 share_encoder: bool = False):

        super().__init__(vocab, regularizer)
        self.text_field_embedder = text_field_embedder
        self.question_encoder = question_encoder
        self.passage_encoder = passage_encoder
        self.pair_encoder = pair_encoder
        self.self_encoder = self_encoder
        self.output_layer = output_layer

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self.share_encoder = share_encoder
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 bert_model: BertModel,
                 dropout: float = 0.0,
                 index: str = "bert",
                 trainable: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None, ) -> None:
        super().__init__(vocab, regularizer)

        self._index = index
        self.bert_model = PretrainedBertModel.load(bert_model)
        hidden_size = self.bert_model.config.hidden_size

        for param in self.bert_model.parameters():
            param.requires_grad = trainable

        # 1. Instantiate any additional parts of your network
        self.drop = torch.nn.Dropout(dropout)
        self.linear = torch.nn.Linear(hidden_size, 2)
        
        # 2. DON'T FORGET TO INITIALIZE the additional parts of your network.
        initializer(self.linear)

        # 3. Instantiate your metrics
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        self.loss = torch.nn.CrossEntropyLoss()
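
The Linear(hidden_size, 2) head above is the standard start/end span classifier; a hedged sketch of how such a head is usually consumed (this is not the example's forward(), and every name and tensor below is a hypothetical placeholder):

# Hedged sketch of consuming a (hidden_size -> 2) QA head; shapes are made up.
import torch

batch, seq_len, hidden_size = 2, 12, 768
sequence_output = torch.randn(batch, seq_len, hidden_size)   # e.g. from the BERT encoder
qa_head = torch.nn.Linear(hidden_size, 2)
loss_fn = torch.nn.CrossEntropyLoss()

logits = qa_head(sequence_output)                 # (batch, seq_len, 2)
start_logits, end_logits = logits.split(1, dim=-1)
start_logits = start_logits.squeeze(-1)           # (batch, seq_len)
end_logits = end_logits.squeeze(-1)

gold_start = torch.tensor([3, 0])
gold_end = torch.tensor([5, 2])
loss = loss_fn(start_logits, gold_start) + loss_fn(end_logits, gold_end)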
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 dropout: float = 0.2,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder

        encoding_dim = text_field_embedder.get_output_dim()

        self._dropout = torch.nn.Dropout(p=dropout)

        self._squad_metrics = SquadEmAndF1()

        self.linear_start = nn.Linear(encoding_dim, 1)

        self.linear_end = nn.Linear(encoding_dim, 1)

        self.linear_type = nn.Linear(encoding_dim, 3)

        self._loss_trackers = {
            'loss': Average(),
            'start_loss': Average(),
            'end_loss': Average(),
            'type_loss': Average()
        }
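
The Average() trackers above just accumulate scalars; a short sketch, assuming AllenNLP's Average metric, of how such a dict is typically fed per batch and reported (the values are made up):

# Sketch of feeding and reading the Average() loss trackers; values are made up.
from allennlp.training.metrics import Average

loss_trackers = {
    'loss': Average(),
    'start_loss': Average(),
    'end_loss': Average(),
    'type_loss': Average()
}
loss_trackers['start_loss'](0.42)   # record one batch's start loss
loss_trackers['end_loss'](0.37)
metrics = {name: tracker.get_metric(reset=True) for name, tracker in loss_trackers.items()}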
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 pair2vec_dropout: float = 0.15,
                 max_span_length: int = 30,
                 pair2vec_model_file: str = None,
                 pair2vec_config_file: str = None) -> None:
        super().__init__(vocab)
        self._max_span_length = max_span_length
        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._encoding_dim = phrase_layer.get_output_dim()

        self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file,
                                                   pair2vec_model_file)
        self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

        self._matrix_attention = LinearMatrixAttention(self._encoding_dim,
                                                       self._encoding_dim,
                                                       'x,y,x*y')

        # atten_dim = self._encoding_dim * 4 + 600 if ablation_type == 'attn_over_rels' else self._encoding_dim * 4
        atten_dim = self._encoding_dim * 4 + 600
        self._merge_atten = TimeDistributed(
            torch.nn.Linear(atten_dim, self._encoding_dim))

        self._residual_encoder = residual_encoder

        self._self_attention = LinearMatrixAttention(self._encoding_dim,
                                                     self._encoding_dim,
                                                     'x,y,x*y')

        self._merge_self_attention = TimeDistributed(
            torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._squad_metrics = SquadEmAndF1()
        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._official_em = Average()
        self._official_f1 = Average()

        self._span_accuracy = BooleanAccuracy()
        self._variational_dropout = InputVariationalDropout(dropout)
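
LinearMatrixAttention with the 'x,y,x*y' combination, as used above, scores every passage/question token pair; a minimal sketch of the shapes involved, assuming AllenNLP's matrix attention API (all sizes are made up):

# Sketch of LinearMatrixAttention shapes; batch/length/dim values are made up.
import torch
from allennlp.modules.matrix_attention import LinearMatrixAttention

attention = LinearMatrixAttention(tensor_1_dim=100, tensor_2_dim=100, combination='x,y,x*y')
passage = torch.randn(2, 30, 100)           # (batch, passage_length, encoding_dim)
question = torch.randn(2, 10, 100)          # (batch, question_length, encoding_dim)
similarity = attention(passage, question)   # (batch, passage_length, question_length)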
Example #9
    def __init__(self, submodels: List[object], load_models=False) -> None:
        """
        TODO: Make the output the same as for BiDAF, not a simplification.
        If load_models is True, the load() function of the submodel objects will be
        called, which should load them from disk into RAM.
        """
        super().__init__(submodels)
        self.cf_a = submodels[0].cf_a
        self._squad_metrics = SquadEmAndF1()
Example #10
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 question_encoder: Seq2SeqEncoder,
                 passage_encoder: Seq2SeqEncoder,
                 r: float = 0.8,
                 dropout: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EvidenceExtraction, self).__init__(vocab, regularizer)

        self._embedder = embedder

        self._question_encoder = question_encoder
        self._passage_encoder = passage_encoder

        # size: 2H
        encoding_dim = question_encoder.get_output_dim()

        self._gru_cell = nn.GRUCell(2 * encoding_dim, encoding_dim)

        self._gate = nn.Linear(2 * encoding_dim, 2 * encoding_dim)

        self._match_layer_1 = nn.Linear(2 * encoding_dim, encoding_dim)
        self._match_layer_2 = nn.Linear(encoding_dim, 1)

        self._question_attention_for_passage = Attention(
            NonlinearSimilarity(encoding_dim))
        self._question_attention_for_question = Attention(
            NonlinearSimilarity(encoding_dim))
        self._passage_attention_for_answer = Attention(
            NonlinearSimilarity(encoding_dim), normalize=False)
        self._passage_attention_for_ranking = Attention(
            NonlinearSimilarity(encoding_dim))

        self._passage_self_attention = Attention(
            NonlinearSimilarity(encoding_dim))
        self._self_gru_cell = nn.GRUCell(2 * encoding_dim, encoding_dim)
        self._self_gate = nn.Linear(2 * encoding_dim, encoding_dim)

        self._answer_net = nn.GRUCell(encoding_dim, encoding_dim)

        self._v_r_Q = nn.Parameter(torch.rand(encoding_dim))
        self._r = r

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        initializer(self)
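
The nn.GRUCell(2 * encoding_dim, encoding_dim) modules above consume a 2H-dimensional input and keep an H-dimensional hidden state per step; a minimal shape sketch with made-up sizes:

# Shape sketch for the GRUCell used above; sizes are made-up placeholders.
import torch
import torch.nn as nn

encoding_dim, batch = 100, 4
gru_cell = nn.GRUCell(2 * encoding_dim, encoding_dim)
step_input = torch.randn(batch, 2 * encoding_dim)   # e.g. [passage_state; attended_question]
hidden = torch.zeros(batch, encoding_dim)
hidden = gru_cell(step_input, hidden)               # (batch, encoding_dim)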
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ModelMSMARCO, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._residual_encoder = residual_encoder
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder

        encoding_dim = phrase_layer.get_output_dim()
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        self._matrix_attention = TriLinearAttention(encoding_dim)
        self._self_matrix_attention = TriLinearAttention(encoding_dim)
        self._linear_layer = TimeDistributed(
            torch.nn.Linear(4 * encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(
            torch.nn.Linear(3 * encoding_dim, encoding_dim))

        #self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
        #self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
        #self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))

        #std = math.sqrt(6 / (encoding_dim + 1))
        #self._w_x.data.uniform_(-std, std)
        #self._w_y.data.uniform_(-std, std)
        #self._w_xy.data.uniform_(-std, std)

        self._squad_metrics = SquadEmAndF1()
        self._rouge_metric = Rouge()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
        self._ite = 0
Example #12
    def __init__(
            self,
            vocab: Vocabulary,
            elmo_embedder: TextFieldEmbedder,
            tokens_embedder: TextFieldEmbedder,
            features_embedder: TextFieldEmbedder,
            phrase_layer: Seq2SeqEncoder,
            projected_layer: Seq2SeqEncoder,
            contextual_passage: Seq2SeqEncoder,
            contextual_question: Seq2SeqEncoder,
            dropout: float = 0.2,
            regularizer: Optional[RegularizerApplicator] = None,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):

        super(MultiGranularityHierarchicalAttentionFusionNetworks,
              self).__init__(vocab, regularizer)
        self.elmo_embedder = elmo_embedder
        self.tokens_embedder = tokens_embedder
        self.features_embedder = features_embedder
        self._phrase_layer = phrase_layer
        self._encoding_dim = self._phrase_layer.get_output_dim()
        self.projected_layer = torch.nn.Linear(self._encoding_dim + 1024,
                                               self._encoding_dim)
        self.fuse_p = FusionLayer(self._encoding_dim)
        self.fuse_q = FusionLayer(self._encoding_dim)
        self.fuse_s = FusionLayer(self._encoding_dim)
        self.projected_lstm = projected_layer
        self.contextual_layer_p = contextual_passage
        self.contextual_layer_q = contextual_question
        self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)
        # self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
        self._self_attention = BilinearMatrixAttention(self._encoding_dim,
                                                       self._encoding_dim)
        self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.yesno_predictor = FeedForward(self._encoding_dim,
                                           self._encoding_dim, 3)
        self.relu = torch.nn.ReLU()

        self._max_span_length = 30

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._span_yesno_accuracy = CategoricalAccuracy()
        self._official_f1 = Average()
        self._variational_dropout = InputVariationalDropout(dropout)

        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #13
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sim_text_field_embedder: TextFieldEmbedder,
                 loss_weights: Dict,
                 sim_class_weights: List,
                 pretrained_sim_path: str = None,
                 use_scenario_encoding: bool = True,
                 sim_pretraining: bool = False,
                 dropout: float = 0.2,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BertQA, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        if use_scenario_encoding:
            self._sim_text_field_embedder = sim_text_field_embedder
        self.loss_weights = loss_weights
        self.sim_class_weights = sim_class_weights
        self.use_scenario_encoding = use_scenario_encoding
        self.sim_pretraining = sim_pretraining

        if self.sim_pretraining and not self.use_scenario_encoding:
            raise ValueError(
                "When pretraining Scenario Interpretation Module, you should use it."
            )

        embedding_dim = self._text_field_embedder.get_output_dim()
        self._action_predictor = torch.nn.Linear(embedding_dim, 4)
        self._sim_token_label_predictor = torch.nn.Linear(embedding_dim, 4)
        self._span_predictor = torch.nn.Linear(embedding_dim, 2)
        self._action_accuracy = CategoricalAccuracy()
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._span_loss_metric = Average()
        self._action_loss_metric = Average()
        self._sim_loss_metric = Average()
        self._sim_yes_f1 = F1Measure(2)
        self._sim_no_f1 = F1Measure(3)

        if use_scenario_encoding and pretrained_sim_path is not None:
            logger.info("Loading pretrained model..")
            self.load_state_dict(torch.load(pretrained_sim_path))
            for param in self._sim_text_field_embedder.parameters():
                param.requires_grad = False

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        initializer(self)
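
F1Measure(2) and F1Measure(3) above track per-class F1 for the "yes" and "no" labels; a minimal sketch of how a positive-label F1 metric is driven, assuming the older AllenNLP API in which get_metric returns a (precision, recall, f1) tuple (logits and gold labels are made up):

# Sketch of F1Measure usage; class indices, logits and gold labels are made up.
import torch
from allennlp.training.metrics import F1Measure

sim_yes_f1 = F1Measure(positive_label=2)
logits = torch.tensor([[0.1, 0.2, 2.0, 0.1],     # predicted class 2 ("yes")
                       [1.5, 0.2, 0.1, 0.1]])    # predicted class 0
gold = torch.tensor([2, 0])
sim_yes_f1(logits, gold)
precision, recall, f1 = sim_yes_f1.get_metric(reset=True)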
Example #14
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 question_encoder: Seq2SeqEncoder,
                 passage_encoder: Seq2SeqEncoder,
                 feed_forward: FeedForward,
                 dropout: float = 0.1,
                 num_decoding_steps: int = 40,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(AnswerSynthesis, self).__init__(vocab, regularizer)
        self._vocab = vocab
        self._vocab_size = vocab.get_vocab_size()  # default: tokens
        self._num_decoding_steps = num_decoding_steps
        self._start_token_index = self._vocab.get_token_index(START_SYMBOL)
        self._end_token_index = self._vocab.get_token_index(END_SYMBOL)

        self._embedder = embedder
        self._question_encoder = question_encoder
        self._passage_encoder = passage_encoder

        encoding_dim = question_encoder.get_output_dim()
        embedding_dim = embedder.get_output_dim()

        self._span_start_embedding = nn.Embedding(2, 50)
        self._span_end_embedding = nn.Embedding(2, 50)
        self._gru_decoder = nn.GRUCell(encoding_dim + embedding_dim,
                                       encoding_dim)
        self._feed_forward = feed_forward

        self._attention = Attention(NonlinearSimilarity(encoding_dim))

        self._W_r = nn.Linear(embedding_dim, encoding_dim, bias=False)
        self._U_r = nn.Linear(encoding_dim, encoding_dim, bias=False)
        self._V_r = nn.Linear(encoding_dim, encoding_dim, bias=False)

        self._max_out = Maxout(encoding_dim,
                               num_layers=1,
                               output_dims=int(encoding_dim / 2),
                               pool_sizes=2)
        self._W_o = nn.Linear(int(encoding_dim / 2),
                              self._vocab_size,
                              bias=False)

        self._squad_metrics = SquadEmAndF1()
        #self._predict_acc = CategoricalAccuracy()

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        initializer(self)
        self._num_iter = 0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 modeling_layer: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder

        self._phrase_layer = phrase_layer

        encoding_dim = phrase_layer.get_output_dim()

        self._modeling_layer = modeling_layer

        self._dropout = torch.nn.Dropout(p=dropout)

        self._squad_metrics = SquadEmAndF1()

        self._f1_metrics = F1Measure(1)

        self.linear_1 = nn.Sequential(
            nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())
        self.linear_2 = nn.Sequential(
            nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())

        self.qc_att = BiAttention(encoding_dim, dropout)

        self._coref_f1_metric = AttF1Measure(0.1)

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self.linear_start = nn.Linear(encoding_dim, 1)

        self.linear_end = nn.Linear(encoding_dim, 1)

        self.linear_type = nn.Linear(encoding_dim * 3, 3)

        self._loss_trackers = {
            'loss': Average(),
            'start_loss': Average(),
            'end_loss': Average(),
            'type_loss': Average()
        }
Example #16
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 phrase_layer,
                 residual_encoder,
                 span_start_encoder,
                 span_end_encoder,
                 initializer,
                 dropout=0.2,
                 mask_lstms=True):
        super(BiDAFSelfAttention, self).__init__(vocab)
        # Initialize layers.
        self._text_field_embedder = text_field_embedder

        self._phrase_layer = phrase_layer
        # Initialize start/end span predictors.
        encoding_dim = phrase_layer.get_output_dim()

        self._matrix_attention = TriLinearAttention(encoding_dim)
        self._merge_atten = TimeDistributed(
            torch.nn.Linear(encoding_dim * 4, encoding_dim))

        self._residual_encoder = residual_encoder
        self._self_atten = TriLinearAttention(encoding_dim)
        self._merge_self_atten = TimeDistributed(
            torch.nn.Linear(encoding_dim * 3, encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._official_em = Average()
        self._official_f1 = Average()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
            # self._dropout = VariationalDropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms
Example #17
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention_layer: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        dropout_prob: float = 0.1,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        text_embed_dim = text_field_embedder.get_output_dim()
        encoding_in_dim = phrase_layer.get_input_dim()
        encoding_out_dim = phrase_layer.get_output_dim()
        modeling_in_dim = modeling_layer.get_input_dim()
        modeling_out_dim = modeling_layer.get_output_dim()

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                     encoding_in_dim)
        self._highway_layer = Highway(encoding_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                    encoding_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                    modeling_in_dim)
        self._modeling_layer = modeling_layer

        self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
        self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._metrics = SquadEmAndF1()
        self._dropout = torch.nn.Dropout(
            p=dropout_prob) if dropout_prob > 0 else lambda x: x

        initializer(self)
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 pretrained_model: str = None,
                 requires_grad: bool = True,
                 transformer_weights_model: str = None,
                 layer_freeze_regexes: List[str] = None,
                 on_load: bool = False,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        if on_load:
            logging.info(f"Skipping loading of initial Transformer weights")
            transformer_config = RobertaConfig.from_pretrained(
                pretrained_model)
            self._transformer_model = RobertaModel(transformer_config)

        elif transformer_weights_model:
            logging.info(
                f"Loading Transformer weights model from {transformer_weights_model}"
            )
            transformer_model_loaded = load_archive(transformer_weights_model)
            self._transformer_model = transformer_model_loaded.model._transformer_model
        else:
            self._transformer_model = RobertaModel.from_pretrained(
                pretrained_model)

        for name, param in self._transformer_model.named_parameters():
            grad = requires_grad
            if layer_freeze_regexes and grad:
                grad = not any(
                    [bool(re.search(r, name)) for r in layer_freeze_regexes])
            param.requires_grad = grad

        transformer_config = self._transformer_model.config
        num_labels = 2  # For start/end
        self.qa_outputs = Linear(transformer_config.hidden_size, num_labels)

        # Import GPT-2 machinery to get from tokens back to actual text
        self.byte_decoder = {v: k for k, v in bytes_to_unicode().items()}

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._debug = 2
        self._padding_value = 1  # The index of the RoBERTa padding token
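
The byte_decoder built above inverts GPT-2's byte-level BPE mapping; a hedged sketch of how such a mapping turns a RoBERTa token string back into text (the import path varies with the transformers/pytorch_transformers version, and the token is a made-up example):

# Hedged sketch: recover text from a byte-level BPE token with byte_decoder.
# The import location is version dependent; 'Ġhello' is a made-up token ('Ġ' encodes a leading space).
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode

byte_decoder = {v: k for k, v in bytes_to_unicode().items()}
token = "\u0120hello"   # 'Ġhello'
text = bytearray([byte_decoder[c] for c in token]).decode("utf-8", errors="replace")
assert text == " hello"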
Example #19
def get_accuracy_squad(model, dev_dataset, vocab, trigger_token_ids, answer,
                       span_start, span_end):
    """
    Same as get_accuracy() in utils.py but for SQuAD models.
    """
    model.get_metrics(reset=True)
    model.eval()  # model should be in eval() already, but just in case
    iterator = BucketIterator(
        batch_size=32,
        sorting_keys=[["passage", "num_tokens"], ["question", "num_tokens"]],
    )
    iterator.index_with(vocab)

    # Print out the current triggers.
    print_string = ""
    trigger_words = []
    for idx in trigger_token_ids:
        print_string = print_string + vocab.get_token_from_index(idx) + ", "
        trigger_words.append(vocab.get_token_from_index(idx))
    print("Current Triggers: " + print_string)

    # Evaluate the model using the triggers and get the F1 / EM scores with the target.
    total_f1 = 0.0
    total_em = 0.0
    total = 0.0
    for batch in lazy_groups_of(iterator(dev_dataset,
                                         num_epochs=1,
                                         shuffle=False),
                                group_size=1):
        torch.cuda.empty_cache()  # TODO: may be unnecessary, but sometimes memory caching causes OOM
        output_dict = evaluate_batch_squad(model, batch, trigger_token_ids,
                                           vocab, span_start, span_end)
        # go through the model's predictions and compute F1 and EM with the target span.
        for span_str in output_dict["best_span_str"]:
            metrics = SquadEmAndF1()
            metrics.get_metric(reset=True)
            metrics(span_str, [answer])
            em, f1 = metrics.get_metric()
            total_f1 += f1
            total_em += em
            total += 1.0

    print("F1 with target span: " + str(total_f1 / total))
    print("EM with target span: " + str(total_em / total))
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 hidden_size: int,
                 is_bidirectional: bool,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 gated_attention_layer: Seq2SeqEncoder,
                 self_attention_layer: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(RNet, self).__init__(vocab, regularizer)

        self.hidden_size = hidden_size
        self.is_bidirectional = is_bidirectional
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._gated_attention_layer = gated_attention_layer
        self._self_attention_layer = self_attention_layer

        encoding_dim = phrase_layer.get_output_dim()
        gated_attention_dim = gated_attention_layer.get_output_dim()
        self_attention_dim = self_attention_layer.get_output_dim()

        self._pointer_network = PointerNet(self_attention_dim,
                                           self.hidden_size, encoding_dim,
                                           self.is_bidirectional)
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 dropout: float = 0.0,
                 max_span_length: int = 30,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder
        self._max_span_length = max_span_length

        self.qa_outputs = torch.nn.Linear(
            self._text_field_embedder.get_output_dim(), 2)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._span_qa_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        initializer(self)
Example #22
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()

        self._compat_layer = FC3(encoding_dim * 4 + modeling_dim)
        self._compat_pred_layer = Linear(encoding_dim * 4 + modeling_dim, 2)

        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these
        # aren't necessarily obvious from the configuration files, so we check
        # here.
        if modeling_layer.get_input_dim() != 4 * encoding_dim:
            raise ConfigurationError(
                "The input dimension to the modeling_layer must be "
                "equal to 4 times the encoding dimension of the phrase_layer. "
                "Found {} and 4 * {} respectively.".format(
                    modeling_layer.get_input_dim(), encoding_dim))
        if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder (embedding_dim + "
                "char_cnn) must match the input dimension of the phrase_encoder. "
                "Found {} and {}, respectively.".format(
                    text_field_embedder.get_output_dim(),
                    phrase_layer.get_input_dim()))

        if span_end_encoder.get_input_dim() != encoding_dim * 4 + modeling_dim * 3:
            raise ConfigurationError(
                "The input dimension of the span_end_encoder should be equal to "
                "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
                "Found {} and (4 * {} + 3 * {}) "
                "respectively.".format(span_end_encoder.get_input_dim(),
                                       encoding_dim, modeling_dim))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._compat_accuracy = BooleanAccuracy()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
Example #23
    def __init__(self, submodels: List[RNet]) -> None:
        super().__init__(submodels)

        self._squad_metrics = SquadEmAndF1()
Example #24
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 judge: Model = None,
                 update_judge: bool = False,
                 reward_method: str = None,
                 detach_value_head: bool = False,
                 qa_loss_weight: float = 0.,
                 influence_reward: bool = False,
                 dataset_name: str = 'squad') -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self.judge = judge
        self.is_judge = self.judge is None
        self.reward_method = None if self.is_judge else reward_method
        self.update_judge = update_judge and (self.judge is not None)
        self._detach_value_head = detach_value_head
        self._qa_loss_weight = qa_loss_weight
        self.influence_reward = influence_reward
        self.answer_type = 'mc' if dataset_name == 'race' else 'span'
        self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        if not self.is_judge:
            self._turn_film_gen = torch.nn.Linear(
                1, 2 * modeling_layer.get_input_dim())
            self._film = FiLM()
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        if not self.is_judge:
            self._value_head = TimeDistributed(
                torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
Example #25
                                 key=lambda x: num_TP(x, sp_list, r[
                                     'question_lemma'], r['sent_lemmas']),
                                 reverse=True)
            r['sorted_pred_chains'] = pred_chains

    if args.data_path:
        data = []
        for fn in glob.glob(args.data_path):
            with open(fn, 'r') as f:
                data += json.load(f)
        print("Number of data instances:", len(data))
        print("Number of data ids:", len(set([d['_id'] for d in data])))
        data_id2idx = {d['_id']: i for i, d in enumerate(data)}

    if args.data_path and 'best_span_str' in res[0]:
        squad_metrics = SquadEmAndF1()
        for r in res:
            best_span_str = r['best_span_str']
            answer = data[data_id2idx[r['_id']]]['answer'].strip().replace(
                "\n", "")
            squad_metrics(best_span_str, [answer])
        em, f1 = squad_metrics.get_metric(reset=True)
        print("Ans EM:", em, "Ans F1:", f1)

    if 'ans_sent_idxs' in res[0] and res[0]['ans_sent_idxs'] is not None:
        # answer sentence included in the prediction
        num = 0
        corr = 0
        for r in res:
            if len(r['ans_sent_idxs']) > 0:
                num += 1
Example #26
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 phrase_layer_sp: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 self_attention_layer: Seq2SeqEncoder,
                 gate_sent_encoder: Seq2SeqEncoder,
                 gate_self_attention_layer: Seq2SeqEncoder,
                 type_encoder: Seq2SeqEncoder,
                 modeling_layer: Seq2SeqEncoder,
                 modeling_layer_sp: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 output_att_scores: bool = True,
                 sent_labels_src: str = 'sp',
                 gate_self_att: bool = True,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder

        self._phrase_layer = phrase_layer
        self._phrase_layer_sp = phrase_layer_sp

        self._dropout = torch.nn.Dropout(p=dropout)

        self._modeling_layer = modeling_layer
        self._modeling_layer_sp = modeling_layer_sp

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder
        self._type_encoder = type_encoder

        self._self_attention_layer = self_attention_layer

        self._output_att_scores = output_att_scores
        self._sent_labels_src = sent_labels_src

        encoding_dim = span_start_encoder.get_output_dim()

        self._span_gate = SpanGate(encoding_dim, gate_self_att)
        self.qc_att = BiAttention(encoding_dim, dropout)
        self.qc_att_sp = BiAttention(encoding_dim, dropout)
        if gate_self_att:
            self._gate_sent_encoder = gate_sent_encoder
            self._gate_self_attention_layer = gate_self_attention_layer
        else:
            self._gate_sent_encoder = None
            self._gate_self_attention_layer = None

        self.linear_start = nn.Linear(encoding_dim, 1)

        self.linear_end = nn.Linear(encoding_dim, 1)

        self.linear_type = nn.Linear(encoding_dim * 3, 3)

        self._squad_metrics = SquadEmAndF1()

        self._f1_metrics = F1Measure(1)

        self._loss_trackers = {
            'loss': Average(),
            'start_loss': Average(),
            'end_loss': Average(),
            'type_loss': Average(),
            'strong_sup_loss': Average()
        }
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 matrix_attention_layer: MatrixAttention,
                 modeling_layer: Seq2SeqEncoder,
                 dropout_prob: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        text_embed_dim = text_field_embedder.get_output_dim()
        encoding_in_dim = phrase_layer.get_input_dim()
        encoding_out_dim = phrase_layer.get_output_dim()
        modeling_in_dim = modeling_layer.get_input_dim()
        modeling_out_dim = modeling_layer.get_output_dim()

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                     encoding_in_dim)
        self._highway_layer = Highway(encoding_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                    encoding_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                    modeling_in_dim)
        self._modeling_layer = modeling_layer

        self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
        self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._metrics = SquadEmAndF1()
        self._dropout = torch.nn.Dropout(
            p=dropout_prob) if dropout_prob > 0 else lambda x: x

        # evaluation

        # BLEU
        self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
        self._bleu_scores = {
            x: Average()
            for x in self._bleu_score_types_to_use
        }

        # ROUGE using pyrouge
        self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

        # if we have rouge-n as a metric, we actually get n scores: rouge-1, rouge-2, ..., rouge-n
        max_rouge_n = 4
        rouge_n_metrics = []
        if "rouge-n" in self._rouge_score_types_to_use:
            rouge_n_metrics = [
                "rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)
            ]

        rouge_scores_names = rouge_n_metrics + [
            y for y in self._rouge_score_types_to_use if y != 'rouge-n'
        ]
        self._rouge_scores = {x: Average() for x in rouge_scores_names}
        self._rouge_evaluator = rouge.Rouge(
            metrics=self._rouge_score_types_to_use,
            max_n=max_rouge_n,
            limit_length=True,
            length_limit=100,
            length_limit_type='words',
            apply_avg=False,
            apply_best=False,
            alpha=0.5,  # Default F1_score
            weight_factor=1.2,
            stemming=True)

        initializer(self)
Example No. 28
0
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder, use_attention: bool,
                 seq2seq_encoder: Seq2SeqEncoder,
                 seq2vec_encoder: Seq2VecEncoder,
                 span_end_encoder_after: Seq2SeqEncoder,
                 use_decoder_trainer: bool, decoder_beam_search: BeamSearch,
                 kb_configs: dict, other_configs: dict,
                 initializer: InitializerApplicator) -> None:
        super(ProStructModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.num_actions = len(Action)  # The number of actions comes from the
        # Action enum defined in propara_dataset_reader.py.
        self.other_configs = other_configs

        # kb_coefficient * kb_score + (1-kb_coefficient) * model_score
        self.kb_coefficient = torch.nn.Parameter(
            torch.ones(1).mul(kb_configs.get('kb_coefficient', 0.5)))
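        # kb_coefficient is a single learnable scalar (an nn.Parameter), initialized from
        # kb_configs (default 0.5), so the KB/model interpolation weight is tuned during training.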

        self.use_attention = use_attention
        self.use_decoder_trainer = use_decoder_trainer
        if self.use_attention:
            self.seq2seq_encoder = seq2seq_encoder
            self.time_distributed_seq2seq_encoder = TimeDistributed(
                TimeDistributed(self.seq2seq_encoder))
            self.time_distributed_attention_layer = \
                TimeDistributed(TimeDistributed(
                    Attention(similarity_function=BilinearSimilarity(2 * seq2seq_encoder.get_output_dim(),
                                                                     seq2seq_encoder.get_output_dim()),
                              normalize=True)))
            self.aggregate_feedforward = Linear(
                seq2seq_encoder.get_output_dim(), self.num_actions)
        else:
            self.seq2vec_encoder = seq2vec_encoder
            self.time_distributed_seq2vec_encoder = TimeDistributed(
                TimeDistributed(self.seq2vec_encoder))
            self.aggregate_feedforward = Linear(
                seq2vec_encoder.get_output_dim(), self.num_actions)

        self.span_end_encoder_after = span_end_encoder_after
        # per step per participant
        self.time_distributed_encoder_span_end_after = TimeDistributed(
            TimeDistributed(self.span_end_encoder_after))

        # Fixme: dimensions

        self._span_start_predictor_after = TimeDistributed(
            TimeDistributed(
                torch.nn.Linear(2 + 2 * seq2seq_encoder.get_output_dim(), 1)))

        self._span_end_predictor_after = TimeDistributed(
            TimeDistributed(
                torch.nn.Linear(span_end_encoder_after.get_output_dim(), 1)))

        # Fixme: BooleanAccuracy is likely wrong here; CategoricalAccuracy should be used instead.
        self._type_accuracy = BooleanAccuracy()
        # Fixme: hard-coding ignore_index=-1 is brittle; it breaks if the masking value ever changes.
        self._loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

        # Fixme: add a metric for location span strings
        self.span_metric = SquadEmAndF1()

        if self.use_decoder_trainer:
            self.decoder_trainer = MaximumMarginalLikelihood()
            # Only the lexical KB is handled here; other values of 'kb_to_use' would leave
            # `kb` undefined and fail below when the decoder step is built.
            if kb_configs['kb_to_use'] == 'lexicalkb':
                kb = KBLexical(lexical_kb_path=kb_configs['lexical_kb_path'],
                               fullgrid_prompts_load_path=kb_configs[
                                   'fullgrid_prompts_load_path'])

            # Makeshift arrangement to get the number of participants in tiny.tsv.
            self.commonsense_based_action_generator = CommonsenseBasedActionGenerator(
                self.num_actions)
            self.rules_activated = [
                int(rule_val.strip()) > 0
                for rule_val in self.other_configs.get(
                    'constraint_rules_to_turn_on', '0,0,0,1').split(",")
            ]
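            # For the default '0,0,0,1' this parses to [False, False, False, True],
            # i.e. only the last constraint rule is switched on.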
            self.rule_2_fraction_participants = self.other_configs.get(
                'rule_2_fraction_participants', 0.5)
            self.rule_3_fraction_steps = self.other_configs.get(
                'rule_3_fraction_steps', 0.5)

            self.commonsense_based_action_generator.set_rules_used(
                self.rules_activated, self.rule_2_fraction_participants,
                self.rule_3_fraction_steps)
            # rules_activated[0]: C/D/C/D cannot happen
            # rules_activated[1]: > 1/2 of the participants
            # rules_activated[2]: > 1/2 of the steps cannot change
            # rules_activated[3]: until mentioned
            self.decoder_step = ProParaDecoderStep(
                KBBasedActionScorer(kb=kb, kb_coefficient=self.kb_coefficient),
                valid_action_generator=self.commonsense_based_action_generator)

        self.beam_search = decoder_beam_search
        initializer(self)
Example No. 29
0
    def __init__(self, submodels: List[BidirectionalAttentionFlow]) -> None:
        super().__init__(submodels)

        self._squad_metrics = SquadEmAndF1()
Example No. 30
0
    def __init__(self, vocab: Vocabulary, cf_a, preloaded_elmo=None) -> None:
        super(BidirectionalAttentionFlow_1,
              self).__init__(vocab, cf_a.regularizer)
        """
        Initialize some data structures 
        """
        self.cf_a = cf_a
        # Bayesian data models
        self.VBmodels = []
        self.LinearModels = []
        """
        ############## TEXT FIELD EMBEDDER with ELMO ####################
        text_field_embedder : ``TextFieldEmbedder``
            Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input to the model.
        """
        if (cf_a.use_ELMO):
            if preloaded_elmo is not None:
                text_field_embedder = preloaded_elmo
            else:
                text_field_embedder = bidut.download_Elmo(
                    cf_a.ELMO_num_layers, cf_a.ELMO_droput)
                print("ELMO loaded from disk or downloaded")
        else:
            text_field_embedder = None

#        embedder_out_dim  = text_field_embedder.get_output_dim()
        self._text_field_embedder = text_field_embedder

        if (cf_a.Add_Linear_projection_ELMO):
            if (self.cf_a.VB_Linear_projection_ELMO):
                prior = Vil.Prior(**(cf_a.VB_Linear_projection_ELMO_prior))
                print(
                    "----------------- Bayesian Linear Projection ELMO --------------"
                )
                linear_projection_ELMO = LinearVB(
                    text_field_embedder.get_output_dim(), 200, prior=prior)
                self.VBmodels.append(linear_projection_ELMO)
            else:
                linear_projection_ELMO = torch.nn.Linear(
                    text_field_embedder.get_output_dim(), 200)

            self._linear_projection_ELMO = linear_projection_ELMO
        """
        ############## Highway layers ####################
        num_highway_layers : ``int``
            The number of highway layers to use in between embedding the input and passing it through
            the phrase layer.
        """

        Input_dimension_highway = None
        if (cf_a.Add_Linear_projection_ELMO):
            Input_dimension_highway = 200
        else:
            Input_dimension_highway = text_field_embedder.get_output_dim()

        num_highway_layers = cf_a.num_highway_layers
        # Highway layer (Bayesian or deterministic, depending on the configuration)
        if (self.cf_a.VB_highway_layers):
            print("----------------- Bayesian Highway network  --------------")
            prior = Vil.Prior(**(cf_a.VB_highway_layers_prior))
            highway_layer = HighwayVB(Input_dimension_highway,
                                      num_highway_layers,
                                      prior=prior)
            self.VBmodels.append(highway_layer)
        else:

            highway_layer = Highway(Input_dimension_highway,
                                    num_highway_layers)
        highway_layer = TimeDistributed(highway_layer)

        self._highway_layer = highway_layer
        """
        ############## Phrase layer ####################
        phrase_layer : ``Seq2SeqEncoder``
            The encoder (with its own internal stacking) that we will use in between embedding tokens
            and doing the bidirectional attention.
        """
        if cf_a.phrase_layer_dropout > 0:  ## Create dropout layer
            dropout_phrase_layer = torch.nn.Dropout(
                p=cf_a.phrase_layer_dropout)
        else:
            dropout_phrase_layer = lambda x: x

        phrase_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(Input_dimension_highway,
                          hidden_size=cf_a.phrase_layer_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.phrase_layer_num_layers,
                          dropout=cf_a.phrase_layer_dropout))

        phrase_encoding_out_dim = cf_a.phrase_layer_hidden_size * 2
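        # The LSTM above is bidirectional, so the wrapper reports an output dimension of
        # 2 * hidden_size; phrase_encoding_out_dim mirrors that. Optional sanity check:
        #   assert phrase_layer.get_output_dim() == phrase_encoding_out_dim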
        self._phrase_layer = phrase_layer
        self._dropout_phrase_layer = dropout_phrase_layer
        """
        ############## Matrix attention layer ####################
        similarity_function : ``SimilarityFunction``
            The similarity function that we will use when comparing encoded passage and question
            representations.
        """

        # Similarity function for the bidirectional attention (Bayesian or deterministic)
        if (self.cf_a.VB_similarity_function):
            prior = Vil.Prior(**(cf_a.VB_similarity_function_prior))
            print(
                "----------------- Bayesian Similarity matrix --------------")
            similarity_function = LinearSimilarityVB(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim,
                prior=prior)
            self.VBmodels.append(similarity_function)
        else:
            similarity_function = LinearSimilarity(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim)

        matrix_attention = LegacyMatrixAttention(similarity_function)
        self._matrix_attention = matrix_attention
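        # With combination="x,y,x*y", LinearSimilarity scores a passage/question vector pair as
        #   sim(x, y) = w^T [x; y; x * y] + b,
        # i.e. the trilinear similarity function used in the original BiDAF model.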
        """
        ############## Modelling Layer ####################
        modeling_layer : ``Seq2SeqEncoder``
            The encoder (with its own internal stacking) that we will use in between the bidirectional
            attention and predicting span start and end.
        """
        ## Create dropout layer
        if cf_a.modeling_passage_dropout > 0:
            dropout_modeling_passage = torch.nn.Dropout(
                p=cf_a.modeling_passage_dropout)
        else:
            dropout_modeling_passage = lambda x: x

        modeling_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(phrase_encoding_out_dim * 4,
                          hidden_size=cf_a.modeling_passage_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_passage_num_layers,
                          dropout=cf_a.modeling_passage_dropout))

        self._modeling_layer = modeling_layer
        self._dropout_modeling_passage = dropout_modeling_passage
        """
        ############## Span Start Representation #####################
        span_end_encoder : ``Seq2SeqEncoder``
            The encoder that we will use to incorporate span start predictions into the passage state
            before predicting span end.
        """
        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
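        # The span-start input is the BiDAF merged representation G (4 * encoding_dim:
        # passage encoding, question-aware passage representation, and two element-wise
        # product terms) concatenated with the modeling-layer output M (modeling_dim).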

        # Linear layer to compute the span start
        if (self.cf_a.VB_span_start_predictor_linear):
            prior = Vil.Prior(**(cf_a.VB_span_start_predictor_linear_prior))
            print(
                "----------------- Bayesian Span Start Predictor--------------"
            )
            span_start_predictor_linear = LinearVB(span_start_input_dim,
                                                   1,
                                                   prior=prior)
            self.VBmodels.append(span_start_predictor_linear)
        else:
            span_start_predictor_linear = torch.nn.Linear(
                span_start_input_dim, 1)

        self._span_start_predictor_linear = span_start_predictor_linear
        self._span_start_predictor = TimeDistributed(
            span_start_predictor_linear)
        """
        ############## Span End Representation #####################
        """

        ## Create dropout layer
        if cf_a.span_end_encoder_dropout > 0:
            dropout_span_end_encode = torch.nn.Dropout(
                p=cf_a.span_end_encoder_dropout)
        else:
            dropout_span_end_encode = lambda x: x

        span_end_encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(encoding_dim * 4 + modeling_dim * 3,
                          hidden_size=cf_a.modeling_span_end_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_span_end_num_layers,
                          dropout=cf_a.span_end_encoder_dropout))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
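        # As in the reference BiDAF implementation, the span-end encoder reads
        # [G; M; tiled start-weighted M; M * tiled start-weighted M]
        # (4 * encoding_dim + 3 * modeling_dim, matching the LSTM input size above),
        # and its output is concatenated with G again before the final span-end predictor.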

        self._span_end_encoder = span_end_encoder
        self._dropout_span_end_encode = dropout_span_end_encode

        if (self.cf_a.VB_span_end_predictor_linear):
            print(
                "----------------- Bayesian Span End Predictor--------------")
            prior = Vil.Prior(**(cf_a.VB_span_end_predictor_linear_prior))
            span_end_predictor_linear = LinearVB(span_end_input_dim,
                                                 1,
                                                 prior=prior)
            self.VBmodels.append(span_end_predictor_linear)
        else:
            span_end_predictor_linear = torch.nn.Linear(span_end_input_dim, 1)

        self._span_end_predictor_linear = span_end_predictor_linear
        self._span_end_predictor = TimeDistributed(span_end_predictor_linear)
        """
        Dropout for the last layers
        """
        if cf_a.spans_output_dropout > 0:
            dropout_spans_output = torch.nn.Dropout(
                p=cf_a.spans_output_dropout)
        else:
            dropout_spans_output = lambda x: x

        self._dropout_spans_output = dropout_spans_output
        """
        Checkings and accuracy
        """
        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(Input_dimension_highway,
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        """
        mask_lstms : ``bool``, optional (default=True)
            If ``False``, we will skip passing the mask to the LSTM layers.  This gives a ~2x speedup,
            with only a slight performance decrease, if any.  We haven't experimented much with this
            yet, but have confirmed that we still get very similar performance with much faster
            training times.  We still use the mask for all softmaxes, but avoid the shuffling that's
            required when using masking with pytorch LSTMs.
        """
        self._mask_lstms = cf_a.mask_lstms
        """
        ################### Initialize parameters ##############################
        """
        #### THEY ARE ALL INITIALIZED WHEN INSTANTIATING THE COMPONENTS ####
        """
        ####################### OPTIMIZER ################
        """
        optimizer = pytut.get_optimizers(self, cf_a)
        self._optimizer = optimizer
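        # Note: unlike standard AllenNLP models, this model owns its optimizer. A minimal,
        # assumed usage pattern (not shown in this snippet) would be:
        #     output = model(**batch)
        #     output['loss'].backward()
        #     model._optimizer.step()
        #     model._optimizer.zero_grad()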