def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Wire up the BiDAF-FT network: embed -> highway -> phrase encoder ->
    matrix attention -> modeling layer -> action/span prediction heads."""
    super(BidirectionalAttentionFlowFT, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()

    # Four-way action classification head over the modeled representation.
    self._action_predictor = torch.nn.Linear(modeling_dim, 4)

    # Span-start head: merged attention output (4x encoding) + modeled passage.
    start_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(start_dim, 1))

    # Span-end head: merged attention output + span-end encoder output.
    end_dim = encoding_dim * 4 + span_end_encoder.get_output_dim()
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(end_dim, 1))

    # Bidaf has lots of layer dimensions which need to match up - these
    # aren't necessarily obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")

    self._action_accuracy = CategoricalAccuracy()
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled, so callers can apply
    # self._dropout unconditionally.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms

    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             soft_align_matrix_attention: SoftAlignmentMatrixAttention,
             self_matrix_attention: BilinearMatrixAttention,
             passage_modeling_layer: Seq2SeqEncoder,
             question_modeling_layer: Seq2SeqEncoder,
             question_encoding_layer: Seq2VecEncoder,
             passage_similarity_function: SimilarityFunction,
             question_similarity_function: SimilarityFunction,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the multi-granularity fusion reader.

    Fix: ``_span_weight`` is registered as a module buffer instead of a
    bare ``torch.FloatTensor`` attribute.  A plain tensor attribute does
    not follow the module across devices (``model.to(device)``) and is not
    serialized with ``state_dict()``; as a buffer it is still accessible as
    ``self._span_weight`` but moves and persists with the model.
    """
    super(MultiGranuFusion, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = soft_align_matrix_attention
    self._self_matrix_attention = self_matrix_attention
    self._passage_modeling_layer = passage_modeling_layer
    self._question_modeling_layer = question_modeling_layer
    self._question_encoding_layer = question_encoding_layer
    self._passage_similarity_function = passage_similarity_function
    self._question_similarity_function = question_similarity_function

    passage_modeling_output_dim = self._passage_modeling_layer.get_output_dim()
    question_modeling_output_dim = self._question_modeling_layer.get_output_dim()
    encoding_dim = phrase_layer.get_output_dim()

    # Fusion projections map a 4x-encoding-dim concatenation back down to
    # the encoder dimension.
    self._passage_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._question_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
    self._span_start_weight = nn.Linear(passage_modeling_output_dim,
                                        question_modeling_output_dim)
    self._span_end_weight = nn.Linear(passage_modeling_output_dim,
                                      question_modeling_output_dim)
    # Buffer, not Parameter: the weights are fixed (not trained), but must
    # still move with the module and be saved/restored with its state.
    self.register_buffer('_span_weight', torch.FloatTensor([0.1, 1]))
    self._span_predictor = TimeDistributed(
        torch.nn.Linear(self._passage_modeling_layer.get_output_dim(), 2))

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             feed_forward: FeedForward,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Wire up the ModelV21 reading-comprehension network."""
    super(ModelV21, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder
    self._feed_forward = feed_forward

    encoding_dim = phrase_layer.get_output_dim()
    # NOTE(review): the two dims below are computed but currently unused —
    # all prediction heads read encoding_dim-sized inputs; kept for parity.
    modeling_dim = modeling_layer.get_output_dim()
    span_end_encoding_dim = span_end_encoder.get_output_dim()

    # Per-token scorers for span start, span end, and no-answer.
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))

    # TODO (carried over from the original author).
    # Self-attention reuses the same similarity function as the
    # passage-question attention above.
    self._self_matrix_attention = MatrixAttention(
        attention_similarity_function)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dimension_l: int,
             dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Set up the encoder blocks, tri-linear attention and output projection."""
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    self._dimension_l = dimension_l

    embed_dim = self._text_field_embedder.get_output_dim()
    # Separate encoder blocks for the question (q) and the document (d).
    self._encoder_block_q = EncoderBlock(input_dim=embed_dim,
                                         hidden_size=self._dimension_l)
    self._encoder_block_d = EncoderBlock(input_dim=embed_dim,
                                         hidden_size=self._dimension_l)

    self._tri_linear_matrix_attention = TriLinearMatrixAttention(
        5 * self._dimension_l)
    self._softmax_d1 = torch.nn.Softmax(dim=1)
    self._linear_layer = LinearLayer(in_features=20 * self._dimension_l,
                                     out_features=self._dimension_l,
                                     bias=True)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2SeqEncoder,
             passage_encoder: Seq2SeqEncoder,
             pair_encoder: AttentionEncoder,
             self_encoder: AttentionEncoder,
             output_layer: QAOutputLayer,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             share_encoder: bool = False):
    """Store the sub-modules, metrics and loss for the QA model."""
    super().__init__(vocab, regularizer)

    # Sub-modules (all constructed by the caller).
    self.text_field_embedder = text_field_embedder
    self.question_encoder = question_encoder
    self.passage_encoder = passage_encoder
    self.pair_encoder = pair_encoder
    self.self_encoder = self_encoder
    self.output_layer = output_layer
    self.share_encoder = share_encoder

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             bert_model: BertModel,
             dropout: float = 0.0,
             index: str = "bert",
             trainable: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             ) -> None:
    """Wrap a pre-trained BERT encoder with a dropout + linear span head."""
    super().__init__(vocab, regularizer)
    self._index = index
    self.bert_model = PretrainedBertModel.load(bert_model)
    hidden_size = self.bert_model.config.hidden_size

    # Freeze (trainable=False) or unfreeze the whole BERT encoder.
    for param in self.bert_model.parameters():
        param.requires_grad = trainable

    # Additional head on top of BERT: dropout followed by a start/end scorer.
    self.drop = torch.nn.Dropout(dropout)
    self.linear = torch.nn.Linear(hidden_size, 2)
    # Only the new head is initialized; BERT keeps its pre-trained weights.
    initializer(self.linear)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    self.loss = torch.nn.CrossEntropyLoss()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: float = 0.2,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Minimal reader: embedder plus start/end/type linear heads."""
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

    self._text_field_embedder = text_field_embedder
    encoding_dim = text_field_embedder.get_output_dim()

    self._dropout = torch.nn.Dropout(p=dropout)
    self._squad_metrics = SquadEmAndF1()

    # Per-token scorers for span start/end and a 3-way answer-type head.
    self.linear_start = nn.Linear(encoding_dim, 1)
    self.linear_end = nn.Linear(encoding_dim, 1)
    self.linear_type = nn.Linear(encoding_dim, 3)

    # Running averages of each loss component.
    self._loss_trackers = {
        'loss': Average(),
        'start_loss': Average(),
        'end_loss': Average(),
        'type_loss': Average(),
    }
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator,
             dropout: float = 0.2,
             pair2vec_dropout: float = 0.15,
             max_span_length: int = 30,
             pair2vec_model_file: str = None,
             pair2vec_config_file: str = None) -> None:
    """Build the pair2vec-augmented BiDAF-style reader."""
    super().__init__(vocab)
    self._max_span_length = max_span_length
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = phrase_layer.get_output_dim()

    # Pre-trained pair2vec relation embeddings with their own dropout rate.
    self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file,
                                               pair2vec_model_file)
    self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

    self._matrix_attention = LinearMatrixAttention(self._encoding_dim,
                                                   self._encoding_dim,
                                                   'x,y,x*y')
    # +600: presumably the pair2vec features appended to the 4-way attention
    # concatenation (original note mentioned an 'attn_over_rels' ablation) —
    # confirm against the forward pass.
    atten_dim = self._encoding_dim * 4 + 600
    self._merge_atten = TimeDistributed(
        torch.nn.Linear(atten_dim, self._encoding_dim))

    self._residual_encoder = residual_encoder
    self._self_attention = LinearMatrixAttention(self._encoding_dim,
                                                 self._encoding_dim,
                                                 'x,y,x*y')
    self._merge_self_attention = TimeDistributed(
        torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(self._encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(self._encoding_dim, 1))

    self._squad_metrics = SquadEmAndF1()
    # Initialize parameters before creating the (parameter-free) metrics,
    # matching the original construction order.
    initializer(self)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._official_em = Average()
    self._official_f1 = Average()
    self._span_accuracy = BooleanAccuracy()
    self._variational_dropout = InputVariationalDropout(dropout)
def __init__(self, submodels: List[object], load_models=False) -> None:
    """
    Ensemble wrapper around several BiDAF-style submodels.

    TODO: Make the output the same as for the BiDAF, not a simplification
    If load_models = True the algorithm will call the load() function of
    the objects which should load them from disk to the RAM.
    """
    super().__init__(submodels)
    # Share the first submodel's configuration object.
    # NOTE(review): load_models is accepted but not used in this
    # constructor — confirm whether loading happens elsewhere.
    self.cf_a = submodels[0].cf_a
    self._squad_metrics = SquadEmAndF1()
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             question_encoder: Seq2SeqEncoder,
             passage_encoder: Seq2SeqEncoder,
             r: float = 0.8,
             dropout: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the evidence-extraction network (matching, self-matching, answer pointer)."""
    super(EvidenceExtraction, self).__init__(vocab, regularizer)
    self._embedder = embedder
    self._question_encoder = question_encoder
    self._passage_encoder = passage_encoder

    # size: 2H
    encoding_dim = question_encoder.get_output_dim()

    # Question-passage matching machinery.
    self._gru_cell = nn.GRUCell(2 * encoding_dim, encoding_dim)
    self._gate = nn.Linear(2 * encoding_dim, 2 * encoding_dim)
    self._match_layer_1 = nn.Linear(2 * encoding_dim, encoding_dim)
    self._match_layer_2 = nn.Linear(encoding_dim, 1)

    self._question_attention_for_passage = Attention(
        NonlinearSimilarity(encoding_dim))
    self._question_attention_for_question = Attention(
        NonlinearSimilarity(encoding_dim))
    # normalize=False: this attention returns raw (unnormalized) scores.
    self._passage_attention_for_answer = Attention(
        NonlinearSimilarity(encoding_dim), normalize=False)
    self._passage_attention_for_ranking = Attention(
        NonlinearSimilarity(encoding_dim))

    # Passage self-matching.
    self._passage_self_attention = Attention(
        NonlinearSimilarity(encoding_dim))
    self._self_gru_cell = nn.GRUCell(2 * encoding_dim, encoding_dim)
    self._self_gate = nn.Linear(2 * encoding_dim, encoding_dim)

    # Answer pointer recurrence and a learnable encoding_dim-sized vector.
    self._answer_net = nn.GRUCell(encoding_dim, encoding_dim)
    self._v_r_Q = nn.Parameter(torch.rand(encoding_dim))
    self._r = r

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             residual_encoder: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Construct the MS MARCO reader with tri-linear (self-)attention."""
    super(ModelMSMARCO, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._residual_encoder = residual_encoder
    self._span_end_encoder = span_end_encoder
    self._span_start_encoder = span_start_encoder

    encoding_dim = phrase_layer.get_output_dim()
    # NOTE(review): attention_similarity_function is accepted but unused —
    # the attentions below are TriLinearAttention over encoding_dim.
    # span_end_encoding_dim is likewise computed but unused; kept for parity.
    span_end_encoding_dim = span_end_encoder.get_output_dim()

    # Per-token scorers for span start, span end, and no-answer.
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._no_answer_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))

    self._matrix_attention = TriLinearAttention(encoding_dim)
    self._self_matrix_attention = TriLinearAttention(encoding_dim)
    self._linear_layer = TimeDistributed(
        torch.nn.Linear(4 * encoding_dim, encoding_dim))
    self._residual_linear_layer = TimeDistributed(
        torch.nn.Linear(3 * encoding_dim, encoding_dim))

    self._squad_metrics = SquadEmAndF1()
    self._rouge_metric = Rouge()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
    initializer(self)
    self._ite = 0
def __init__(
        self,
        vocab: Vocabulary,
        elmo_embedder: TextFieldEmbedder,
        tokens_embedder: TextFieldEmbedder,
        features_embedder: TextFieldEmbedder,
        phrase_layer: Seq2SeqEncoder,
        projected_layer: Seq2SeqEncoder,
        contextual_passage: Seq2SeqEncoder,
        contextual_question: Seq2SeqEncoder,
        dropout: float = 0.2,
        regularizer: Optional[RegularizerApplicator] = None,
        initializer: InitializerApplicator = InitializerApplicator(),
):
    """Assemble the hierarchical attention-fusion reader."""
    super(MultiGranularityHierarchicalAttentionFusionNetworks,
          self).__init__(vocab, regularizer)
    self.elmo_embedder = elmo_embedder
    self.tokens_embedder = tokens_embedder
    self.features_embedder = features_embedder
    self._phrase_layer = phrase_layer
    self._encoding_dim = self._phrase_layer.get_output_dim()

    # +1024: presumably the ELMo embedding width concatenated to the
    # phrase-layer output — confirm against the forward pass.
    self.projected_layer = torch.nn.Linear(self._encoding_dim + 1024,
                                           self._encoding_dim)
    self.fuse_p = FusionLayer(self._encoding_dim)
    self.fuse_q = FusionLayer(self._encoding_dim)
    self.fuse_s = FusionLayer(self._encoding_dim)
    self.projected_lstm = projected_layer
    self.contextual_layer_p = contextual_passage
    self.contextual_layer_q = contextual_question
    self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)
    self._self_attention = BilinearMatrixAttention(self._encoding_dim,
                                                   self._encoding_dim)
    self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim,
                                           self._encoding_dim)
    self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim,
                                           self._encoding_dim)
    self.yesno_predictor = FeedForward(self._encoding_dim,
                                       self._encoding_dim, 3)
    self.relu = torch.nn.ReLU()

    self._max_span_length = 30

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._span_yesno_accuracy = CategoricalAccuracy()
    self._official_f1 = Average()

    self._variational_dropout = InputVariationalDropout(dropout)
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sim_text_field_embedder: TextFieldEmbedder,
             loss_weights: Dict,
             sim_class_weights: List,
             pretrained_sim_path: str = None,
             use_scenario_encoding: bool = True,
             sim_pretraining: bool = False,
             dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the BERT QA model with an optional scenario-interpretation module."""
    super(BertQA, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    # The sim embedder is only attached when scenario encoding is in use.
    if use_scenario_encoding:
        self._sim_text_field_embedder = sim_text_field_embedder
    self.loss_weights = loss_weights
    self.sim_class_weights = sim_class_weights
    self.use_scenario_encoding = use_scenario_encoding
    self.sim_pretraining = sim_pretraining
    if self.sim_pretraining and not self.use_scenario_encoding:
        raise ValueError(
            "When pretraining Scenario Interpretation Module, you should use it."
        )

    embedding_dim = self._text_field_embedder.get_output_dim()
    self._action_predictor = torch.nn.Linear(embedding_dim, 4)
    self._sim_token_label_predictor = torch.nn.Linear(embedding_dim, 4)
    self._span_predictor = torch.nn.Linear(embedding_dim, 2)

    # Metrics.
    self._action_accuracy = CategoricalAccuracy()
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._span_loss_metric = Average()
    self._action_loss_metric = Average()
    self._sim_loss_metric = Average()
    self._sim_yes_f1 = F1Measure(2)
    self._sim_no_f1 = F1Measure(3)

    # Restore pretrained weights, then freeze the sim embedder's parameters.
    if use_scenario_encoding and pretrained_sim_path is not None:
        logger.info("Loading pretrained model..")
        self.load_state_dict(torch.load(pretrained_sim_path))
        for param in self._sim_text_field_embedder.parameters():
            param.requires_grad = False

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             question_encoder: Seq2SeqEncoder,
             passage_encoder: Seq2SeqEncoder,
             feed_forward: FeedForward,
             dropout: float = 0.1,
             num_decoding_steps: int = 40,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the answer-synthesis encoder/decoder network."""
    super(AnswerSynthesis, self).__init__(vocab, regularizer)
    self._vocab = vocab
    self._vocab_size = vocab.get_vocab_size()  # default namespace: tokens
    self._num_decoding_steps = num_decoding_steps
    self._start_token_index = self._vocab.get_token_index(START_SYMBOL)
    self._end_token_index = self._vocab.get_token_index(END_SYMBOL)

    self._embedder = embedder
    self._question_encoder = question_encoder
    self._passage_encoder = passage_encoder
    encoding_dim = question_encoder.get_output_dim()
    embedding_dim = embedder.get_output_dim()

    # Two-entry, 50-dim embeddings (presumably 0/1 span-position
    # indicators — confirm against the forward pass).
    self._span_start_embedding = nn.Embedding(2, 50)
    self._span_end_embedding = nn.Embedding(2, 50)

    self._gru_decoder = nn.GRUCell(encoding_dim + embedding_dim, encoding_dim)
    self._feed_forward = feed_forward
    self._attention = Attention(NonlinearSimilarity(encoding_dim))

    self._W_r = nn.Linear(embedding_dim, encoding_dim, bias=False)
    self._U_r = nn.Linear(encoding_dim, encoding_dim, bias=False)
    self._V_r = nn.Linear(encoding_dim, encoding_dim, bias=False)
    # Maxout halves the dimension before the vocabulary projection.
    self._max_out = Maxout(encoding_dim,
                           num_layers=1,
                           output_dims=int(encoding_dim / 2),
                           pool_sizes=2)
    self._W_o = nn.Linear(int(encoding_dim / 2), self._vocab_size, bias=False)

    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    initializer(self)
    self._num_iter = 0
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             modeling_layer: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the bi-attention reader with start/end/type prediction heads."""
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    encoding_dim = phrase_layer.get_output_dim()
    self._modeling_layer = modeling_layer
    self._dropout = torch.nn.Dropout(p=dropout)

    # Metrics.
    self._squad_metrics = SquadEmAndF1()
    self._f1_metrics = F1Measure(1)
    self._coref_f1_metric = AttF1Measure(0.1)

    # Two parallel 4x-encoding -> encoding projections with ReLU.
    self.linear_1 = nn.Sequential(
        nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())
    self.linear_2 = nn.Sequential(
        nn.Linear(encoding_dim * 4, encoding_dim), nn.ReLU())
    self.qc_att = BiAttention(encoding_dim, dropout)

    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self.linear_start = nn.Linear(encoding_dim, 1)
    self.linear_end = nn.Linear(encoding_dim, 1)
    self.linear_type = nn.Linear(encoding_dim * 3, 3)

    # Running averages of each loss component.
    self._loss_trackers = {
        'loss': Average(),
        'start_loss': Average(),
        'end_loss': Average(),
        'type_loss': Average(),
    }
def __init__(self,
             vocab,
             text_field_embedder,
             phrase_layer,
             residual_encoder,
             span_start_encoder,
             span_end_encoder,
             initializer,
             dropout=0.2,
             mask_lstms=True):
    """Build BiDAF with a tri-linear self-attention residual block."""
    super(BiDAFSelfAttention, self).__init__(vocab)

    # Initialize layers.
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer

    encoding_dim = phrase_layer.get_output_dim()
    self._matrix_attention = TriLinearAttention(encoding_dim)
    self._merge_atten = TimeDistributed(
        torch.nn.Linear(encoding_dim * 4, encoding_dim))
    self._residual_encoder = residual_encoder
    self._self_atten = TriLinearAttention(encoding_dim)
    self._merge_self_atten = TimeDistributed(
        torch.nn.Linear(encoding_dim * 3, encoding_dim))
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder

    # Start/end span predictors.
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(encoding_dim, 1))
    initializer(self)

    # Metrics (parameter-free; created after initialization, as before).
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._official_em = Average()
    self._official_f1 = Average()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        matrix_attention_layer: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        dropout_prob: float = 0.1,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    """Reader with projection layers bridging embedder, phrase encoder and
    modeling encoder, plus linear span-prediction heads."""
    super().__init__(vocab, regularizer)

    # Resolve the dimensions the projection layers must bridge.
    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()

    self._text_field_embedder = text_field_embedder
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                 encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                encoding_in_dim)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                modeling_in_dim)
    self._modeling_layer = modeling_layer

    # *2: each head reads a concatenation of two modeling outputs —
    # TODO(review) confirm against the forward pass.
    self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = (torch.nn.Dropout(p=dropout_prob)
                     if dropout_prob > 0 else (lambda x: x))
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             pretrained_model: str = None,
             requires_grad: bool = True,
             transformer_weights_model: str = None,
             layer_freeze_regexes: List[str] = None,
             on_load: bool = False,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Set up a RoBERTa encoder with a start/end span-prediction head."""
    super().__init__(vocab, regularizer)

    # Three ways to obtain the transformer:
    #   1. on_load: build from config only (weights presumably restored
    #      later by the caller — confirm),
    #   2. transformer_weights_model: copy from a saved archive,
    #   3. otherwise: load the named pretrained model.
    if on_load:
        logging.info(f"Skipping loading of initial Transformer weights")
        transformer_config = RobertaConfig.from_pretrained(pretrained_model)
        self._transformer_model = RobertaModel(transformer_config)
    elif transformer_weights_model:
        logging.info(
            f"Loading Transformer weights model from {transformer_weights_model}"
        )
        transformer_model_loaded = load_archive(transformer_weights_model)
        self._transformer_model = \
            transformer_model_loaded.model._transformer_model
    else:
        self._transformer_model = RobertaModel.from_pretrained(
            pretrained_model)

    # Optionally freeze parameters whose names match any of the regexes.
    for name, param in self._transformer_model.named_parameters():
        grad = requires_grad
        if layer_freeze_regexes and grad:
            grad = not any(
                [bool(re.search(r, name)) for r in layer_freeze_regexes])
        param.requires_grad = grad

    transformer_config = self._transformer_model.config
    num_labels = 2  # For start/end
    self.qa_outputs = Linear(transformer_config.hidden_size, num_labels)

    # GPT-2 byte-level machinery to get from tokens back to actual text.
    self.byte_decoder = {v: k for k, v in bytes_to_unicode().items()}

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._debug = 2
    self._padding_value = 1  # The index of the RoBERTa padding token
def get_accuracy_squad(model, dev_dataset, vocab, trigger_token_ids, answer,
                       span_start, span_end):
    """
    Same as get_accuracy() in utils.py but for SQuAD models.

    Prints the average F1/EM between each predicted span (with the trigger
    tokens applied) and the target ``answer``.

    Fixes: removed a no-op ``get_metric(reset=True)`` on a freshly created
    metric, removed the unused ``trigger_words`` list, hoisted the metric
    object out of the inner loop (resetting per item instead of
    re-allocating), and guarded against ZeroDivisionError when the dataset
    yields no predictions.
    """
    model.get_metrics(reset=True)
    model.eval()  # model should be in eval() already, but just in case
    iterator = BucketIterator(
        batch_size=32,
        sorting_keys=[["passage", "num_tokens"], ["question", "num_tokens"]],
    )
    iterator.index_with(vocab)

    # Print out the current triggers.
    print_string = ""
    for idx in trigger_token_ids:
        print_string = print_string + vocab.get_token_from_index(idx) + ", "
    print("Current Triggers: " + print_string)

    # Evaluate the model using the triggers and get the F1 / EM scores with
    # the target. One metric instance, reset per prediction, replaces the
    # per-item re-allocation of the original.
    metrics = SquadEmAndF1()
    total_f1 = 0.0
    total_em = 0.0
    total = 0.0
    for batch in lazy_groups_of(iterator(dev_dataset, num_epochs=1,
                                         shuffle=False), group_size=1):
        # TODO: may be unnecessary, but sometimes memory caching causes OOM.
        torch.cuda.empty_cache()
        output_dict = evaluate_batch_squad(model, batch, trigger_token_ids,
                                           vocab, span_start, span_end)
        # Compute F1 and EM of each predicted span against the target span.
        for span_str in output_dict["best_span_str"]:
            metrics(span_str, [answer])
            em, f1 = metrics.get_metric(reset=True)
            total_f1 += f1
            total_em += em
            total += 1.0

    # Guard against an empty dataset (previously a ZeroDivisionError).
    if total > 0:
        print("F1 with target span: " + str(total_f1 / total))
        print("EM with target span: " + str(total_em / total))
    else:
        print("No predictions were produced; cannot compute F1/EM.")
def __init__(self,
             vocab: Vocabulary,
             hidden_size: int,
             is_bidirectional: bool,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             gated_attention_layer: Seq2SeqEncoder,
             self_attention_layer: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Build the R-Net reader: highway, gated attention, self-attention, pointer net."""
    super(RNet, self).__init__(vocab, regularizer)
    self.hidden_size = hidden_size
    self.is_bidirectional = is_bidirectional

    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._gated_attention_layer = gated_attention_layer
    self._self_attention_layer = self_attention_layer

    encoding_dim = phrase_layer.get_output_dim()
    # NOTE(review): gated_attention_dim is computed but unused by the
    # pointer network below; kept for parity with the original.
    gated_attention_dim = gated_attention_layer.get_output_dim()
    self_attention_dim = self_attention_layer.get_output_dim()
    self._pointer_network = PointerNet(self_attention_dim, self.hidden_size,
                                       encoding_dim, self.is_bidirectional)

    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: float = 0.0,
             max_span_length: int = 30,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """Embedder-based QA model with a single start/end linear head."""
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._max_span_length = max_span_length

    # One linear layer producing both a start and an end score per token.
    self.qa_outputs = torch.nn.Linear(
        self._text_field_embedder.get_output_dim(), 2)

    # Metrics.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._span_qa_metrics = SquadEmAndF1()

    # Identity function when dropout is disabled.
    self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             attention_similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Build a BiDAF model with an extra "compatibility" head.

    In addition to the standard BiDAF span predictors, this variant adds
    ``_compat_layer`` (an FC3 over the merged passage representation) and a
    2-way ``_compat_pred_layer``, with a matching BooleanAccuracy metric.
    Dimension compatibility between the configured encoders is validated
    explicitly with ConfigurationError.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    # Per-token highway layers on top of the embedder output.
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = MatrixAttention(attention_similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder
    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    # Compatibility head over [passage; attended question; products; modeled].
    self._compat_layer = FC3(encoding_dim * 4 + modeling_dim)
    self._compat_pred_layer = Linear(encoding_dim * 4 + modeling_dim, 2)
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))
    # Bidaf has lots of layer dimensions which need to match up - these
    # aren't necessarily obvious from the configuration files, so we check
    # here.
    if modeling_layer.get_input_dim() != 4 * encoding_dim:
        raise ConfigurationError(
            "The input dimension to the modeling_layer must be "
            "equal to 4 times the encoding dimension of the phrase_layer. "
            "Found {} and 4 * {} respectively.".format(
                modeling_layer.get_input_dim(), encoding_dim))
    if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder (embedding_dim + "
            "char_cnn) must match the input dimension of the phrase_encoder. "
            "Found {} and {}, respectively.".format(
                text_field_embedder.get_output_dim(),
                phrase_layer.get_input_dim()))
    if span_end_encoder.get_input_dim() != encoding_dim * 4 + modeling_dim * 3:
        raise ConfigurationError(
            "The input dimension of the span_end_encoder should be equal to "
            "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
            "Found {} and (4 * {} + 3 * {}) "
            "respectively.".format(span_end_encoder.get_input_dim(),
                                   encoding_dim, modeling_dim))
    # Metrics: boundary accuracies, exact span, SQuAD EM/F1, compat accuracy.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    self._compat_accuracy = BooleanAccuracy()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so self._dropout(x) is always callable.
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
def __init__(self, submodels: List[RNet]) -> None:
    """Ensemble wrapper over several trained ``RNet`` submodels."""
    super().__init__(submodels)
    # EM/F1 tracked over the ensemble's combined predictions.
    ensemble_metric = SquadEmAndF1()
    self._squad_metrics = ensemble_metric
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             judge: Model = None,
             update_judge: bool = False,
             reward_method: str = None,
             detach_value_head: bool = False,
             qa_loss_weight: float = 0.,
             influence_reward: bool = False,
             dataset_name: str = 'squad') -> None:
    """
    BiDAF variant for a debate/judge training setup.

    When ``judge`` is None this instance *is* the judge; otherwise it is a
    debater that additionally builds a FiLM-based turn conditioner and a
    value head, and scores itself via ``reward_method`` against ``judge``.

    Fix: the dimension-compatibility checks (comment plus the three
    ``check_dimensions_match`` calls) were duplicated verbatim in the
    original body; the redundant second copy has been removed. The checks
    are pure validation, so this does not change behavior.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self.judge = judge
    self.is_judge = self.judge is None
    self.reward_method = None if self.is_judge else reward_method
    self.update_judge = update_judge and (self.judge is not None)
    self._detach_value_head = detach_value_head
    self._qa_loss_weight = qa_loss_weight
    self.influence_reward = influence_reward
    self.answer_type = 'mc' if dataset_name == 'race' else 'span'
    self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(
        Highway(text_field_embedder.get_output_dim(), num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    if not self.is_judge:
        # Debaters condition the modeling layer on the turn via FiLM.
        self._turn_film_gen = torch.nn.Linear(
            1, 2 * modeling_layer.get_input_dim())
        self._film = FiLM()
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder
    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    if not self.is_judge:
        self._value_head = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
    self._span_start_predictor = TimeDistributed(
        torch.nn.Linear(span_start_input_dim, 1))
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_predictor = TimeDistributed(
        torch.nn.Linear(span_end_input_dim, 1))
    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(), 4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")
    # Metrics: boundary accuracies, exact span, SQuAD EM/F1.
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        # Identity stand-in so self._dropout(x) is always callable.
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms
    initializer(self)
# NOTE(review): this span is a fragment of a flattened top-level evaluation
# script. It begins mid-expression -- inside a sort call (``key=...,
# reverse=True)``) whose opening is not visible here -- and it ends
# mid-loop, so it cannot be re-indented or restructured safely from this
# view. Code below is left byte-identical.
# From what is visible, the fragment: (a) closes a sort of predicted chains
# by a true-positive count and stores them on each result row as
# 'sorted_pred_chains'; (b) if --data_path is given, loads gold JSON data,
# builds an id->index map, and computes SQuAD EM/F1 over each row's
# 'best_span_str' against the gold 'answer'; (c) begins tallying rows that
# have non-empty 'ans_sent_idxs' -- presumably an answer-sentence hit rate,
# continued past this view (TODO confirm against the full script).
key=lambda x: num_TP(x, sp_list, r[ 'question_lemma'], r['sent_lemmas']), reverse=True) r['sorted_pred_chains'] = pred_chains if args.data_path: data = [] for fn in glob.glob(args.data_path): with open(fn, 'r') as f: data += json.load(f) print("Number of data instances:", len(data)) print("Number of data ids:", len(set([d['_id'] for d in data]))) data_id2idx = {d['_id']: i for i, d in enumerate(data)} if args.data_path and 'best_span_str' in res[0]: squad_metrics = SquadEmAndF1() for r in res: best_span_str = r['best_span_str'] answer = data[data_id2idx[r['_id']]]['answer'].strip().replace( "\n", "") squad_metrics(best_span_str, [answer]) em, f1 = squad_metrics.get_metric(reset=True) print("Ans EM:", em, "Ans F1:", f1) if 'ans_sent_idxs' in res[0] and not res[0]['ans_sent_idxs'] is None: # ans include in prediction num = 0 corr = 0 for r in res: if len(r['ans_sent_idxs']) > 0: num += 1
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             phrase_layer_sp: Seq2SeqEncoder,
             span_start_encoder: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             self_attention_layer: Seq2SeqEncoder,
             gate_sent_encoder: Seq2SeqEncoder,
             gate_self_attention_layer: Seq2SeqEncoder,
             type_encoder: Seq2SeqEncoder,
             modeling_layer: Seq2SeqEncoder,
             modeling_layer_sp: Seq2SeqEncoder,
             dropout: float = 0.2,
             output_att_scores: bool = True,
             sent_labels_src: str = 'sp',
             gate_self_att: bool = True,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    Multi-task QA model with a supporting-fact ("sp") branch.

    Builds parallel phrase/modeling encoders for answer prediction and for
    supporting-sentence gating (``SpanGate``), bi-attention between question
    and context for both branches, and linear heads for span start, span
    end, and a 3-way answer-type classification. Tracks SQuAD EM/F1, a
    binary F1 for supporting facts, and per-component loss averages.
    """
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._phrase_layer = phrase_layer
    self._phrase_layer_sp = phrase_layer_sp
    self._dropout = torch.nn.Dropout(p=dropout)
    self._modeling_layer = modeling_layer
    self._modeling_layer_sp = modeling_layer_sp
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._type_encoder = type_encoder
    self._self_attention_layer = self_attention_layer
    self._output_att_scores = output_att_scores
    # Source of sentence labels ('sp' = supporting facts).
    self._sent_labels_src = sent_labels_src
    encoding_dim = span_start_encoder.get_output_dim()
    # Gate that selects supporting sentences; optionally self-attentive.
    self._span_gate = SpanGate(encoding_dim, gate_self_att)
    self.qc_att = BiAttention(encoding_dim, dropout)
    self.qc_att_sp = BiAttention(encoding_dim, dropout)
    if gate_self_att:
        self._gate_sent_encoder = gate_sent_encoder
        self._gate_self_attention_layer = gate_self_attention_layer
    else:
        # Self-attention for the gate disabled; drop the extra encoders.
        self._gate_sent_encoder = None
        self._gate_self_attention_layer = None
    self.linear_start = nn.Linear(encoding_dim, 1)
    self.linear_end = nn.Linear(encoding_dim, 1)
    # 3-way answer-type head over a tripled encoding.
    self.linear_type = nn.Linear(encoding_dim * 3, 3)
    self._squad_metrics = SquadEmAndF1()
    self._f1_metrics = F1Measure(1)
    # Running averages of each loss component for logging.
    self._loss_trackers = {
        'loss': Average(),
        'start_loss': Average(),
        'end_loss': Average(),
        'type_loss': Average(),
        'strong_sup_loss': Average()
    }
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             matrix_attention_layer: MatrixAttention,
             modeling_layer: Seq2SeqEncoder,
             dropout_prob: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    QANet-style span model with generation-style evaluation metrics.

    Projects embeddings into the phrase encoder, applies matrix attention,
    projects the merged representation into the modeling layer, and
    predicts span boundaries from pairs of modeling-layer passes. Besides
    span accuracy and SQuAD EM/F1 it also tracks BLEU-1..4 averages and
    ROUGE scores computed with the ``rouge`` evaluator.
    """
    super().__init__(vocab, regularizer)
    text_embed_dim = text_field_embedder.get_output_dim()
    encoding_in_dim = phrase_layer.get_input_dim()
    encoding_out_dim = phrase_layer.get_output_dim()
    modeling_in_dim = modeling_layer.get_input_dim()
    modeling_out_dim = modeling_layer.get_output_dim()
    self._text_field_embedder = text_field_embedder
    # Project embeddings to the phrase-encoder input size, then highway.
    self._embedding_proj_layer = torch.nn.Linear(text_embed_dim,
                                                 encoding_in_dim)
    self._highway_layer = Highway(encoding_in_dim, num_highway_layers)
    self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim,
                                                encoding_in_dim)
    self._phrase_layer = phrase_layer
    self._matrix_attention = matrix_attention_layer
    # Merged attention output (4x encoding) projected into the modeler.
    self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4,
                                                modeling_in_dim)
    self._modeling_layer = modeling_layer
    # Span heads consume two concatenated modeling passes each.
    self._span_start_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_end_predictor = torch.nn.Linear(modeling_out_dim * 2, 1)
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._metrics = SquadEmAndF1()
    self._dropout = torch.nn.Dropout(
        p=dropout_prob) if dropout_prob > 0 else lambda x: x
    # evaluation
    # BLEU
    self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    self._bleu_scores = {
        x: Average()
        for x in self._bleu_score_types_to_use
    }
    # ROUGE using pyrouge
    self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']
    # if we have rouge-n as metric we actualy get n scores like rouge-1, rouge-2, .., rouge-n
    max_rouge_n = 4
    rouge_n_metrics = []
    if "rouge-n" in self._rouge_score_types_to_use:
        rouge_n_metrics = [
            "rouge-{0}".format(x) for x in range(1, max_rouge_n + 1)
        ]
    rouge_scores_names = rouge_n_metrics + [
        y for y in self._rouge_score_types_to_use if y != 'rouge-n'
    ]
    self._rouge_scores = {x: Average() for x in rouge_scores_names}
    self._rouge_evaluator = rouge.Rouge(
        metrics=self._rouge_score_types_to_use,
        max_n=max_rouge_n,
        limit_length=True,
        length_limit=100,
        length_limit_type='words',
        apply_avg=False,
        apply_best=False,
        alpha=0.5,  # Default F1_score
        weight_factor=1.2,
        stemming=True)
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             use_attention: bool,
             seq2seq_encoder: Seq2SeqEncoder,
             seq2vec_encoder: Seq2VecEncoder,
             span_end_encoder_after: Seq2SeqEncoder,
             use_decoder_trainer: bool,
             decoder_beam_search: BeamSearch,
             kb_configs: dict,
             other_configs: dict,
             initializer: InitializerApplicator) -> None:
    """
    Build the ProStruct (ProPara) model.

    Chooses an attention-based or seq2vec aggregation path, builds span
    predictors for "after" locations, loads a lexical KB for action scoring,
    configures the commonsense-rule-based action generator, and sets up the
    beam-search decoder. NOTE(review): the original source was flattened to
    a single line; the nesting of the single-statement ``if`` blocks below
    was reconstructed from syntax -- confirm against the upstream repo.
    """
    super(ProStructModel, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.num_actions = len(Action)  # number of actions is hardcoded here.
    # They are defined in Action enum in propara_dataset_reader.py
    self.other_configs = other_configs
    # kb_coefficient * kb_score + (1-kb_coefficient) * model_score
    self.kb_coefficient = torch.nn.Parameter(
        torch.ones(1).mul(kb_configs.get('kb_coefficient', 0.5)))
    self.use_attention = use_attention
    self.use_decoder_trainer = use_decoder_trainer
    if self.use_attention:
        # Attention path: encode, then attend with a bilinear similarity.
        self.seq2seq_encoder = seq2seq_encoder
        self.time_distributed_seq2seq_encoder = TimeDistributed(
            TimeDistributed(self.seq2seq_encoder))
        self.time_distributed_attention_layer = \
            TimeDistributed(TimeDistributed(
                Attention(similarity_function=BilinearSimilarity(
                    2 * seq2seq_encoder.get_output_dim(),
                    seq2seq_encoder.get_output_dim()),
                    normalize=True)))
        self.aggregate_feedforward = Linear(
            seq2seq_encoder.get_output_dim(), self.num_actions)
    else:
        # Seq2vec path: pool the encoding instead of attending.
        self.seq2vec_encoder = seq2vec_encoder
        self.time_distributed_seq2vec_encoder = TimeDistributed(
            TimeDistributed(self.seq2vec_encoder))
        self.aggregate_feedforward = Linear(
            seq2vec_encoder.get_output_dim(), self.num_actions)
    self.span_end_encoder_after = span_end_encoder_after
    # per step per participant
    self.time_distributed_encoder_span_end_after = TimeDistributed(
        TimeDistributed(self.span_end_encoder_after))
    # Fixme: dimensions
    self._span_start_predictor_after = TimeDistributed(
        TimeDistributed(
            torch.nn.Linear(2 + 2 * seq2seq_encoder.get_output_dim(), 1)))
    self._span_end_predictor_after = TimeDistributed(
        TimeDistributed(
            torch.nn.Linear(span_end_encoder_after.get_output_dim(), 1)))
    self._type_accuracy = BooleanAccuracy(
    )  # Fixme WRONG. Categorical accuracy should be right!
    self._loss = torch.nn.CrossEntropyLoss(
        ignore_index=-1
    )  # Fixme: This is less robust. If the masking value
    # Fixme: add a metric for location span strings
    self.span_metric = SquadEmAndF1()
    if self.use_decoder_trainer:
        self.decoder_trainer = MaximumMarginalLikelihood()
    # NOTE(review): `kb` is only bound when kb_to_use == 'lexicalkb';
    # other values would leave it undefined at the ProParaDecoderStep call.
    if kb_configs['kb_to_use'] == 'lexicalkb':
        kb = KBLexical(lexical_kb_path=kb_configs['lexical_kb_path'],
                       fullgrid_prompts_load_path=kb_configs[
                           'fullgrid_prompts_load_path'])
    # Makeshift arrangement to get number of participants in tiny.tsv .
    self.commonsense_based_action_generator = CommonsenseBasedActionGenerator(
        self.num_actions)
    # Comma-separated flags in config select which constraint rules apply.
    self.rules_activated = [
        int(rule_val.strip()) > 0 for rule_val in self.other_configs.get(
            'constraint_rules_to_turn_on', '0,0,0,1').split(",")
    ]
    self.rule_2_fraction_participants = self.other_configs.get(
        'rule_2_fraction_participants', 0.5)
    self.rule_3_fraction_steps = self.other_configs.get(
        'rule_3_fraction_steps', 0.5)
    self.commonsense_based_action_generator.set_rules_used(
        self.rules_activated,
        self.rule_2_fraction_participants,
        self.rule_3_fraction_steps)
    # [self.rules_activated[0],  # C/D/C/D cannot happen
    #  self.rules_activated[1],  # > 1/2 partic
    #  self.rules_activated[2],  # > 1/2 steps cannot change
    #  self.rules_activated[3]   # until mentioned
    # ])
    self.decoder_step = ProParaDecoderStep(
        KBBasedActionScorer(kb=kb, kb_coefficient=self.kb_coefficient),
        valid_action_generator=self.commonsense_based_action_generator)
    self.beam_search = decoder_beam_search
    initializer(self)
def __init__(self, submodels: List[BidirectionalAttentionFlow]) -> None:
    """Ensemble wrapper over several trained BiDAF submodels."""
    super().__init__(submodels)
    # EM/F1 tracked over the ensemble's combined predictions.
    ensemble_metric = SquadEmAndF1()
    self._squad_metrics = ensemble_metric
def __init__(self, vocab: Vocabulary, cf_a, preloaded_elmo=None) -> None:
    """
    Build a (optionally Bayesian) BiDAF model entirely from a config
    object ``cf_a``.

    Every sub-module can be either deterministic or a variational-Bayes
    (VB) version chosen by ``cf_a.VB_*`` flags; VB modules are collected in
    ``self.VBmodels``. NOTE(review): the original source was flattened to a
    few long lines; block nesting was reconstructed from syntax -- confirm
    against the upstream repo.
    """
    super(BidirectionalAttentionFlow_1, self).__init__(vocab, cf_a.regularizer)
    """ Initialize some data structures """
    self.cf_a = cf_a
    # Bayesian data models
    self.VBmodels = []
    self.LinearModels = []
    """
    ############## TEXT FIELD EMBEDDER with ELMO ####################
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``question`` and ``passage`` ``TextFields`` we get
        as input to the model.
    """
    if (cf_a.use_ELMO):
        if (type(preloaded_elmo) != type(None)):
            # Reuse an ELMo instance handed in by the caller.
            text_field_embedder = preloaded_elmo
        else:
            text_field_embedder = bidut.download_Elmo(
                cf_a.ELMO_num_layers, cf_a.ELMO_droput)
        print("ELMO loaded from disk or downloaded")
    else:
        text_field_embedder = None
    # embedder_out_dim = text_field_embedder.get_output_dim()
    self._text_field_embedder = text_field_embedder
    if (cf_a.Add_Linear_projection_ELMO):
        # Optional 200-dim projection of the ELMo output, VB or plain.
        if (self.cf_a.VB_Linear_projection_ELMO):
            prior = Vil.Prior(**(cf_a.VB_Linear_projection_ELMO_prior))
            print(
                "----------------- Bayesian Linear Projection ELMO --------------"
            )
            linear_projection_ELMO = LinearVB(
                text_field_embedder.get_output_dim(), 200, prior=prior)
            self.VBmodels.append(linear_projection_ELMO)
        else:
            linear_projection_ELMO = torch.nn.Linear(
                text_field_embedder.get_output_dim(), 200)
        self._linear_projection_ELMO = linear_projection_ELMO
    """
    ############## Highway layers ####################
    num_highway_layers : ``int``
        The number of highway layers to use in between embedding the input
        and passing it through the phrase layer.
    """
    Input_dimension_highway = None
    if (cf_a.Add_Linear_projection_ELMO):
        # With the projection, downstream layers see 200-dim inputs.
        Input_dimension_highway = 200
    else:
        Input_dimension_highway = text_field_embedder.get_output_dim()
    num_highway_layers = cf_a.num_highway_layers
    # Linear later to compute the start
    if (self.cf_a.VB_highway_layers):
        print("----------------- Bayesian Highway network --------------")
        prior = Vil.Prior(**(cf_a.VB_highway_layers_prior))
        highway_layer = HighwayVB(Input_dimension_highway,
                                  num_highway_layers, prior=prior)
        self.VBmodels.append(highway_layer)
    else:
        highway_layer = Highway(Input_dimension_highway, num_highway_layers)
    highway_layer = TimeDistributed(highway_layer)
    self._highway_layer = highway_layer
    """
    ############## Phrase layer ####################
    phrase_layer : ``Seq2SeqEncoder``
        The encoder (with its own internal stacking) that we will use in
        between embedding tokens and doing the bidirectional attention.
    """
    if cf_a.phrase_layer_dropout > 0:  ## Create dropout layer
        dropout_phrase_layer = torch.nn.Dropout(
            p=cf_a.phrase_layer_dropout)
    else:
        # Identity stand-in so the dropout is always callable.
        dropout_phrase_layer = lambda x: x
    phrase_layer = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(Input_dimension_highway,
                      hidden_size=cf_a.phrase_layer_hidden_size,
                      batch_first=True,
                      bidirectional=True,
                      num_layers=cf_a.phrase_layer_num_layers,
                      dropout=cf_a.phrase_layer_dropout))
    # Bidirectional LSTM -> output dim is 2 * hidden size.
    phrase_encoding_out_dim = cf_a.phrase_layer_hidden_size * 2
    self._phrase_layer = phrase_layer
    self._dropout_phrase_layer = dropout_phrase_layer
    """
    ############## Matrix attention layer ####################
    similarity_function : ``SimilarityFunction``
        The similarity function that we will use when comparing encoded
        passage and question representations.
    """
    # Linear later to compute the start
    if (self.cf_a.VB_similarity_function):
        prior = Vil.Prior(**(cf_a.VB_similarity_function_prior))
        print(
            "----------------- Bayesian Similarity matrix --------------")
        similarity_function = LinearSimilarityVB(
            combination="x,y,x*y",
            tensor_1_dim=phrase_encoding_out_dim,
            tensor_2_dim=phrase_encoding_out_dim,
            prior=prior)
        self.VBmodels.append(similarity_function)
    else:
        similarity_function = LinearSimilarity(
            combination="x,y,x*y",
            tensor_1_dim=phrase_encoding_out_dim,
            tensor_2_dim=phrase_encoding_out_dim)
    matrix_attention = LegacyMatrixAttention(similarity_function)
    self._matrix_attention = matrix_attention
    """
    ############## Modelling Layer ####################
    modeling_layer : ``Seq2SeqEncoder``
        The encoder (with its own internal stacking) that we will use in
        between the bidirectional attention and predicting span start and
        end.
    """
    ## Create dropout layer
    if cf_a.modeling_passage_dropout > 0:  ## Create dropout layer
        dropout_modeling_passage = torch.nn.Dropout(
            p=cf_a.modeling_passage_dropout)
    else:
        dropout_modeling_passage = lambda x: x
    # Modeling layer consumes the 4x merged attention output.
    modeling_layer = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(phrase_encoding_out_dim * 4,
                      hidden_size=cf_a.modeling_passage_hidden_size,
                      batch_first=True,
                      bidirectional=True,
                      num_layers=cf_a.modeling_passage_num_layers,
                      dropout=cf_a.modeling_passage_dropout))
    self._modeling_layer = modeling_layer
    self._dropout_modeling_passage = dropout_modeling_passage
    """
    ############## Span Start Representation #####################
    span_end_encoder : ``Seq2SeqEncoder``
        The encoder that we will use to incorporate span start predictions
        into the passage state before predicting span end.
    """
    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    span_start_input_dim = encoding_dim * 4 + modeling_dim
    # Linear later to compute the start
    if (self.cf_a.VB_span_start_predictor_linear):
        prior = Vil.Prior(**(cf_a.VB_span_start_predictor_linear_prior))
        print(
            "----------------- Bayesian Span Start Predictor--------------"
        )
        span_start_predictor_linear = LinearVB(span_start_input_dim, 1,
                                               prior=prior)
        self.VBmodels.append(span_start_predictor_linear)
    else:
        span_start_predictor_linear = torch.nn.Linear(
            span_start_input_dim, 1)
    self._span_start_predictor_linear = span_start_predictor_linear
    self._span_start_predictor = TimeDistributed(
        span_start_predictor_linear)
    """
    ############## Span End Representation #####################
    """
    ## Create dropout layer
    if cf_a.span_end_encoder_dropout > 0:
        dropout_span_end_encode = torch.nn.Dropout(
            p=cf_a.span_end_encoder_dropout)
    else:
        dropout_span_end_encode = lambda x: x
    span_end_encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(encoding_dim * 4 + modeling_dim * 3,
                      hidden_size=cf_a.modeling_span_end_hidden_size,
                      batch_first=True,
                      bidirectional=True,
                      num_layers=cf_a.modeling_span_end_num_layers,
                      dropout=cf_a.span_end_encoder_dropout))
    span_end_encoding_dim = span_end_encoder.get_output_dim()
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
    self._span_end_encoder = span_end_encoder
    self._dropout_span_end_encode = dropout_span_end_encode
    if (self.cf_a.VB_span_end_predictor_linear):
        print(
            "----------------- Bayesian Span End Predictor--------------")
        prior = Vil.Prior(**(cf_a.VB_span_end_predictor_linear_prior))
        span_end_predictor_linear = LinearVB(span_end_input_dim, 1,
                                             prior=prior)
        self.VBmodels.append(span_end_predictor_linear)
    else:
        span_end_predictor_linear = torch.nn.Linear(span_end_input_dim, 1)
    self._span_end_predictor_linear = span_end_predictor_linear
    self._span_end_predictor = TimeDistributed(span_end_predictor_linear)
    """
    Dropput last layers
    """
    if cf_a.spans_output_dropout > 0:
        # NOTE(review): the guard tests spans_output_dropout but p= uses
        # span_end_encoder_dropout -- likely a copy-paste bug; left as-is.
        dropout_spans_output = torch.nn.Dropout(
            p=cf_a.span_end_encoder_dropout)
    else:
        dropout_spans_output = lambda x: x
    self._dropout_spans_output = dropout_spans_output
    """
    Checkings and accuracy
    """
    # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
    # obvious from the configuration files, so we check here.
    check_dimensions_match(modeling_layer.get_input_dim(),
                           4 * encoding_dim,
                           "modeling layer input dim", "4 * encoding dim")
    check_dimensions_match(Input_dimension_highway,
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim")
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    """
    mask_lstms : ``bool``, optional (default=True)
        If ``False``, we will skip passing the mask to the LSTM layers.
        This gives a ~2x speedup, with only a slight performance decrease,
        if any. We haven't experimented much with this yet, but have
        confirmed that we still get very similar performance with much
        faster training times. We still use the mask for all softmaxes,
        but avoid the shuffling that's required when using masking with
        pytorch LSTMs.
    """
    self._mask_lstms = cf_a.mask_lstms
    """
    ################### Initialize parameters ##############################
    """
    #### THEY ARE ALL INITIALIZED WHEN INSTANTING THE COMPONENTS ###
    """
    ####################### OPTIMIZER ################
    """
    optimizer = pytut.get_optimizers(self, cf_a)
    self._optimizer = optimizer