def test_get_dimension_is_correct(self):
    encoder = StackedSelfAttentionEncoder(input_dim=9,
                                          hidden_dim=12,
                                          projection_dim=6,
                                          feedforward_hidden_dim=5,
                                          num_layers=3,
                                          num_attention_heads=3)
    assert encoder.get_input_dim() == 9
    # The output dim is hidden_dim (the projection happens inside each layer).
    assert encoder.get_output_dim() == 12
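# A minimal forward-pass sketch added for illustration (not part of the original
# test suite; it assumes `torch` is imported in this module): pushing a random
# tensor through the encoder shows that the sequence length is preserved and
# the last dimension equals hidden_dim. Note that projection_dim must be
# divisible by num_attention_heads, because multi-head attention splits the
# projected representation evenly across the heads (which is why the duplicate
# test using projection_dim=7 with 3 heads could not have constructed the encoder).
def test_forward_preserves_shape(self):
    encoder = StackedSelfAttentionEncoder(input_dim=9,
                                          hidden_dim=12,
                                          projection_dim=6,
                                          feedforward_hidden_dim=5,
                                          num_layers=3,
                                          num_attention_heads=3)
    inputs = torch.randn(3, 5, 9)  # (batch_size, sequence_length, input_dim)
    outputs = encoder(inputs, None)  # a mask of None attends everywhere
    assert list(outputs.size()) == [3, 5, 12]  # last dim is hidden_dim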
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             num_highway_layers: int,
             phrase_layer: Seq2SeqEncoder,
             similarity_function: SimilarityFunction,
             self_attention_layer: StackedSelfAttentionEncoder,
             modeling_layer: Seq2SeqEncoder,
             span_end_encoder: Seq2SeqEncoder,
             dropout: float = 0.2,
             mask_lstms: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                  num_highway_layers))
    self._phrase_layer = phrase_layer
    self._matrix_attention = LegacyMatrixAttention(similarity_function)
    self._modeling_layer = modeling_layer
    self._span_end_encoder = span_end_encoder

    # New self-attention layer, with its own matrix attention over the
    # self-attended representations.
    self._self_attention_layer = self_attention_layer
    self._sa_matrix_attention = LegacyMatrixAttention(similarity_function)
    selfattent_dim = self_attention_layer.get_output_dim()  # 200 with the configuration used here

    encoding_dim = phrase_layer.get_output_dim()
    modeling_dim = modeling_layer.get_output_dim()
    # Vanilla BiDAF uses encoding_dim * 4 + modeling_dim; the two
    # self-attention vectors are concatenated on top of that.
    span_start_input_dim = encoding_dim * 4 + modeling_dim + 2 * selfattent_dim
    self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

    span_end_encoding_dim = span_end_encoder.get_output_dim()
    # Vanilla BiDAF uses encoding_dim * 4 + span_end_encoding_dim.
    span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim + 2 * selfattent_dim
    self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

    # BiDAF has lots of layer dimensions which need to match up - these aren't
    # necessarily obvious from the configuration files, so we check them here.
    check_dimensions_match(modeling_layer.get_input_dim(),
                           4 * encoding_dim + 2 * selfattent_dim,
                           "modeling layer input dim",
                           "4 * encoding dim + 2 * self attention dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           phrase_layer.get_input_dim(),
                           "text field embedder output dim",
                           "phrase layer input dim")
    check_dimensions_match(span_end_encoder.get_input_dim(),
                           4 * encoding_dim + 3 * modeling_dim + 2 * selfattent_dim,
                           "span end encoder input dim",
                           "4 * encoding dim + 3 * modeling dim + 2 * self attention dim")

    self._na_accuracy = CategoricalAccuracy()
    self._span_start_accuracy = CategoricalAccuracy()
    self._span_end_accuracy = CategoricalAccuracy()
    self._span_accuracy = BooleanAccuracy()
    self._squad_metrics = SquadEmAndF1()
    # NOTE: this hard-codes GPU placement for the no-answer classifier head.
    self._na_dense = lambda in_dim: torch.nn.Linear(in_dim, 2).cuda()

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._mask_lstms = mask_lstms

    initializer(self)
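# A worked example of the dimension bookkeeping above, as a sketch under
# assumed hyperparameters (bidirectional phrase, modeling, and span-end
# encoders of hidden size 100, so encoding_dim = modeling_dim =
# span_end_encoding_dim = 200, and selfattent_dim = 200 as noted above; none
# of these values are fixed by this code):
#
#   modeling layer input dim   = 4 * 200 + 2 * 200           = 1200
#   span_start_input_dim       = 4 * 200 + 200 + 2 * 200     = 1400
#   span end encoder input dim = 4 * 200 + 3 * 200 + 2 * 200 = 1800
#   span_end_input_dim         = 4 * 200 + 200 + 2 * 200     = 1400
#
# Relative to vanilla BiDAF, each of these widths grows by exactly
# 2 * selfattent_dim = 400, which is why the encoder sizes in the
# configuration file must be updated in lockstep with this constructor.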