Beispiel #1
0
 def test_forward_works_with_subtract_combinations(self):
     """With combination 'x-y', the expected scores are dot(weights, a - b) + bias."""
     similarity = LinearSimilarity(2, 2, combination='x-y')
     # Pin the parameters so the expected scores can be worked out by hand.
     similarity._weight_vector = Parameter(torch.FloatTensor([-.3, .5]))
     similarity._bias = Parameter(torch.FloatTensor([0]))
     minuend = Variable(torch.FloatTensor([[1, 1], [-1, -1]]))
     subtrahend = Variable(torch.FloatTensor([[1, 0], [0, 1]]))
     scores = similarity(minuend, subtrahend)
     scores_np = scores.data.numpy()
     # One score per row pair.
     assert scores_np.shape == (2,)
     assert_almost_equal(scores_np, [.5, -.7])
Beispiel #2
0
 def test_forward_does_a_weighted_product(self):
     """With combination 'x,y', the expected scores are dot(weights, [a; b]) + bias."""
     similarity = LinearSimilarity(3, 1, combination='x,y')
     # Three weights for the first tensor, one for the second, plus a non-zero bias.
     fixed_weights = torch.FloatTensor([-.3, .5, 2.0, -1.0])
     fixed_bias = torch.FloatTensor([.1])
     similarity._weight_vector = Parameter(fixed_weights)
     similarity._bias = Parameter(fixed_bias)
     first = Variable(torch.FloatTensor([[[1, 1, 1], [-1, -1, 0]]]))
     second = Variable(torch.FloatTensor([[[0], [1]]]))
     scores_np = similarity(first, second).data.numpy()
     assert scores_np.shape == (1, 2,)
     assert_almost_equal(scores_np, [[2.3, -1.1]])
Beispiel #3
0
 def test_forward_works_with_divide_combinations(self):
     """With combination 'x/y', the expected scores are dot(weights, a / b) + bias."""
     similarity = LinearSimilarity(2, 2, combination='x/y')
     # Fixed parameters make the outcome deterministic.
     similarity._weight_vector = Parameter(torch.FloatTensor([-.3, .5]))
     similarity._bias = Parameter(torch.FloatTensor([0]))
     numerators = torch.FloatTensor([[1, 1], [-1, -1]])
     denominators = torch.FloatTensor([[1, 2], [2, 1]])
     scores = similarity(numerators, denominators)
     scores_np = scores.data.numpy()
     assert scores_np.shape == (2,)
     assert_almost_equal(scores_np, [-.05, -.35])
 def test_forward_works_with_subtract_combinations(self):
     """Check the 'x-y' combination against hand-computed dot(weights, a - b) + bias."""
     similarity = LinearSimilarity(2, 2, combination='x-y')
     known_weights = Parameter(torch.FloatTensor([-.3, .5]))
     zero_bias = Parameter(torch.FloatTensor([0]))
     similarity._weight_vector = known_weights
     similarity._bias = zero_bias
     left_rows = torch.FloatTensor([[1, 1], [-1, -1]])
     right_rows = torch.FloatTensor([[1, 0], [0, 1]])
     observed = similarity(left_rows, right_rows).data.numpy()
     # Two input rows give two scalar scores.
     assert observed.shape == (2, )
     assert_almost_equal(observed, [.5, -.7])
Beispiel #5
0
 def test_forward_works_with_divide_combinations(self):
     """Check the 'x/y' combination against hand-computed dot(weights, a / b) + bias."""
     similarity = LinearSimilarity(2, 2, combination='x/y')
     known_weights = Parameter(torch.FloatTensor([-.3, .5]))
     zero_bias = Parameter(torch.FloatTensor([0]))
     similarity._weight_vector = known_weights
     similarity._bias = zero_bias
     dividend = Variable(torch.FloatTensor([[1, 1], [-1, -1]]))
     divisor = Variable(torch.FloatTensor([[1, 2], [2, 1]]))
     observed = similarity(dividend, divisor).data.numpy()
     # Two input rows give two scalar scores.
     assert observed.shape == (2, )
     assert_almost_equal(observed, [-.05, -.35])
Beispiel #6
0
 def test_forward_does_a_weighted_product(self):
     """Check the 'x,y' combination: scores equal dot(weights, [a; b]) + bias."""
     similarity = LinearSimilarity(3, 1, combination="x,y")
     # Weights cover the 3 + 1 concatenated features; the bias is deliberately non-zero.
     similarity._weight_vector = Parameter(torch.FloatTensor([-0.3, 0.5, 2.0, -1.0]))
     similarity._bias = Parameter(torch.FloatTensor([0.1]))
     wide_input = torch.FloatTensor([[[1, 1, 1], [-1, -1, 0]]])
     narrow_input = torch.FloatTensor([[[0], [1]]])
     scores = similarity(wide_input, narrow_input)
     scores_np = scores.data.numpy()
     assert scores_np.shape == (1, 2)
     assert_almost_equal(scores_np, [[2.3, -1.1]])
Beispiel #7
0
 def test_forward_works_with_higher_order_tensors(self):
     """Score 5-dimensional inputs and spot-check one position against a numpy dot product."""
     similarity = LinearSimilarity(7, 7, combination='x,y')
     # Random draws stay in the order weights -> a -> b, as a seeded run would expect.
     weights = numpy.random.rand(14)
     similarity._weight_vector = Parameter(torch.from_numpy(weights).float())
     similarity._bias = Parameter(torch.FloatTensor([0.]))
     input_shape = (5, 4, 3, 6, 7)
     a_vectors = numpy.random.rand(*input_shape)
     b_vectors = numpy.random.rand(*input_shape)
     a_tensor = Variable(torch.from_numpy(a_vectors).float())
     b_tensor = Variable(torch.from_numpy(b_vectors).float())
     scores = similarity(a_tensor, b_tensor).data.numpy()
     # The result has the input shape without its trailing axis.
     assert scores.shape == input_shape[:-1]
     probe = (3, 2, 1, 3)
     concatenated = numpy.concatenate([a_vectors[probe], b_vectors[probe]])
     assert_almost_equal(scores[probe], numpy.dot(concatenated, weights), decimal=6)
Beispiel #8
0
 def test_forward_works_with_higher_order_tensors(self):
     """Run the 'x,y' similarity on (5, 4, 3, 6, 7) inputs and verify one entry by hand."""
     similarity = LinearSimilarity(7, 7, combination='x,y')
     # 14 weights: 7 for each of the two concatenated inputs.
     weights = numpy.random.rand(14)
     weight_tensor = torch.from_numpy(weights).float()
     similarity._weight_vector = Parameter(weight_tensor)
     similarity._bias = Parameter(torch.FloatTensor([0.]))
     a_vectors = numpy.random.rand(5, 4, 3, 6, 7)
     b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
     wrapped_a = Variable(torch.from_numpy(a_vectors).float())
     wrapped_b = Variable(torch.from_numpy(b_vectors).float())
     scores = similarity(wrapped_a, wrapped_b)
     scores = scores.data.numpy()
     assert scores.shape == (5, 4, 3, 6)
     # Recompute a single position with plain numpy for comparison.
     a_slice = a_vectors[3, 2, 1, 3, :]
     b_slice = b_vectors[3, 2, 1, 3, :]
     reference = numpy.dot(numpy.concatenate([a_slice, b_slice]), weights)
     assert_almost_equal(scores[3, 2, 1, 3], reference, decimal=6)
Beispiel #9
0
 def test_can_construct_from_params(self):
     """Build a LinearSimilarity through ``from_params`` and check the weight vector length."""
     settings = {
         'tensor_1_dim': 4,
         'tensor_2_dim': 4,
         'combination': 'x,y,x*y,y-x',
     }
     similarity = LinearSimilarity.from_params(Params(settings))
     # Four comma-separated pieces of dimension 4 each -> 16 weights expected.
     weight_shape = list(similarity._weight_vector.size())
     assert weight_shape == [16]
 def test_can_construct_from_params(self):
     """Check that ``LinearSimilarity.from_params`` yields a 16-element weight vector."""
     params = Params(dict(
         tensor_1_dim=4,
         tensor_2_dim=4,
         combination='x,y,x*y,y-x',
     ))
     similarity = LinearSimilarity.from_params(params)
     # The combination has four pieces and both inputs are 4-dimensional.
     observed_size = list(similarity._weight_vector.size())
     assert observed_size == [16]
Beispiel #11
0
    def __init__(self, vocab: Vocabulary, cf_a, preloaded_elmo=None) -> None:
        """
        Build all the layers of the model as described by the configuration ``cf_a``.

        The layers are created in this order: ELMo text field embedder, optional linear
        projection of its output, highway layers, phrase LSTM, similarity function and
        matrix attention, modeling LSTM, span start predictor, span end encoder and
        predictor, and the output dropout.  Every Bayesian ("VB") component that gets
        instantiated is also appended to ``self.VBmodels``.  The optimizer is created last
        through ``pytut.get_optimizers``.

        Parameters
        ----------
        vocab : ``Vocabulary``
            Passed to the parent constructor together with ``cf_a.regularizer``.
        cf_a :
            Configuration object.  Its attributes select between the deterministic and the
            Bayesian variant of each layer and provide the hidden sizes, numbers of layers
            and dropout rates read below.
        preloaded_elmo : optional (default=``None``)
            An already built embedder.  When it is given and ``cf_a.use_ELMO`` is true, it
            is used instead of calling ``bidut.download_Elmo``.
        """
        super(BidirectionalAttentionFlow_1,
              self).__init__(vocab, cf_a.regularizer)

        # Initialize some data structures
        self.cf_a = cf_a
        # Bayesian data models
        self.VBmodels = []
        self.LinearModels = []

        ############## TEXT FIELD EMBEDDER with ELMO ####################
        # text_field_embedder : ``TextFieldEmbedder``
        #     Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input
        #     to the model.
        if cf_a.use_ELMO:
            if preloaded_elmo is not None:
                text_field_embedder = preloaded_elmo
            else:
                text_field_embedder = bidut.download_Elmo(
                    cf_a.ELMO_num_layers, cf_a.ELMO_droput)
                print("ELMO loaded from disk or downloaded")
        else:
            # NOTE(review): with ``use_ELMO`` false the embedder stays ``None`` and the
            # ``get_output_dim()`` calls below raise AttributeError, so the model cannot
            # currently be built without ELMo.
            text_field_embedder = None

        self._text_field_embedder = text_field_embedder

        # Output size of the optional linear projection applied to the embeddings.
        elmo_projection_dim = 200

        if cf_a.Add_Linear_projection_ELMO:
            if self.cf_a.VB_Linear_projection_ELMO:
                prior = Vil.Prior(**(cf_a.VB_Linear_projection_ELMO_prior))
                print(
                    "----------------- Bayesian Linear Projection ELMO --------------"
                )
                linear_projection_ELMO = LinearVB(
                    text_field_embedder.get_output_dim(),
                    elmo_projection_dim,
                    prior=prior)
                self.VBmodels.append(linear_projection_ELMO)
            else:
                linear_projection_ELMO = torch.nn.Linear(
                    text_field_embedder.get_output_dim(), elmo_projection_dim)

            self._linear_projection_ELMO = linear_projection_ELMO

        ############## Highway layers ####################
        # num_highway_layers : ``int``
        #     The number of highway layers to use in between embedding the input and
        #     passing it through the phrase layer.

        # The highway layers consume the projection output when the projection exists,
        # otherwise the raw embedder output.
        if cf_a.Add_Linear_projection_ELMO:
            Input_dimension_highway = elmo_projection_dim
        else:
            Input_dimension_highway = text_field_embedder.get_output_dim()

        num_highway_layers = cf_a.num_highway_layers
        if self.cf_a.VB_highway_layers:
            print("----------------- Bayesian Highway network  --------------")
            prior = Vil.Prior(**(cf_a.VB_highway_layers_prior))
            highway_layer = HighwayVB(Input_dimension_highway,
                                      num_highway_layers,
                                      prior=prior)
            self.VBmodels.append(highway_layer)
        else:
            highway_layer = Highway(Input_dimension_highway,
                                    num_highway_layers)
        highway_layer = TimeDistributed(highway_layer)

        self._highway_layer = highway_layer

        ############## Phrase layer ####################
        # phrase_layer : ``Seq2SeqEncoder``
        #     The encoder (with its own internal stacking) that we will use in between
        #     embedding tokens and doing the bidirectional attention.
        if cf_a.phrase_layer_dropout > 0:
            dropout_phrase_layer = torch.nn.Dropout(
                p=cf_a.phrase_layer_dropout)
        else:
            # No dropout requested: use an identity function in its place.
            dropout_phrase_layer = lambda x: x

        phrase_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(Input_dimension_highway,
                          hidden_size=cf_a.phrase_layer_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.phrase_layer_num_layers,
                          dropout=cf_a.phrase_layer_dropout))

        # Bidirectional LSTM: forward and backward hidden states are concatenated.
        phrase_encoding_out_dim = cf_a.phrase_layer_hidden_size * 2
        self._phrase_layer = phrase_layer
        self._dropout_phrase_layer = dropout_phrase_layer

        ############## Matrix attention layer ####################
        # similarity_function : ``SimilarityFunction``
        #     The similarity function that we will use when comparing encoded passage and
        #     question representations.
        if self.cf_a.VB_similarity_function:
            prior = Vil.Prior(**(cf_a.VB_similarity_function_prior))
            print(
                "----------------- Bayesian Similarity matrix --------------")
            similarity_function = LinearSimilarityVB(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim,
                prior=prior)
            self.VBmodels.append(similarity_function)
        else:
            similarity_function = LinearSimilarity(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim)

        matrix_attention = LegacyMatrixAttention(similarity_function)
        self._matrix_attention = matrix_attention

        ############## Modelling Layer ####################
        # modeling_layer : ``Seq2SeqEncoder``
        #     The encoder (with its own internal stacking) that we will use in between the
        #     bidirectional attention and predicting span start and end.
        if cf_a.modeling_passage_dropout > 0:
            dropout_modeling_passage = torch.nn.Dropout(
                p=cf_a.modeling_passage_dropout)
        else:
            dropout_modeling_passage = lambda x: x

        modeling_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(phrase_encoding_out_dim * 4,
                          hidden_size=cf_a.modeling_passage_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_passage_num_layers,
                          dropout=cf_a.modeling_passage_dropout))

        self._modeling_layer = modeling_layer
        self._dropout_modeling_passage = dropout_modeling_passage

        ############## Span Start Representation #####################
        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim

        # Linear layer to compute the span start scores
        if self.cf_a.VB_span_start_predictor_linear:
            prior = Vil.Prior(**(cf_a.VB_span_start_predictor_linear_prior))
            print(
                "----------------- Bayesian Span Start Predictor--------------"
            )
            span_start_predictor_linear = LinearVB(span_start_input_dim,
                                                   1,
                                                   prior=prior)
            self.VBmodels.append(span_start_predictor_linear)
        else:
            span_start_predictor_linear = torch.nn.Linear(
                span_start_input_dim, 1)

        self._span_start_predictor_linear = span_start_predictor_linear
        self._span_start_predictor = TimeDistributed(
            span_start_predictor_linear)

        ############## Span End Representation #####################
        # span_end_encoder : ``Seq2SeqEncoder``
        #     The encoder that we will use to incorporate span start predictions into the
        #     passage state before predicting span end.
        if cf_a.span_end_encoder_dropout > 0:
            dropout_span_end_encode = torch.nn.Dropout(
                p=cf_a.span_end_encoder_dropout)
        else:
            dropout_span_end_encode = lambda x: x

        span_end_encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(encoding_dim * 4 + modeling_dim * 3,
                          hidden_size=cf_a.modeling_span_end_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_span_end_num_layers,
                          dropout=cf_a.span_end_encoder_dropout))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim

        self._span_end_encoder = span_end_encoder
        self._dropout_span_end_encode = dropout_span_end_encode

        # Linear layer to compute the span end scores
        if self.cf_a.VB_span_end_predictor_linear:
            print(
                "----------------- Bayesian Span End Predictor--------------")
            prior = Vil.Prior(**(cf_a.VB_span_end_predictor_linear_prior))
            span_end_predictor_linear = LinearVB(span_end_input_dim,
                                                 1,
                                                 prior=prior)
            self.VBmodels.append(span_end_predictor_linear)
        else:
            span_end_predictor_linear = torch.nn.Linear(span_end_input_dim, 1)

        self._span_end_predictor_linear = span_end_predictor_linear
        self._span_end_predictor = TimeDistributed(span_end_predictor_linear)

        ############## Dropout of the last layers #####################
        if cf_a.spans_output_dropout > 0:
            # The rate must come from the same setting that gates this branch; it was
            # previously taken from ``span_end_encoder_dropout`` by mistake.
            dropout_spans_output = torch.nn.Dropout(
                p=cf_a.spans_output_dropout)
        else:
            dropout_spans_output = lambda x: x

        self._dropout_spans_output = dropout_spans_output

        ############## Checks and accuracy metrics #####################
        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(Input_dimension_highway,
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # mask_lstms : ``bool``, optional (default=True)
        #     If ``False``, we will skip passing the mask to the LSTM layers.  This gives a
        #     ~2x speedup, with only a slight performance decrease, if any.  We haven't
        #     experimented much with this yet, but have confirmed that we still get very
        #     similar performance with much faster training times.  We still use the mask
        #     for all softmaxes, but avoid the shuffling that's required when using masking
        #     with pytorch LSTMs.
        self._mask_lstms = cf_a.mask_lstms

        ################### Initialize parameters ##############################
        # They are all initialized when instantiating the components above.

        ####################### OPTIMIZER ################
        optimizer = pytut.get_optimizers(self, cf_a)
        self._optimizer = optimizer
 def test_weights_are_correct_sizes(self):
     """Check the parameter sizes of a freshly built 'x,y' similarity with dims 3 and 6."""
     similarity = LinearSimilarity(combination='x,y', tensor_1_dim=3, tensor_2_dim=6)
     weight_shape = list(similarity._weight_vector.size())
     bias_shape = list(similarity._bias.size())
     # 3 + 6 = 9 weights for the two concatenated inputs, and a single bias term.
     assert weight_shape == [9]
     assert bias_shape == [1]
# The dimensionality of the encoded question vectors (size of their last axis).
encoding_dim = encoded_question.size(-1)
print("encoding_dim: ", encoding_dim)
print("Question encoding: ", encoded_question.shape)
print("Passage encoding: ", encoded_passage.shape)

################### SIMILARITY FUNCTION LAYER  #########################################
# NOTE: Since the LSTM implementation of PyTorch cannot apply dropout in the last layer,
# we just apply it ourselves later.

print("-------------- SIMILARITY LAYER ---------------")

# Size the similarity weights from the tensors that are actually compared below,
# rather than hard-coding 200, so the snippet keeps working if the encoder size changes.
similarity_function = LinearSimilarity(combination="x,y,x*y",
                                       tensor_1_dim=encoded_passage.size(-1),
                                       tensor_2_dim=encoding_dim)

matrix_attention = LegacyMatrixAttention(similarity_function)

passage_question_similarity = matrix_attention(encoded_passage,
                                               encoded_question)
# Shape: (batch_size, passage_length, question_length)
print("passage question similarity: ", passage_question_similarity.shape)

# Shape: (batch_size, passage_length, question_length)
passage_question_attention = util.masked_softmax(passage_question_similarity,
                                                 question_mask)
# Shape: (batch_size, passage_length, encoding_dim)
passage_question_vectors = util.weighted_sum(encoded_question,
                                             passage_question_attention)