Example #1
    def test_get_best_span(self):
        # pylint: disable=protected-access

        span_begin_probs = torch.FloatTensor([[0.1, 0.3, 0.05, 0.3,
                                               0.25]]).log()
        span_end_probs = torch.FloatTensor([[0.65, 0.05, 0.2, 0.05,
                                             0.05]]).log()
        begin_end_idxs = BidirectionalAttentionFlow.get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 0]])

        # When we were using exclusive span ends, this was an edge case of the dynamic program.
        # We're keeping the test to make sure we still get it right after the switch to inclusive
        # span ends.  The best answer is (1, 1).
        span_begin_probs = torch.FloatTensor([[0.4, 0.5, 0.1]]).log()
        span_end_probs = torch.FloatTensor([[0.3, 0.6, 0.1]]).log()
        begin_end_idxs = BidirectionalAttentionFlow.get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 1]])

        # Another instance that used to be an edge case.
        span_begin_probs = torch.FloatTensor([[0.8, 0.1, 0.1]]).log()
        span_end_probs = torch.FloatTensor([[0.8, 0.1, 0.1]]).log()
        begin_end_idxs = BidirectionalAttentionFlow.get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 0]])

        span_begin_probs = torch.FloatTensor([[0.1, 0.2, 0.05, 0.3,
                                               0.25]]).log()
        span_end_probs = torch.FloatTensor([[0.1, 0.2, 0.5, 0.05, 0.15]]).log()
        begin_end_idxs = BidirectionalAttentionFlow.get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 2]])
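
A brute-force search makes the assertions above easy to sanity-check. The sketch below is a minimal reference, not AllenNLP's actual implementation: it assumes inclusive span ends, as the comments describe, and scores every (begin, end) pair with end >= begin by summing the two log-probabilities.

import torch

def naive_best_span(span_begin_probs: torch.Tensor,
                    span_end_probs: torch.Tensor) -> torch.Tensor:
    # For each batch row, enumerate every valid (begin, end) pair and keep
    # the one with the highest combined log-probability.
    batch_size, passage_length = span_begin_probs.size()
    best = torch.zeros(batch_size, 2, dtype=torch.long)
    for b in range(batch_size):
        best_score = float('-inf')
        for begin in range(passage_length):
            for end in range(begin, passage_length):  # inclusive end index
                score = (span_begin_probs[b, begin]
                         + span_end_probs[b, end]).item()
                if score > best_score:
                    best_score = score
                    best[b] = torch.tensor([begin, end])
    return best

On the first case, (0, 0) scores log(0.1) + log(0.65), beating the runner-up (1, 2) at log(0.3) + log(0.2), which matches the asserted [[0, 0]].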
Example #2
    def test_get_best_span(self):
        # pylint: disable=protected-access

        span_begin_probs = Variable(torch.FloatTensor([[0.1, 0.3, 0.05, 0.3, 0.25]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.65, 0.05, 0.2, 0.05, 0.05]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 0]])

        # When we were using exclusive span ends, this was an edge case of the dynamic program.
        # We're keeping the test to make sure we still get it right after the switch to inclusive
        # span ends.  The best answer is (1, 1).
        span_begin_probs = Variable(torch.FloatTensor([[0.4, 0.5, 0.1]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.3, 0.6, 0.1]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 1]])

        # Another instance that used to be an edge case.
        span_begin_probs = Variable(torch.FloatTensor([[0.8, 0.1, 0.1]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.8, 0.1, 0.1]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 0]])

        span_begin_probs = Variable(torch.FloatTensor([[0.1, 0.2, 0.05, 0.3, 0.25]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.1, 0.2, 0.5, 0.05, 0.15]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 2]])
Example #3
    def __init__(self, nlp_toolkit: NLPToolkit):
        self.nlp_toolkit = nlp_toolkit
        archive_loader = ArchiveLoader(_MODEL_ARCHIVE)
        config = archive_loader.get_config()
        model_params = config.get('model')
        vocabulary = archive_loader.get_vocabulary()

        self.bidaf_model = BidirectionalAttentionFlow.from_params(
            vocabulary, model_params)
        model_state = archive_loader.get_model_state()
        self.bidaf_model.load_state_dict(model_state)

        self.vocab_reader = archive_loader.get_vocab_reader()
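
ArchiveLoader here is project-specific; only the four getters the constructor calls are implied. Below is a minimal sketch of that interface under an assumed AllenNLP-style archive layout (a config.json plus weights saved with torch.save); the file names and structure are illustrative, not this project's actual format.

import json
import torch

class ArchiveLoader:
    # Sketch of the interface the constructor above relies on.

    def __init__(self, archive_path: str):
        self.archive_path = archive_path

    def get_config(self):
        # Experiment configuration; its 'model' section feeds from_params.
        with open(f'{self.archive_path}/config.json') as config_file:
            return json.load(config_file)

    def get_model_state(self):
        # State dict previously saved via torch.save(model.state_dict(), ...).
        return torch.load(f'{self.archive_path}/weights.th', map_location='cpu')

    # get_vocabulary() and get_vocab_reader() would deserialize the saved
    # Vocabulary in the same fashion; they are omitted here.

One detail worth noting: after load_state_dict, callers typically switch the model to inference mode with bidaf_model.eval() so that dropout is disabled.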
Example #4
    def test_get_best_span(self):
        # pylint: disable=protected-access

        # Note that the best span cannot be (1, 0) since even though 0.3 * 0.5 is the greatest
        # value, the end span index is constrained to occur after the begin span index.
        span_begin_probs = Variable(
            torch.FloatTensor([[0.1, 0.3, 0.05, 0.3, 0.25]])).log()
        span_end_probs = Variable(
            torch.FloatTensor([[0.5, 0.1, 0.2, 0.05, 0.15]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 2]])

        # Testing an edge case of the dynamic program here, for the order of when you update the
        # best previous span position.  We should not get (1, 1), because that's an empty span.
        span_begin_probs = Variable(torch.FloatTensor([[0.4, 0.5, 0.1]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.3, 0.6, 0.1]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 1]])

        # Testing another edge case of the dynamic program here, where (0, 0) is the best solution
        # without constraints.
        span_begin_probs = Variable(torch.FloatTensor([[0.8, 0.1, 0.1]])).log()
        span_end_probs = Variable(torch.FloatTensor([[0.8, 0.1, 0.1]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[0, 1]])

        # test higher-order input
        # Note that the best span cannot be (1, 1) since even though 0.3 * 0.5 is the greatest
        # value, the end span index is constrained to occur after the begin span index.
        span_begin_probs = Variable(
            torch.FloatTensor([[0.1, 0.3, 0.05, 0.3, 0.25]])).log()
        span_end_probs = Variable(
            torch.FloatTensor([[0.1, 0.5, 0.2, 0.05, 0.15]])).log()
        begin_end_idxs = BidirectionalAttentionFlow._get_best_span(
            span_begin_probs, span_end_probs)
        assert_almost_equal(begin_end_idxs.data.numpy(), [[1, 2]])
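
This version of the test predates the switch to inclusive span ends: the end index is exclusive, so (i, i) denotes an empty span, and the 0.8/0.8 case yields (0, 1) here rather than the (0, 0) asserted in Examples #1 and #2. The same brute-force sketch adapted to that older convention (again an illustration, not the library's dynamic program):

import torch

def naive_best_span_exclusive(span_begin_probs: torch.Tensor,
                              span_end_probs: torch.Tensor) -> torch.Tensor:
    # Identical search to the inclusive version, but a valid span needs
    # end > begin, since the end index is exclusive and (i, i) is empty.
    batch_size, passage_length = span_begin_probs.size()
    best = torch.zeros(batch_size, 2, dtype=torch.long)
    for b in range(batch_size):
        best_score = float('-inf')
        for begin in range(passage_length):
            for end in range(begin + 1, passage_length):
                score = (span_begin_probs[b, begin]
                         + span_end_probs[b, end]).item()
                if score > best_score:
                    best_score = score
                    best[b] = torch.tensor([begin, end])
    return best

In the 0.8/0.8 case, (0, 1) and (0, 2) tie at log(0.8) + log(0.1); the strict comparison keeps the first pair visited, matching the asserted [[0, 1]].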
Example #5
    def setUp(self):
        super(BidirectionalAttentionFlowTest, self).setUp()

        constants.GLOVE_PATH = 'tests/fixtures/glove.6B.100d.sample.txt.gz'
        reader_params = Params({
            'token_indexers': {
                'tokens': {
                    'type': 'single_id'
                },
                'token_characters': {
                    'type': 'characters'
                }
            }
        })
        dataset = SquadReader.from_params(reader_params).read(
            'tests/fixtures/data/squad.json')
        vocab = Vocabulary.from_dataset(dataset)
        self.vocab = vocab
        dataset.index_instances(vocab)
        self.dataset = dataset
        self.token_indexers = {
            'tokens': SingleIdTokenIndexer(),
            'token_characters': TokenCharactersIndexer()
        }

        self.model = BidirectionalAttentionFlow.from_params(
            self.vocab, Params({}))

        small_params = Params({
            'text_field_embedder': {
                'tokens': {
                    'type': 'embedding',
                    'pretrained_file': constants.GLOVE_PATH,
                    'trainable': False,
                    'projection_dim': 4
                },
                'token_characters': {
                    'type': 'character_encoding',
                    'embedding': {
                        'embedding_dim': 8
                    },
                    'encoder': {
                        'type': 'cnn',
                        'embedding_dim': 8,
                        'num_filters': 4,
                        'ngram_filter_sizes': [5]
                    }
                }
            },
            'phrase_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 8,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'similarity_function': {
                'type': 'linear',
                'combination': 'x,y,x*y',
                'tensor_1_dim': 8,
                'tensor_2_dim': 8
            },
            'modeling_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 32,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'span_end_encoder': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 56,
                'hidden_size': 4,
                'num_layers': 1,
            },
        })
        self.small_model = BidirectionalAttentionFlow.from_params(
            self.vocab, small_params)
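
The sizes in small_params are tightly coupled; the arithmetic below traces where each input_size comes from. The four-way merge and the 56-dimensional span-end input follow the standard BiDAF concatenation layout, which is an assumption about this implementation rather than something stated in the snippet.

# Token embedding: 4 (projected GloVe) + 4 (char-CNN filters) = 8,
# the phrase_layer's input_size.
embedding_dim = 4 + 4                                # 8

# A bidirectional LSTM with hidden_size 4 outputs 2 * 4 = 8, so both
# similarity_function tensor dims are 8.
encoding_dim = 2 * 4                                 # 8

# BiDAF concatenates four encoding-sized pieces (the encoded passage, the
# attended question, and two elementwise products) for the modeling_layer.
merged_dim = 4 * encoding_dim                        # 32

# The span_end_encoder input stacks the merged passage, the modeled passage,
# the tiled span-start summary, and their elementwise product.
modeling_dim = 2 * 4                                 # 8
span_end_input_dim = merged_dim + 3 * modeling_dim   # 32 + 24 = 56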