Example #1
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 requires_grad: bool = False) -> None:
        super(_ElmoBiLm, self).__init__()

        self._token_embedder = _ElmoCharacterEncoder(
            options_file, weight_file, requires_grad=requires_grad)

        with open(cached_path(options_file), 'r') as fin:
            options = json.load(fin)
        if not options['lstm'].get('use_skip_connections'):
            raise ConfigurationError(
                'We only support pretrained biLMs with residual connections')
        self._elmo_lstm = ElmoLstm(
            input_size=options['lstm']['projection_dim'],
            hidden_size=options['lstm']['projection_dim'],
            cell_size=options['lstm']['dim'],
            num_layers=options['lstm']['n_layers'],
            memory_cell_clip_value=options['lstm']['cell_clip'],
            state_projection_clip_value=options['lstm']['proj_clip'],
            requires_grad=requires_grad)
        self._elmo_lstm.load_weights(weight_file)
        # Number of representation layers including context independent layer
        self.num_layers = options['lstm']['n_layers'] + 1
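
A hedged usage sketch, not part of the scraped example: the two file names below are placeholders, and the behaviour in the comments follows from the constructor above.

# Hypothetical usage; both paths are placeholders, not real files.
options_file = "elmo_options.json"   # must contain the 'lstm' config block read above
weight_file = "elmo_weights.hdf5"    # HDF5 archive consumed by load_weights()

# With requires_grad=False both the character CNN and the ElmoLstm stay frozen.
bilm = _ElmoBiLm(options_file, weight_file, requires_grad=False)
print(bilm.num_layers)  # n_layers + 1: the context-independent layer counts too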
Example #2
    def test_elmo_lstm(self):
        input_tensor = torch.rand(4, 5, 3)
        input_tensor[1, 4:, :] = 0.
        input_tensor[2, 2:, :] = 0.
        input_tensor[3, 1:, :] = 0.
        mask = torch.ones([4, 5])
        mask[1, 4:] = 0.
        mask[2, 2:] = 0.
        mask[3, 1:] = 0.

        lstm = ElmoLstm(num_layers=2,
                        input_size=3,
                        hidden_size=5,
                        cell_size=7,
                        memory_cell_clip_value=2,
                        state_projection_clip_value=1)
        output_sequence = lstm(input_tensor, mask)

        # Check all the layer outputs are masked properly.
        numpy.testing.assert_array_equal(
            output_sequence.data[:, 1, 4:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(
            output_sequence.data[:, 2, 2:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(
            output_sequence.data[:, 3, 1:, :].numpy(), 0.0)

        # LSTM state is (num_layers, batch_size, 2 * hidden_size); the
        # forward and backward hidden states are concatenated, so 10 == 2 * 5.
        assert list(lstm._states[0].size()) == [2, 4, 10]
        # LSTM memory cell is (num_layers, batch_size, 2 * cell_size), so 14 == 2 * 7.
        assert list(lstm._states[1].size()) == [2, 4, 14]
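
One further shape check that could be added here (an assumption inferred from the indexing above, not part of the original test): ElmoLstm stacks the per-layer outputs and concatenates the two directions, so the output is (num_layers, batch_size, timesteps, 2 * hidden_size).

# Hedged addition: shape convention inferred from output_sequence[:, b, t, :] above.
assert list(output_sequence.size()) == [2, 4, 5, 10]  # 10 == 2 * hidden_size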
Example #3
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 requires_grad: bool = False,
                 vocab_to_cache: List[str] = None) -> None:
        super(_ElmoBiLm, self).__init__()

        self._token_embedder = _ElmoCharacterEncoder(
            options_file,
            weight_file,
            requires_grad=(False if vocab_to_cache is not None else requires_grad))

        self._requires_grad = requires_grad
        if requires_grad and vocab_to_cache:
            logging.warning(
                "You are fine tuning ELMo and caching char CNN word vectors. "
                "This behaviour is not guaranteed to be well defined, "
                "particularly if not all of your inputs will occur in the "
                "vocabulary cache. _ElmoCharacterEncoder will be frozen "
                "because it is not used after word embedding caching.")
        # This is an embedding used to look up cached word vectors
        # built from the character-level CNN embeddings.
        self._word_embedding = None
        self._bos_embedding: torch.Tensor = None
        self._eos_embedding: torch.Tensor = None
        if vocab_to_cache:
            logging.info(
                "Caching character cnn layers for words in vocabulary.")
            # This sets 3 attributes, _word_embedding, _bos_embedding and _eos_embedding.
            # They are set in the method so they can be accessed from outside the
            # constructor.
            self.create_cached_cnn_embeddings(vocab_to_cache)

        with open(cached_path(options_file), 'r') as fin:
            options = json.load(fin)
        if not options['lstm'].get('use_skip_connections'):
            raise ConfigurationError(
                'We only support pretrained biLMs with residual connections')
        self._elmo_lstm = ElmoLstm(
            input_size=options['lstm']['projection_dim'],
            hidden_size=options['lstm']['projection_dim'],
            cell_size=options['lstm']['dim'],
            num_layers=options['lstm']['n_layers'],
            memory_cell_clip_value=options['lstm']['cell_clip'],
            state_projection_clip_value=options['lstm']['proj_clip'],
            requires_grad=requires_grad)
        self._elmo_lstm.load_weights(weight_file)
        # Number of representation layers including context independent layer
        self.num_layers = options['lstm']['n_layers'] + 1
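
A sketch of the cached-vocabulary path (hypothetical values; the attribute names come from the constructor above): passing vocab_to_cache routes token lookups through the cached word embedding while the character CNN stays frozen.

# Hypothetical usage; the paths and vocabulary are placeholders.
vocab = ["<S>", "</S>", "the", "cat", "sat"]
bilm = _ElmoBiLm("elmo_options.json", "elmo_weights.hdf5",
                 requires_grad=True,    # fine-tune the LSTM weights
                 vocab_to_cache=vocab)  # the char CNN is frozen in this mode
# create_cached_cnn_embeddings() has populated these three attributes:
assert bilm._word_embedding is not None
assert bilm._bos_embedding is not None and bilm._eos_embedding is not None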
Example #4
    def __init__(self, conf: Dict,
                 word_batch: WordBatch,
                 char_batch: CharacterBatch):
        super(BiLMBase, self).__init__()
        self.conf = conf

        c = conf['token_embedder']
        if word_batch is not None:
            if 'pretrained' in c:
                embs = load_embedding_txt(c['pretrained'], c['has_header'])
                logger.info('loaded {0} embedding entries.'.format(len(embs[0])))
            else:
                embs = None
            word_embedder = Embeddings(c['word_dim'], word_batch.mapping, embs=embs, fix_emb=False, normalize=False)
        else:
            word_embedder = None

        if char_batch is not None:
            dim = c.get('char_dim') if c.get('char_dim', 0) > 0 else c.get('wordpiece_dim')
            char_embedder = Embeddings(dim, char_batch.mapping, embs=None, fix_emb=False, normalize=False)
        else:
            char_embedder = None

        token_embedder_name = c['name'].lower()
        if token_embedder_name == 'cnn':
            self.token_embedder = ConvTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                    word_embedder=word_embedder,
                                                    char_embedder=char_embedder,
                                                    filters=c['filters'],
                                                    n_highway=c['n_highway'],
                                                    activation=c['activation'])
        elif token_embedder_name == 'lstm':
            self.token_embedder = LstmTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                    word_embedder=word_embedder,
                                                    char_embedder=char_embedder,
                                                    dropout=conf['dropout'])
        elif token_embedder_name == 'grecnn':
            self.token_embedder = GatedRecNNTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                          word_embedder=word_embedder,
                                                          char_embedder=char_embedder)
        elif token_embedder_name == 'sum':
            self.token_embedder = SumTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                   word_embedder=word_embedder,
                                                   char_embedder=char_embedder)
        else:
            raise ValueError('Unknown token embedder name: {}'.format(token_embedder_name))

        self.add_sentence_boundary = c.get('add_sentence_boundary', False)
        self.add_sentence_boundary_ids = c.get('add_sentence_boundary_ids', False)
        assert not (self.add_sentence_boundary and self.add_sentence_boundary_ids)

        if self.add_sentence_boundary:
            dim = self.token_embedder.get_output_dim()
            self.bos_embeddings = torch.nn.Parameter(torch.randn(dim) / math.sqrt(dim))
            self.eos_embeddings = torch.nn.Parameter(torch.randn(dim) / math.sqrt(dim))

        c = conf['encoder']
        encoder_name = c['name'].lower()
        if encoder_name == 'elmo':
            # NOTE: for a fair comparison, we set stateful to False
            self.encoder = ElmoLstm(input_size=c['projection_dim'],
                                    hidden_size=c['projection_dim'],
                                    cell_size=c['dim'],
                                    requires_grad=True,
                                    num_layers=c['n_layers'],
                                    recurrent_dropout_probability=conf['dropout'],
                                    memory_cell_clip_value=c['cell_clip'],
                                    state_projection_clip_value=c['proj_clip'],
                                    stateful=False)
        elif encoder_name == 'lstm':
            self.encoder = LstmbiLm(input_size=c['projection_dim'],
                                    hidden_size=c['projection_dim'],
                                    num_layers=c['n_layers'],
                                    dropout=conf['dropout'])
        elif encoder_name == 'bengio03highway':
            self.encoder = Bengio03HighwayBiLm(width=c['width'],
                                               input_size=c['projection_dim'],
                                               hidden_size=c['projection_dim'],
                                               n_layers=c['n_layers'],
                                               n_highway=c['n_highway'],
                                               use_position=c.get('position', False),
                                               dropout=conf['dropout'])
        elif encoder_name == 'bengio03highway_v2':
            self.encoder = Bengio03HighwayBiLmV2(width=c['width'],
                                                 input_size=c['projection_dim'],
                                                 hidden_size=c['projection_dim'],
                                                 n_layers=c['n_layers'],
                                                 n_highway=c['n_highway'],
                                                 use_position=c.get('position', False),
                                                 dropout=conf['dropout'])
        elif encoder_name == 'bengio03resnet':
            self.encoder = Bengio03ResNetBiLm(width=c['width'],
                                              input_size=c['projection_dim'],
                                              hidden_size=c['projection_dim'],
                                              n_layers=c['n_layers'],
                                              use_position=c.get('position', False),
                                              dropout=conf['dropout'])
        elif encoder_name == 'lblhighway':
            self.encoder = LBLHighwayBiLm(width=c['width'],
                                          input_size=c['projection_dim'],
                                          hidden_size=c['projection_dim'],
                                          n_layers=c['n_layers'],
                                          n_highway=c['n_highway'],
                                          use_position=c.get('position', False),
                                          dropout=conf['dropout'])
        elif encoder_name == 'lblhighway_v2':
            self.encoder = LBLHighwayBiLmV2(width=c['width'],
                                            input_size=c['projection_dim'],
                                            hidden_size=c['projection_dim'],
                                            n_layers=c['n_layers'],
                                            n_highway=c['n_highway'],
                                            use_position=c.get('position', False),
                                            dropout=conf['dropout'])
        elif encoder_name == 'lblresnet':
            self.encoder = LBLResNetBiLm(width=c['width'],
                                         input_size=c['projection_dim'],
                                         hidden_size=c['projection_dim'],
                                         n_layers=c['n_layers'],
                                         use_position=c.get('position', False),
                                         dropout=conf['dropout'])
        elif encoder_name == 'selfattn':
            self.encoder = SelfAttentiveLBLBiLM(width=c['width'],
                                                input_size=c['projection_dim'],
                                                hidden_size=c['projection_dim'],
                                                n_heads=c['n_heads'],
                                                n_layers=c['n_layers'],
                                                n_highway=c['n_highway'],
                                                use_position=c.get('position', False),
                                                use_relative_position=c.get('relative_position_weights', False),
                                                dropout=conf['dropout'])
        elif encoder_name == 'selfattn_v2':
            self.encoder = SelfAttentiveLBLBiLMV2(width=c['width'],
                                                  input_size=c['projection_dim'],
                                                  hidden_size=c['projection_dim'],
                                                  n_heads=c['n_heads'],
                                                  n_layers=c['n_layers'],
                                                  n_highway=c['n_highway'],
                                                  use_position=c.get('position', False),
                                                  use_relative_position=c.get('relative_position_weights', False),
                                                  dropout=conf['dropout'])
        elif encoder_name == 'selfattn_v3':
            self.encoder = SelfAttentiveLBLBiLMV3(width=c['width'],
                                                  input_size=c['projection_dim'],
                                                  hidden_size=c['projection_dim'],
                                                  n_heads=c['n_heads'],
                                                  n_layers=c['n_layers'],
                                                  n_highway=c['n_highway'],
                                                  use_position=c.get('position', False),
                                                  use_relative_position=c.get('relative_position_weights', False),
                                                  dropout=conf['dropout'])
        elif encoder_name == 'cnn':
            self.encoder = GatedCnnLm(input_size=c['projection_dim'],
                                      layers=c['layers'],
                                      dropout=conf['dropout'])
        else:
            raise ValueError('Unknown encoder name: {}'.format(encoder_name))

        self.output_dim = conf['encoder']['projection_dim']

        self.token_embedding_time = 0
        self.encoding_time = 0
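
To make the expected configuration concrete, here is an illustrative conf dictionary for the 'cnn' token embedder with the 'elmo' encoder. Every key mirrors a lookup in __init__ above, but all values are assumptions, not settings from a real experiment.

# Illustrative config sketch; every value is a placeholder.
conf = {
    'dropout': 0.1,
    'token_embedder': {
        'name': 'cnn',
        'word_dim': 100,        # used when word_batch is not None
        'char_dim': 50,         # > 0, so wordpiece_dim is not consulted
        'filters': [[1, 32], [2, 32], [3, 64]],
        'n_highway': 2,
        'activation': 'relu',
    },
    'encoder': {
        'name': 'elmo',
        'projection_dim': 512,  # also becomes self.output_dim
        'dim': 4096,            # LSTM cell size
        'n_layers': 2,
        'cell_clip': 3,
        'proj_clip': 3,
    },
}
# model = BiLMBase(conf, word_batch, char_batch)  # batches supplied by the caller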