def __init__(self,
             options_file: str,
             weight_file: str,
             requires_grad: bool = False) -> None:
    super(_ElmoBiLm, self).__init__()

    self._token_embedder = _ElmoCharacterEncoder(options_file,
                                                 weight_file,
                                                 requires_grad=requires_grad)

    with open(cached_path(options_file), 'r') as fin:
        options = json.load(fin)
    if not options['lstm'].get('use_skip_connections'):
        raise ConfigurationError('We only support pretrained biLMs with residual connections')
    self._elmo_lstm = ElmoLstm(input_size=options['lstm']['projection_dim'],
                               hidden_size=options['lstm']['projection_dim'],
                               cell_size=options['lstm']['dim'],
                               num_layers=options['lstm']['n_layers'],
                               memory_cell_clip_value=options['lstm']['cell_clip'],
                               state_projection_clip_value=options['lstm']['proj_clip'],
                               requires_grad=requires_grad)
    self._elmo_lstm.load_weights(weight_file)
    # Number of representation layers including context independent layer
    self.num_layers = options['lstm']['n_layers'] + 1
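
# For reference, the constructor above only reads the 'lstm' section of the options
# file. A minimal illustrative options dict that would satisfy it is sketched below;
# the numeric values are placeholders, not the published ELMo settings.
example_options = {
    'lstm': {
        'use_skip_connections': True,   # required, otherwise ConfigurationError is raised
        'projection_dim': 512,          # input_size / hidden_size of ElmoLstm
        'dim': 4096,                    # LSTM memory cell size
        'n_layers': 2,                  # num_layers; yields n_layers + 1 representation layers
        'cell_clip': 3,                 # memory_cell_clip_value
        'proj_clip': 3,                 # state_projection_clip_value
    }
}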
def test_elmo_lstm(self):
    input_tensor = torch.rand(4, 5, 3)
    input_tensor[1, 4:, :] = 0.
    input_tensor[2, 2:, :] = 0.
    input_tensor[3, 1:, :] = 0.
    mask = torch.ones([4, 5])
    mask[1, 4:] = 0.
    mask[2, 2:] = 0.
    mask[3, 1:] = 0.

    lstm = ElmoLstm(num_layers=2,
                    input_size=3,
                    hidden_size=5,
                    cell_size=7,
                    memory_cell_clip_value=2,
                    state_projection_clip_value=1)
    output_sequence = lstm(input_tensor, mask)

    # Check all the layer outputs are masked properly.
    numpy.testing.assert_array_equal(output_sequence.data[:, 1, 4:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[:, 2, 2:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[:, 3, 1:, :].numpy(), 0.0)

    # LSTM state should be (num_layers, batch_size, hidden_size * 2),
    # since the forward and backward directions are concatenated.
    assert list(lstm._states[0].size()) == [2, 4, 10]
    # LSTM memory cell should be (num_layers, batch_size, cell_size * 2)
    assert list(lstm._states[1].size()) == [2, 4, 14]
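
# The hand-built mask above corresponds to sequence lengths [5, 4, 2, 1]. A minimal
# standalone sketch (plain PyTorch, assumed here rather than taken from the library)
# that builds the same (batch, max_len) mask from a tensor of lengths:
import torch

def _mask_from_lengths(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    # 1.0 where the position index is inside the sequence, 0.0 in the padded tail.
    positions = torch.arange(max_len).unsqueeze(0)        # shape (1, max_len)
    return (positions < lengths.unsqueeze(1)).float()     # shape (batch, max_len)

# _mask_from_lengths(torch.tensor([5, 4, 2, 1]), max_len=5) reproduces `mask` above.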
def __init__(self,
             options_file: str,
             weight_file: str,
             requires_grad: bool = False,
             vocab_to_cache: List[str] = None) -> None:
    super(_ElmoBiLm, self).__init__()

    self._token_embedder = _ElmoCharacterEncoder(
            options_file,
            weight_file,
            requires_grad=False if vocab_to_cache is not None else requires_grad)

    self._requires_grad = requires_grad
    if requires_grad and vocab_to_cache:
        logging.warning("You are fine tuning ELMo and caching char CNN word vectors. "
                        "This behaviour is not guaranteed to be well defined, particularly "
                        "if not all of your inputs will occur in the vocabulary cache. "
                        "_ElmoCharacterEncoder will be frozen because "
                        "it is not used after word embedding caching.")
    # This is an embedding, used to look up cached
    # word vectors built from character level cnn embeddings.
    self._word_embedding = None
    self._bos_embedding: torch.Tensor = None
    self._eos_embedding: torch.Tensor = None
    if vocab_to_cache:
        logging.info("Caching character cnn layers for words in vocabulary.")
        # This sets 3 attributes, _word_embedding, _bos_embedding and _eos_embedding.
        # They are set in the method so they can be accessed from outside the
        # constructor.
        self.create_cached_cnn_embeddings(vocab_to_cache)

    with open(cached_path(options_file), 'r') as fin:
        options = json.load(fin)
    if not options['lstm'].get('use_skip_connections'):
        raise ConfigurationError('We only support pretrained biLMs with residual connections')
    self._elmo_lstm = ElmoLstm(input_size=options['lstm']['projection_dim'],
                               hidden_size=options['lstm']['projection_dim'],
                               cell_size=options['lstm']['dim'],
                               num_layers=options['lstm']['n_layers'],
                               memory_cell_clip_value=options['lstm']['cell_clip'],
                               state_projection_clip_value=options['lstm']['proj_clip'],
                               requires_grad=requires_grad)
    self._elmo_lstm.load_weights(weight_file)
    # Number of representation layers including context independent layer
    self.num_layers = options['lstm']['n_layers'] + 1
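
# Hypothetical usage sketch for the cached variant above; the file paths and the
# vocabulary list are placeholders, not shipped resources. With vocab_to_cache set,
# the char CNN is run once per word at construction time and then frozen, and word
# lookups go through the cached embedding plus the stored BOS/EOS vectors.
bilm = _ElmoBiLm(options_file='elmo_options.json',     # placeholder path
                 weight_file='elmo_weights.hdf5',      # placeholder path
                 requires_grad=False,
                 vocab_to_cache=['<S>', '</S>', 'the', 'of', 'and'])
# bilm._word_embedding, bilm._bos_embedding and bilm._eos_embedding are now populated
# by create_cached_cnn_embeddings(), as noted in the constructor comments.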
def __init__(self, conf: Dict,
             word_batch: WordBatch,
             char_batch: CharacterBatch):
    super(BiLMBase, self).__init__()
    self.conf = conf

    c = conf['token_embedder']
    if word_batch is not None:
        if 'pretrained' in c:
            embs = load_embedding_txt(c['pretrained'], c['has_header'])
            logger.info('loaded {0} embedding entries.'.format(len(embs[0])))
        else:
            embs = None
        word_embedder = Embeddings(c['word_dim'], word_batch.mapping, embs=embs,
                                   fix_emb=False, normalize=False)
    else:
        word_embedder = None

    if char_batch is not None:
        dim = c.get('char_dim') if c.get('char_dim', 0) > 0 else c.get('wordpiece_dim')
        char_embedder = Embeddings(dim, char_batch.mapping, embs=None,
                                   fix_emb=False, normalize=False)
    else:
        char_embedder = None

    token_embedder_name = c['name'].lower()
    if token_embedder_name == 'cnn':
        self.token_embedder = ConvTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                word_embedder=word_embedder,
                                                char_embedder=char_embedder,
                                                filters=c['filters'],
                                                n_highway=c['n_highway'],
                                                activation=c['activation'])
    elif token_embedder_name == 'lstm':
        self.token_embedder = LstmTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                word_embedder=word_embedder,
                                                char_embedder=char_embedder,
                                                dropout=conf['dropout'])
    elif token_embedder_name == 'grecnn':
        self.token_embedder = GatedRecNNTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                                      word_embedder=word_embedder,
                                                      char_embedder=char_embedder)
    elif token_embedder_name == 'sum':
        self.token_embedder = SumTokenEmbedder(output_dim=conf['encoder']['projection_dim'],
                                               word_embedder=word_embedder,
                                               char_embedder=char_embedder)
    else:
        raise ValueError('Unknown token embedder name: {}'.format(token_embedder_name))

    self.add_sentence_boundary = c.get('add_sentence_boundary', False)
    self.add_sentence_boundary_ids = c.get('add_sentence_boundary_ids', False)
    assert not (self.add_sentence_boundary and self.add_sentence_boundary_ids)
    if self.add_sentence_boundary:
        dim = self.token_embedder.get_output_dim()
        self.bos_embeddings = torch.nn.Parameter(torch.randn(dim) / math.sqrt(dim))
        self.eos_embeddings = torch.nn.Parameter(torch.randn(dim) / math.sqrt(dim))

    c = conf['encoder']
    encoder_name = c['name'].lower()
    if encoder_name == 'elmo':
        # NOTE: for a fair comparison, we set stateful to False
        self.encoder = ElmoLstm(input_size=c['projection_dim'],
                                hidden_size=c['projection_dim'],
                                cell_size=c['dim'],
                                requires_grad=True,
                                num_layers=c['n_layers'],
                                recurrent_dropout_probability=conf['dropout'],
                                memory_cell_clip_value=c['cell_clip'],
                                state_projection_clip_value=c['proj_clip'],
                                stateful=False)
    elif encoder_name == 'lstm':
        self.encoder = LstmbiLm(input_size=c['projection_dim'],
                                hidden_size=c['projection_dim'],
                                num_layers=c['n_layers'],
                                dropout=conf['dropout'])
    elif encoder_name == 'bengio03highway':
        self.encoder = Bengio03HighwayBiLm(width=c['width'],
                                           input_size=c['projection_dim'],
                                           hidden_size=c['projection_dim'],
                                           n_layers=c['n_layers'],
                                           n_highway=c['n_highway'],
                                           use_position=c.get('position', False),
                                           dropout=conf['dropout'])
    elif encoder_name == 'bengio03highway_v2':
        self.encoder = Bengio03HighwayBiLmV2(width=c['width'],
                                             input_size=c['projection_dim'],
                                             hidden_size=c['projection_dim'],
                                             n_layers=c['n_layers'],
                                             n_highway=c['n_highway'],
                                             use_position=c.get('position', False),
                                             dropout=conf['dropout'])
    elif encoder_name == 'bengio03resnet':
        self.encoder = Bengio03ResNetBiLm(width=c['width'],
                                          input_size=c['projection_dim'],
                                          hidden_size=c['projection_dim'],
                                          n_layers=c['n_layers'],
                                          use_position=c.get('position', False),
                                          dropout=conf['dropout'])
    elif encoder_name == 'lblhighway':
        self.encoder = LBLHighwayBiLm(width=c['width'],
                                      input_size=c['projection_dim'],
                                      hidden_size=c['projection_dim'],
                                      n_layers=c['n_layers'],
                                      n_highway=c['n_highway'],
                                      use_position=c.get('position', False),
                                      dropout=conf['dropout'])
    elif encoder_name == 'lblhighway_v2':
        self.encoder = LBLHighwayBiLmV2(width=c['width'],
                                        input_size=c['projection_dim'],
                                        hidden_size=c['projection_dim'],
                                        n_layers=c['n_layers'],
                                        n_highway=c['n_highway'],
                                        use_position=c.get('position', False),
                                        dropout=conf['dropout'])
    elif encoder_name == 'lblresnet':
        self.encoder = LBLResNetBiLm(width=c['width'],
                                     input_size=c['projection_dim'],
                                     hidden_size=c['projection_dim'],
                                     n_layers=c['n_layers'],
                                     use_position=c.get('position', False),
                                     dropout=conf['dropout'])
    elif encoder_name == 'selfattn':
        self.encoder = SelfAttentiveLBLBiLM(width=c['width'],
                                            input_size=c['projection_dim'],
                                            hidden_size=c['projection_dim'],
                                            n_heads=c['n_heads'],
                                            n_layers=c['n_layers'],
                                            n_highway=c['n_highway'],
                                            use_position=c.get('position', False),
                                            use_relative_position=c.get('relative_position_weights', False),
                                            dropout=conf['dropout'])
    elif encoder_name == 'selfattn_v2':
        self.encoder = SelfAttentiveLBLBiLMV2(width=c['width'],
                                              input_size=c['projection_dim'],
                                              hidden_size=c['projection_dim'],
                                              n_heads=c['n_heads'],
                                              n_layers=c['n_layers'],
                                              n_highway=c['n_highway'],
                                              use_position=c.get('position', False),
                                              use_relative_position=c.get('relative_position_weights', False),
                                              dropout=conf['dropout'])
    elif encoder_name == 'selfattn_v3':
        self.encoder = SelfAttentiveLBLBiLMV3(width=c['width'],
                                              input_size=c['projection_dim'],
                                              hidden_size=c['projection_dim'],
                                              n_heads=c['n_heads'],
                                              n_layers=c['n_layers'],
                                              n_highway=c['n_highway'],
                                              use_position=c.get('position', False),
                                              use_relative_position=c.get('relative_position_weights', False),
                                              dropout=conf['dropout'])
    elif encoder_name == 'cnn':
        self.encoder = GatedCnnLm(input_size=c['projection_dim'],
                                  layers=c['layers'],
                                  dropout=conf['dropout'])
    else:
        raise ValueError('Unknown encoder name: {}'.format(encoder_name))

    self.output_dim = conf['encoder']['projection_dim']
    self.token_embedding_time = 0
    self.encoding_time = 0
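
# The BiLMBase constructor above is driven entirely by the `conf` dictionary. The
# sketch below shows an illustrative configuration for the 'cnn' token embedder with
# the 'elmo' encoder; the keys mirror what __init__ reads, but every value is a
# placeholder rather than the repository's shipped defaults.
example_conf = {
    'token_embedder': {
        'name': 'cnn',
        'word_dim': 100,                          # used when word_batch is provided
        'char_dim': 50,                           # used when char_batch is provided
        'filters': [[1, 32], [2, 32], [3, 64]],   # (width, number) pairs for the char CNN
        'n_highway': 2,
        'activation': 'relu',
        'add_sentence_boundary': False,
        'add_sentence_boundary_ids': False,
    },
    'encoder': {
        'name': 'elmo',
        'projection_dim': 512,
        'dim': 4096,
        'n_layers': 2,
        'cell_clip': 3,
        'proj_clip': 3,
    },
    'dropout': 0.1,
}
# model = BiLMBase(example_conf, word_batch, char_batch)  # batches come from the data pipeline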