def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder = None,
    dropout: float = 0.0,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer=InitializerApplicator(),
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder
    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    self._softmax_loss = SoftmaxLoss(
        num_words=vocab.get_vocab_size(), embedding_dim=self._forward_dim
    )

    self._perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
def on_vocab_update(self):
    num_words = vocabulary.words_vocab_size(self.backbone.vocab)
    if len(self._loss.softmax_b) != num_words:
        self._loss = SoftmaxLoss(
            num_words=num_words,
            embedding_dim=self._forward_dim,
        )
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    dropout: float = None,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer: InitializerApplicator = None,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder

    if contextualizer.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}"
        )

    self._contextualizer = contextualizer
    self._bidirectional = bidirectional

    # The dimension for making predictions just in the forward
    # (or backward) direction.
    if self._bidirectional:
        self._forward_dim = contextualizer.get_output_dim() // 2
    else:
        self._forward_dim = contextualizer.get_output_dim()

    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
    else:
        self._softmax_loss = SoftmaxLoss(
            num_words=vocab.get_vocab_size("transactions"),
            embedding_dim=self._forward_dim,
        )

    # This buffer is now unused and exists only for backwards compatibility reasons.
    self.register_buffer("_last_average_loss", torch.zeros(1))

    self._perplexity = Perplexity()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    if initializer is not None:
        initializer(self)
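# A minimal, self-contained sketch (an illustration, not the library implementation)
# of what a full-softmax LM loss such as `SoftmaxLoss(num_words, embedding_dim)` is
# expected to compute: project each contextual embedding onto per-word logits and
# accumulate the negative log-likelihood of the gold targets. The class name
# `FullSoftmaxSketch`, the parameter shapes, and the summed reduction are assumptions
# made for illustration only.
import torch


class FullSoftmaxSketch(torch.nn.Module):
    def __init__(self, num_words: int, embedding_dim: int) -> None:
        super().__init__()
        # One weight row and one bias entry per word in the vocabulary.
        self.softmax_w = torch.nn.Parameter(
            torch.randn(num_words, embedding_dim) / embedding_dim ** 0.5
        )
        self.softmax_b = torch.nn.Parameter(torch.zeros(num_words))

    def forward(self, embeddings: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # embeddings: (num_targets, embedding_dim); targets: (num_targets,)
        logits = embeddings @ self.softmax_w.t() + self.softmax_b
        log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
        return torch.nn.functional.nll_loss(log_probs, targets.long(), reduction="sum")


# Usage sketch: 100 contextual embeddings of size 12 over a 10,000-word vocabulary.
_loss_fn = FullSoftmaxSketch(num_words=10000, embedding_dim=12)
_loss = _loss_fn(torch.rand(100, 12), torch.randint(0, 10000, (100,)))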
def test_sampled_equals_unsampled_during_eval(self):
    sampled_softmax = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=40)
    unsampled_softmax = SoftmaxLoss(num_words=10000, embedding_dim=12)

    sampled_softmax.eval()
    unsampled_softmax.eval()

    # set weights equal, use transpose because opposite shapes
    sampled_softmax.softmax_w.data = unsampled_softmax.softmax_w.t()
    sampled_softmax.softmax_b.data = unsampled_softmax.softmax_b

    # (sequence_length, embedding_dim)
    embedding = torch.rand(100, 12)
    targets = torch.randint(0, 1000, (100,)).long()

    full_loss = unsampled_softmax(embedding, targets).item()
    sampled_loss = sampled_softmax(embedding, targets).item()

    # Should be really close
    np.testing.assert_almost_equal(sampled_loss, full_loss)
def test_sampled_equals_unsampled_when_biased_against_non_sampled_positions(self):
    sampled_softmax = SampledSoftmaxLoss(num_words=10000, embedding_dim=12, num_samples=10)
    unsampled_softmax = SoftmaxLoss(num_words=10000, embedding_dim=12)

    # fake out choice function
    FAKE_SAMPLES = [100, 200, 300, 400, 500, 600, 700, 800, 900, 9999]

    def fake_choice(num_words: int, num_samples: int) -> Tuple[np.ndarray, int]:
        assert (num_words, num_samples) == (10000, 10)
        return np.array(FAKE_SAMPLES), 12

    sampled_softmax.choice_func = fake_choice

    # bias out the unsampled terms (write through .data so the in-place update
    # does not hit autograd's restriction on leaf tensors that require grad):
    for i in range(10000):
        if i not in FAKE_SAMPLES:
            unsampled_softmax.softmax_b.data[i] = -10000.0

    # set weights equal, use transpose because opposite shapes
    sampled_softmax.softmax_w.data = unsampled_softmax.softmax_w.t()
    sampled_softmax.softmax_b.data = unsampled_softmax.softmax_b

    sampled_softmax.train()
    unsampled_softmax.train()

    # (sequence_length, embedding_dim)
    embedding = torch.rand(100, 12)
    targets = torch.randint(0, 1000, (100,)).long()

    full_loss = unsampled_softmax(embedding, targets).item()
    sampled_loss = sampled_softmax(embedding, targets).item()

    # Should be close
    pct_error = (sampled_loss - full_loss) / full_loss
    assert abs(pct_error) < 0.001
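# Toy check of the biasing trick used in the test above (a sketch, not library code):
# pushing a logit far negative makes its softmax probability vanish, so the full
# softmax behaves as if only the "sampled" positions existed.
import torch

_logits = torch.tensor([2.0, 1.0, -10000.0])
_probs = torch.softmax(_logits, dim=-1)
_restricted = torch.softmax(_logits[:2], dim=-1)
assert torch.allclose(_probs[:2], _restricted, atol=1e-6)
assert _probs[2].item() == 0.0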
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    contextualizer: Seq2SeqEncoder,
    hyperbolic_embedder: TextFieldEmbedder,
    hyperbolic_encoder: Seq2VecEncoder,
    hyperbolic_weight: float,
    is_baseline: bool = False,
    dropout: float = None,
    num_samples: int = None,
    sparse_embeddings: bool = False,
    bidirectional: bool = False,
    initializer: InitializerApplicator = None,
) -> None:
    super().__init__(
        vocab,
        text_field_embedder,
        contextualizer,
        dropout,
        num_samples,
        sparse_embeddings,
        bidirectional,
        initializer,
    )

    # Reinitialize self._softmax_loss to replace the parent's default 'token' namespace.
    if num_samples is not None:
        self._softmax_loss = SampledSoftmaxLoss(
            num_words=vocab.get_vocab_size(namespace='euclidean'),
            embedding_dim=self._forward_dim,
            num_samples=num_samples,
            sparse=sparse_embeddings,
        )
    else:
        self._softmax_loss = SoftmaxLoss(
            num_words=vocab.get_vocab_size(namespace='euclidean'),
            embedding_dim=self._forward_dim,
        )

    # Initialize hyperbolic components.
    self._hyperbolic_embedder = hyperbolic_embedder
    self._hyperbolic_encoder = hyperbolic_encoder
    self._hyperbolic_encoding_loss = HyperbolicL1()
    self._hyperbolic_weight = hyperbolic_weight

    # Whether to run as a vanilla (baseline) language model.
    self.is_baseline = is_baseline
def __init__(
    self,
    backbone: ModelBackbone,
    dropout: float = None,
    bidirectional: bool = False,
) -> None:
    super(LanguageModelling, self).__init__(backbone)

    self._empty_prediction = LanguageModellingPrediction(
        lm_embeddings=numpy.array([]), mask=numpy.array([])
    )

    self.bidirectional = bidirectional

    if not backbone.featurizer.has_word_features:
        raise ConfigurationError(
            "`LanguageModelling` defines a word-level next token language model. "
            "Please check your `features` configuration to enable at least `words` features."
        )

    if backbone.encoder.is_bidirectional() is not bidirectional:
        raise ConfigurationError(
            "Bidirectionality of contextualizer must match bidirectionality of "
            "language model. "
            f"Contextualizer bidirectional: {backbone.encoder.is_bidirectional()}, "
            f"language model bidirectional: {bidirectional}"
        )

    if self.bidirectional:
        self._forward_dim = backbone.encoder.get_output_dim() // 2
    else:
        self._forward_dim = backbone.encoder.get_output_dim()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self._metrics = Metrics(perplexity={"type": "perplexity"})

    self._loss = SoftmaxLoss(
        num_words=vocabulary.words_vocab_size(self.backbone.vocab),
        embedding_dim=self._forward_dim,
    )
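# Illustrative sketch (an assumption about the surrounding forward pass, not this
# task head's actual code): a bidirectional contextualizer outputs vectors of size
# 2 * forward_dim, which are split into a forward half (scoring the next token) and
# a backward half (scoring the previous token) before the softmax loss is applied.
import torch

_batch_size, _seq_len, _forward_dim = 2, 7, 8
_contextual = torch.rand(_batch_size, _seq_len, 2 * _forward_dim)

_forward_half, _backward_half = _contextual.chunk(2, dim=-1)
assert _forward_half.shape == (_batch_size, _seq_len, _forward_dim)
assert _backward_half.shape == (_batch_size, _seq_len, _forward_dim)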
def __init__(self, backbone: ModelBackbone, dropout: float = None) -> None:
    super(LanguageModelling, self).__init__(backbone)

    if not backbone.featurizer.has_word_features:
        raise ConfigurationError(
            "`LanguageModelling` defines a word-level next token language model. "
            "Please check your `features` configuration to enable at least `words` features."
        )

    self._forward_dim = backbone.encoder.get_output_dim()

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = lambda x: x

    self.metrics = {"perplexity": Perplexity()}

    self._loss = SoftmaxLoss(
        num_words=vocabulary.words_vocab_size(self.backbone.vocab),
        embedding_dim=self.backbone.encoder.get_output_dim(),
    )
def on_vocab_update(self):
    self._loss = SoftmaxLoss(
        num_words=vocabulary.words_vocab_size(self.backbone.vocab),
        embedding_dim=self._forward_dim,
    )
def on_vocab_update(self):
    self._loss = SoftmaxLoss(
        num_words=vocabulary.words_vocab_size(self.backbone.vocab),
        embedding_dim=self.backbone.encoder.get_output_dim(),
    )
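# Minimal sketch of the rebuild-on-vocab-update pattern shown above, using a plain
# torch.nn.Linear as a stand-in for SoftmaxLoss; `TinyHead` and its attributes are
# hypothetical names used only for illustration.
import torch


class TinyHead:
    def __init__(self, num_words: int, embedding_dim: int = 16) -> None:
        self._forward_dim = embedding_dim
        self._loss = torch.nn.Linear(embedding_dim, num_words)

    def on_vocab_update(self, num_words: int) -> None:
        # Rebuild the output layer only when the vocabulary size actually changed,
        # mirroring the size check on `softmax_b` in the variant further above.
        if self._loss.out_features != num_words:
            self._loss = torch.nn.Linear(self._forward_dim, num_words)


_head = TinyHead(num_words=100)
_head.on_vocab_update(num_words=120)
assert _head._loss.out_features == 120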