def __init__(self,
             vocab: Vocabulary,
             bert_embedder: Optional[PretrainedBertEmbedder] = None,
             encoder: Optional[Seq2SeqEncoder] = None,
             dropout: Optional[float] = None,
             use_crf: bool = True) -> None:
    super().__init__(vocab)
    if bert_embedder:
        self.use_bert = True
        self.bert_embedder = bert_embedder
    else:
        # Without BERT, fall back to a trainable look-up embedding
        # followed by a BiLSTM over the embedded tokens.
        self.use_bert = False
        self.basic_embedder = BasicTextFieldEmbedder({
            "tokens": Embedding(vocab.get_vocab_size(namespace="tokens"), 1024)
        })
        self.rnn = Seq2SeqEncoder.from_params(Params({
            "type": "lstm",
            "input_size": 1024,
            "hidden_size": 512,
            "bidirectional": True,
            "batch_first": True
        }))
    self.encoder = encoder
    if encoder:
        hidden2tag_in_dim = encoder.get_output_dim()
    else:
        # Note: this fallback requires bert_embedder to be given
        # whenever encoder is None.
        hidden2tag_in_dim = bert_embedder.get_output_dim()
    self.hidden2tag = TimeDistributed(torch.nn.Linear(
        in_features=hidden2tag_in_dim,
        out_features=vocab.get_vocab_size("labels")))
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.use_crf = use_crf
    if use_crf:
        # Constrain CRF transitions to valid BIO label sequences.
        crf_constraints = allowed_transitions(
            constraint_type="BIO",
            labels=vocab.get_index_to_token_vocabulary("labels"))
        self.crf = ConditionalRandomField(
            num_tags=vocab.get_vocab_size("labels"),
            constraints=crf_constraints,
            include_start_end_transitions=True)
    self.f1 = SpanBasedF1Measure(vocab,
                                 tag_namespace="labels",
                                 ignore_classes=["news/type", "negation",
                                                 "demonstrative_reference",
                                                 "timer/noun", "timer/attributes"],
                                 label_encoding="BIO")
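# For context, the sketch below shows how a tagger built from these modules
# would typically run its forward pass in AllenNLP 0.x. It is an illustration,
# not the repository's actual forward(): the variable names, the "tokens"
# indexer key, and the choice of get_text_field_mask /
# sequence_cross_entropy_with_logits are assumptions.
from typing import Any, Dict, Optional

import torch
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits


def forward(self,
            tokens: Dict[str, torch.Tensor],
            labels: Optional[torch.Tensor] = None) -> Dict[str, Any]:
    mask = get_text_field_mask(tokens)
    if self.use_bert:
        # Assumes the token ids live under the "tokens" key of the text field.
        embeddings = self.bert_embedder(tokens["tokens"])
    else:
        embeddings = self.rnn(self.basic_embedder(tokens), mask)
    encoded = self.encoder(embeddings, mask) if self.encoder else embeddings
    if self.dropout:
        encoded = self.dropout(encoded)
    logits = self.hidden2tag(encoded)  # (batch, seq_len, num_labels)

    output = {"logits": logits}
    if self.use_crf:
        # Viterbi decoding under the BIO transition constraints.
        output["tags"] = [tags for tags, _ in self.crf.viterbi_tags(logits, mask)]
        if labels is not None:
            # The CRF returns a log-likelihood; negate it to get a loss.
            output["loss"] = -self.crf(logits, labels, mask)
    elif labels is not None:
        output["loss"] = sequence_cross_entropy_with_logits(logits, labels, mask)
    return output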
def test_stacked_bidirectional_lstm_can_build_from_params(self):
    params = Params({"type": "stacked_bidirectional_lstm",
                     "input_size": 5,
                     "hidden_size": 9,
                     "num_layers": 3})
    encoder = Seq2SeqEncoder.from_params(params)
    assert encoder.get_input_dim() == 5
    assert encoder.get_output_dim() == 18
    assert encoder.is_bidirectional
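# A quick sanity check of the shape contract the test asserts: push a random
# batch through the built encoder. The batch shape and the all-ones mask are
# assumptions for illustration.
import torch

inputs = torch.randn(4, 7, 5)   # (batch, seq_len, input_size)
mask = torch.ones(4, 7)
outputs = encoder(inputs, mask)
# Output dim is 2 * hidden_size = 18 because the LSTM is bidirectional.
assert outputs.shape == (4, 7, 18)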
vocab = Vocabulary.from_instances(train_dataset + validation_dataset)

EMBEDDING_DIM = 100
HIDDEN_DIM = 200

model_params = Params({
    'type': 'lstm',
    'input_size': EMBEDDING_DIM,
    'hidden_size': HIDDEN_DIM
})

token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=EMBEDDING_DIM)
word_embedding = BasicTextFieldEmbedder({'tokens': token_embedding})
lstm = Seq2SeqEncoder.from_params(model_params)
model = POSTagger(word_embedding, lstm, vocab)

optimizer = optim.Adam(model.parameters())
# Bucket sentences by length so each batch needs minimal padding.
iterator = BucketIterator(batch_size=64, sorting_keys=[('sentence', 'num_tokens')])
iterator.index_with(vocab)

trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  validation_dataset=validation_dataset,
                  patience=10)
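# With the trainer assembled, a single call runs the training loop with early
# stopping (patience=10). In the AllenNLP 0.x API, train() returns a metrics
# dict; printing it whole avoids assuming specific keys.
metrics = trainer.train()
print(metrics)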
def __init__(self,
             vocab: Vocabulary,
             bert_embedder: Optional[PretrainedBertEmbedder] = None,
             encoder: Optional[Seq2SeqEncoder] = None,
             dropout: Optional[float] = None,
             use_crf: bool = True,
             add_random_noise: bool = False,
             add_attack_noise: bool = False,
             do_noise_normalization: bool = True,
             noise_norm: Optional[float] = None,
             noise_loss_prob: Optional[float] = None,
             add_noise_for: str = "ov",
             rnn_after_embeddings: bool = False,
             open_vocabulary_slots: Optional[List[str]] = None,
             metrics_for_each_slot_type: bool = False) -> None:
    """
    Params
    ------
    vocab: the AllenNLP Vocabulary object; passed in automatically.
    bert_embedder: the pretrained BERT embedder. If not None, the pretrained
        BERT embedding (with fixed parameters) is used as the embedding layer.
        Otherwise, a look-up embedding matrix with embedding size 1024 is
        initialized. Default is None.
    encoder: the contextual encoder applied after the embedding layer. If set
        to None, no contextual encoder is used.
    dropout: the dropout rate; not set in any of our experiments.
    use_crf: if True, a CRF is used as the output layer. Otherwise, a softmax
        layer (with cross-entropy loss) is used.
    add_random_noise: whether to add random noise to slots. Cannot be set
        simultaneously with add_attack_noise. Used as a baseline in our
        experiments.
    add_attack_noise: whether to add adversarial attack noise to slots.
        Cannot be set simultaneously with add_random_noise.
    do_noise_normalization: if True, normalization is applied to the
        gradients w.r.t. token embeddings. Otherwise, the gradients are left
        unnormalized.
    noise_norm: the normalization norm (L2) applied to the gradients.
    noise_loss_prob: the alpha hyperparameter balancing the loss from the
        normal forward pass and the adversarial forward pass. See the paper
        for details. Must be between 0 and 1.
    add_noise_for: if set to "ov", noise is applied only to open-vocabulary
        slots. Otherwise, it is applied to all slots (both open-vocabulary
        and normal slots).
    rnn_after_embeddings: if True, an additional BiLSTM layer is applied
        after the embedding layer. Default is False.
    open_vocabulary_slots: the list of open-vocabulary slots. If not set,
        defaults to the open-vocabulary slots of the Snips dataset.
    metrics_for_each_slot_type: whether to log metrics for each slot type.
        Default is False.
""" super().__init__(vocab) if bert_embedder: self.use_bert = True self.bert_embedder = bert_embedder else: self.use_bert = False self.basic_embedder = BasicTextFieldEmbedder({ "tokens": Embedding(vocab.get_vocab_size(namespace="tokens"), 1024) }) self.rnn_after_embeddings = rnn_after_embeddings if rnn_after_embeddings: self.rnn = Seq2SeqEncoder.from_params( Params({ "type": "lstm", "input_size": 1024, "hidden_size": 512, "bidirectional": True, "batch_first": True })) self.encoder = encoder if encoder: hidden2tag_in_dim = encoder.get_output_dim() else: hidden2tag_in_dim = bert_embedder.get_output_dim() self.hidden2tag = TimeDistributed( torch.nn.Linear(in_features=hidden2tag_in_dim, out_features=vocab.get_vocab_size("labels"))) if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self.use_crf = use_crf if use_crf: crf_constraints = allowed_transitions( constraint_type="BIO", labels=vocab.get_index_to_token_vocabulary("labels")) self.crf = ConditionalRandomField( num_tags=vocab.get_vocab_size("labels"), constraints=crf_constraints, include_start_end_transitions=True) # default open_vocabulary slots: for SNIPS dataset open_vocabulary_slots = open_vocabulary_slots or [ "playlist", "entity_name", "poi", "restaurant_name", "geographic_poi", "album", "track", "object_name", "movie_name" ] self.f1 = OVSpecSpanBasedF1Measure( vocab, tag_namespace="labels", ignore_classes=[], label_encoding="BIO", open_vocabulary_slots=open_vocabulary_slots) self.add_random_noise = add_random_noise self.add_attack_noise = add_attack_noise assert not (add_random_noise and add_attack_noise), "both random and attack noise applied" if add_random_noise or add_attack_noise: self.do_noise_normalization = do_noise_normalization assert noise_norm is not None assert noise_loss_prob is not None and 0. <= noise_loss_prob <= 1. self.noise_norm = noise_norm self.noise_loss_prob = noise_loss_prob assert add_noise_for in ["ov", "all"] self.ov_noise_only = (add_noise_for == "ov") self.metrics_for_each_slot_type = metrics_for_each_slot_type