def test_scalar_mix_layer_norm(self): mixture = ScalarMix(3, do_layer_norm="scalar_norm_reg") tensors = [torch.randn([3, 4, 5]) for _ in range(3)] numpy_mask = numpy.ones((3, 4), dtype="int32") numpy_mask[1, 2:] = 0 mask = torch.from_numpy(numpy_mask).bool() weights = [0.1, 0.2, 0.3] for k in range(3): mixture.scalar_parameters[k].data[0] = weights[k] mixture.gamma.data[0] = 0.5 result = mixture(tensors, mask) normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights)) expected_result = numpy.zeros((3, 4, 5)) for k in range(3): mean = numpy.mean(tensors[k].data.numpy()[numpy_mask == 1]) std = numpy.std(tensors[k].data.numpy()[numpy_mask == 1]) normed_tensor = (tensors[k].data.numpy() - mean) / ( std + util.tiny_value_of_dtype(torch.float)) expected_result += normed_tensor * normed_weights[k] expected_result *= 0.5 numpy.testing.assert_almost_equal(expected_result, result.data.numpy(), decimal=6)
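# A minimal standalone sketch (not from the repository) of the masked layer
# normalization the test above checks, assuming AllenNLP's ScalarMix semantics:
# each input tensor is normalized by the mean and variance taken over unmasked
# positions before the softmax-weighted sum is computed.
import torch

def masked_layer_norm(tensor: torch.Tensor, mask: torch.BoolTensor,
                      eps: float = 1e-12) -> torch.Tensor:
    # tensor: (batch, seq_len, dim); mask: (batch, seq_len)
    broadcast_mask = mask.unsqueeze(-1).float()
    num_elements = broadcast_mask.sum() * tensor.size(-1)
    mean = (tensor * broadcast_mask).sum() / num_elements
    variance = (((tensor - mean) * broadcast_mask) ** 2).sum() / num_elements
    return (tensor - mean) / torch.sqrt(variance + eps)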
def test_scalar_mix_trainable_with_initial_scalar_parameters(self):
    initial_scalar_parameters = [1.0, 2.0, 3.0]
    mixture = ScalarMix(
        3, initial_scalar_parameters=initial_scalar_parameters, trainable=False
    )
    for i, scalar_mix_parameter in enumerate(mixture.scalar_parameters):
        assert scalar_mix_parameter.requires_grad is False
        assert scalar_mix_parameter.item() == initial_scalar_parameters[i]
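# Usage sketch (an assumption, not from the repository): with trainable=False,
# ScalarMix registers its scalar weights (and gamma) with requires_grad=False,
# so an optimizer built over trainable parameters will skip them entirely.
mixture = ScalarMix(3, initial_scalar_parameters=[1.0, 2.0, 3.0], trainable=False)
assert all(not p.requires_grad for p in mixture.parameters())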
def __init__(self,
             vocab: Vocabulary,
             word_embedding: Dict[str, Any],
             depsawr: torch.nn.Module = None,
             transform_dim: int = 0,
             pos_dim: int = 50,
             indicator_dim: int = 50,
             encoder: Dict[str, Any] = None,
             dropout: float = 0.33,
             label_namespace: str = "labels",
             top_k: int = 1,
             **kwargs) -> None:
    super().__init__()
    self.word_embedding = build_word_embedding(
        num_embeddings=len(vocab['words']), vocab=vocab, dropout=dropout,
        **word_embedding)
    feat_dim: int = self.word_embedding.output_dim
    if transform_dim > 0:
        self.word_transform = NonLinear(feat_dim, transform_dim)
        feat_dim = transform_dim
    else:
        self.word_transform = None
    if depsawr:
        dep_dim = kwargs.pop('dep_dim', 300)
        self.depsawr_forward = depsawr.forward
        # One projection per DEPSAWR layer, all mapped to a common dep_dim.
        self.projections = ModuleList([NonLinear(i, dep_dim) for i in depsawr.dims])
        self.depsawr_mix = ScalarMix(len(depsawr.dims), True)
        feat_dim += dep_dim
    else:
        self.depsawr_forward = None
    self.pos_embedding = Embedding(len(vocab['upostag']), pos_dim, 0)
    self.indicator_embedding = Embedding(2, indicator_dim)
    feat_dim += (pos_dim + indicator_dim)
    if encoder is not None:
        self.encoder = build_encoder(feat_dim, dropout=dropout, **encoder)
        feat_dim = self.encoder.output_dim
    else:
        self.encoder = None
    self.tag_projection_layer = torch.nn.Linear(feat_dim, len(vocab[label_namespace]))
    self.word_dropout = WordDropout(dropout)
    self.crf = ConditionalRandomField(
        len(vocab[label_namespace]), include_start_end_transitions=False)
    self.top_k = top_k
    self.metric = SRLMetric(vocab[label_namespace]['_'])
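# Hedged sketch (an assumption, not from the snippet) of how the DEPSAWR pieces
# above would combine in the forward pass: each layer returned by depsawr_forward
# is projected to dep_dim, then ScalarMix collapses the layers into one tensor.
# The helper name and the depsawr_forward call signature are hypothetical.
def _mix_depsawr_layers(self, words):
    dep_layers = self.depsawr_forward(words)  # assumed: list of per-layer tensors
    projected = [proj(layer) for proj, layer in zip(self.projections, dep_layers)]
    return self.depsawr_mix(projected)  # (batch, seq_len, dep_dim)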
def test_scalar_mix_can_run_forward(self):
    mixture = ScalarMix(3)
    tensors = [torch.randn([3, 4, 5]) for _ in range(3)]
    for k in range(3):
        mixture.scalar_parameters[k].data[0] = 0.1 * (k + 1)
    mixture.gamma.data[0] = 0.5
    result = mixture(tensors)

    weights = [0.1, 0.2, 0.3]
    normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights))
    expected_result = sum(normed_weights[k] * tensors[k].data.numpy()
                          for k in range(3))
    expected_result *= 0.5
    numpy.testing.assert_almost_equal(expected_result, result.data.numpy())
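# For reference, a minimal self-contained usage sketch of AllenNLP's ScalarMix.
# The module computes gamma * sum_k softmax(s)_k * tensor_k, which is exactly
# what expected_result above reproduces in numpy.
import torch
from allennlp.modules.scalar_mix import ScalarMix

mixture = ScalarMix(mixture_size=3)
layers = [torch.randn(2, 7, 16) for _ in range(3)]  # e.g. three encoder layers
mixed = mixture(layers)  # shape: (2, 7, 16)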
def __init__(self,
             bert_servant,
             bert_batch_size=1,
             rnn_size_in=(1024, 1024 + 300),
             rnn_size_out=(300, 300),
             max_l=300,
             mlp_d=300,
             num_of_class=3,
             drop_r=0.5,
             activation_type='gelu'):
    super(Model, self).__init__()
    self.bert_mix_scalar = ScalarMix(4)
    self.esim_layer = ESIM(rnn_size_in, rnn_size_out, max_l, mlp_d,
                           num_of_class, drop_r, activation_type)
    self.bert_servant = bert_servant
    self.bert_batch_size = bert_batch_size
def test_scalar_mix_layer_norm(self):
    # Older variant of the layer-norm test above, from a PyTorch version that
    # still used torch.autograd.Variable and a hard-coded epsilon.
    mixture = ScalarMix(3, do_layer_norm=True)
    tensors = [Variable(torch.randn([3, 4, 5])) for _ in range(3)]
    numpy_mask = numpy.ones((3, 4), dtype='int32')
    numpy_mask[1, 2:] = 0
    mask = Variable(torch.from_numpy(numpy_mask))

    weights = [0.1, 0.2, 0.3]
    for k in range(3):
        mixture.scalar_parameters[k].data[0] = weights[k]
    mixture.gamma.data[0] = 0.5
    result = mixture(tensors, mask)

    normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights))
    expected_result = numpy.zeros((3, 4, 5))
    for k in range(3):
        mean = numpy.mean(tensors[k].data.numpy()[numpy_mask == 1])
        std = numpy.std(tensors[k].data.numpy()[numpy_mask == 1])
        normed_tensor = (tensors[k].data.numpy() - mean) / (std + 1E-12)
        expected_result += (normed_tensor * normed_weights[k])
    expected_result *= 0.5

    numpy.testing.assert_almost_equal(expected_result, result.data.numpy())
def test_scalar_mix_throws_error_on_incorrect_initial_scalar_parameters_length(self):
    with pytest.raises(ConfigurationError):
        ScalarMix(3, initial_scalar_parameters=[0.0, 0.0])
def test_scalar_mix_throws_error_on_incorrect_number_of_inputs(self):
    mixture = ScalarMix(3)
    tensors = [torch.randn([3, 4, 5]) for _ in range(5)]
    with pytest.raises(ConfigurationError):
        _ = mixture(tensors)
def __init__(self,
             vocab: Vocabulary,
             roberta_type: str,
             train_roberta: bool = False,
             encoder: Seq2SeqEncoder = None,
             label_namespace: str = "labels",
             feedforward: Optional[FeedForward] = None,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             use_upos_constraints: bool = True,
             use_lemma_constraints: bool = True,
             train_with_constraints: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.roberta_config = AutoConfig.from_pretrained(f"roberta-{roberta_type}")
    self.roberta_config.output_hidden_states = True
    self.roberta = AutoModel.from_pretrained(f"roberta-{roberta_type}",
                                             config=self.roberta_config)
    self.scalar_mix = ScalarMix(self.roberta.config.num_hidden_layers + 1)
    for parameter in self.roberta.parameters():
        parameter.requires_grad = train_roberta
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.train_with_constraints = train_with_constraints

    self.encoder = encoder
    if self.encoder is not None:
        encoder_output_dim = self.encoder.get_output_dim()
    else:
        encoder_output_dim = self.roberta.config.hidden_size
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = encoder_output_dim
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    self._label_namespace = label_namespace
    labels = self.vocab.get_index_to_token_vocabulary(self._label_namespace)
    constraints = streusle_allowed_transitions(labels)

    self.use_upos_constraints = use_upos_constraints
    self.use_lemma_constraints = use_lemma_constraints

    if self.use_lemma_constraints and not self.use_upos_constraints:
        raise ConfigurationError(
            "If lemma constraints are applied, UPOS constraints must be applied as well.")

    if self.use_upos_constraints:
        # Get a dict mapping each UPOS to its allowed LEXCATs.
        self._upos_to_allowed_lexcats: Dict[str, Set[str]] = get_upos_allowed_lexcats(
            stronger_constraints=self.use_lemma_constraints)
        # Dict mapping each special lemma to a dict of
        # {UPOS: list of additionally allowed LEXCATs}.
        self._lemma_to_allowed_lexcats: Dict[str, Dict[str, List[str]]] = \
            get_lemma_allowed_lexcats()

        # Use labels and upos_to_allowed_lexcats to build a dict mapping each
        # UPOS to a mask with 1 at allowed label indices and 0 at disallowed
        # label indices.
        self._upos_to_label_mask: Dict[str, torch.Tensor] = {}
        for upos in ALL_UPOS:
            # Shape: (num_labels,)
            upos_label_mask = torch.zeros(
                len(labels),
                device=next(self.tag_projection_layer.parameters()).device)
            # Go through the labels and indices and fill in the allowed values.
            for label_index, label in labels.items():
                if len(label.split("-")) == 1:
                    upos_label_mask[label_index] = 1
                    continue
                label_lexcat = label.split("-")[1]
                if not label.startswith("O-") and not label.startswith("o-"):
                    # Label does not start with O-/o-, always allowed.
                    upos_label_mask[label_index] = 1
                elif label_lexcat in self._upos_to_allowed_lexcats[upos]:
                    # Label starts with O-/o-, but the lexcat is in the allowed
                    # lexcats for the current upos.
                    upos_label_mask[label_index] = 1
            self._upos_to_label_mask[upos] = upos_label_mask

        # Use labels and lemma_to_allowed_lexcats to build a dict mapping each
        # (lemma, upos) pair to a mask with 1 at an _additionally_ allowed label
        # index and 0 elsewhere. If both this mask and upos_to_label_mask have a
        # 0, the lexcat is not allowed for the (upos, lemma) pair; if either has
        # a 1, it is allowed.
        self._lemma_upos_to_label_mask: Dict[Tuple[str, str], torch.Tensor] = {}
        for lemma in SPECIAL_LEMMAS:
            for upos_tag in ALL_UPOS:
                # No additional constraints for this pair; the mask would be all
                # zeros, so skip it.
                if upos_tag not in self._lemma_to_allowed_lexcats[lemma]:
                    continue
                # Shape: (num_labels,)
                lemma_upos_label_mask = torch.zeros(
                    len(labels),
                    device=next(self.tag_projection_layer.parameters()).device)
                # Go through the labels and indices and fill in the allowed values.
                for label_index, label in labels.items():
                    # Skip labels without a lexcat (e.g. ~i tags); they are not
                    # handled here.
                    if len(label.split("-")) == 1:
                        continue
                    label_lexcat = label.split("-")[1]
                    if not label.startswith("O-") and not label.startswith("o-"):
                        # Label does not start with O-/o-, so it is not handled here.
                        continue
                    if label_lexcat in self._lemma_to_allowed_lexcats[lemma][upos_tag]:
                        # Label starts with O-/o-, and the lexcat is additionally
                        # allowed for this (lemma, upos) pair.
                        lemma_upos_label_mask[label_index] = 1
                self._lemma_upos_to_label_mask[(lemma, upos_tag)] = lemma_upos_label_mask

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3),
    }
    if encoder is not None:
        check_dimensions_match(self.roberta.config.hidden_size,
                               encoder.get_input_dim(),
                               "roberta embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
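# Hedged sketch (not shown in the snippet above) of how self.scalar_mix would
# typically be used in forward(): with output_hidden_states=True, the Hugging
# Face model returns num_hidden_layers + 1 hidden-state tensors (the embedding
# output plus one per layer), which matches the mixture size chosen in __init__.
# `input_ids` and `attention_mask` are assumed inputs.
outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
# Depending on the transformers version, the hidden states are available as
# outputs.hidden_states or as the last element of the returned tuple.
hidden_states = outputs[-1]
mixed = self.scalar_mix(list(hidden_states))  # (batch, seq_len, hidden_size)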
def __init__(self,
             vocab: Vocabulary,
             base_dim,
             loss_scale_by_num_values,
             use_pre_calc_elmo_embeddings,
             elmo_embedding_path,
             domain_slot_list_path,
             word_embeddings,
             token_indexers: Dict[str, TokenIndexer],
             text_field_embedder: TextFieldEmbedder,
             text_field_char_embedder: TextFieldEmbedder,
             symbol_embedder: TextFieldEmbedder,
             phrase_layer: Seq2SeqEncoder,
             class_prediction_layer: FeedForward,
             span_prediction_layer: FeedForward,
             span_start_encoder: FeedForward,
             span_end_encoder: FeedForward,
             span_label_predictor: FeedForward,
             initializer: InitializerApplicator,
             use_graph,
             bi_dropout: float = 0.2,
             dropout: float = 0.2) -> None:
    super().__init__(vocab)
    self._is_in_training_mode = False
    self._loss_scale_by_num_values = loss_scale_by_num_values
    self._use_pre_calc_elmo_embeddings = use_pre_calc_elmo_embeddings
    self._word_embeddings = word_embeddings
    self._is_use_elmo = self._word_embeddings == "elmo"
    self._is_use_graph = use_graph
    if self._is_use_elmo and use_pre_calc_elmo_embeddings:
        self._dialog_elmo_embeddings = self.load_elmo_embeddings(elmo_embedding_path)
        self._dialog_scalar_mix = ScalarMix(mixture_size=3, trainable=True)
    self._domains, self._ds_id2text, self._ds_text2id, self.value_file_path, \
        self._ds_type, self._ds_use_value_list, num_ds_use_value, self._ds_masked \
        = self.read_domain_slot_list(domain_slot_list_path)
    self._value_id2text, self._value_text2id = self.load_value_list(domain_slot_list_path)
    self._span_id2text, self._class_id2text = dstqa_util.gen_id2text(
        self._ds_id2text, self._ds_type)
    self._token_indexers = token_indexers
    self._text_field_embedder = text_field_embedder
    self._text_field_char_embedder = text_field_char_embedder
    self._symbol_embedder = symbol_embedder
    self._ds_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dialog_dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._ds_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._agg_value = torch.nn.Linear(base_dim, base_dim)
    self._agg_nodes = torch.nn.Linear(base_dim, base_dim)
    self._graph_gamma = torch.nn.Linear(base_dim, 1)
    self._class_prediction_layer = class_prediction_layer
    self._span_prediction_layer = span_prediction_layer
    self._span_label_predictor = span_label_predictor
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._phrase_layer = phrase_layer
    self._cross_entropy = CrossEntropyLoss(ignore_index=-1)
    self._accuracy = Accuracy(self._ds_id2text, self._ds_type)
    self._dropout = torch.nn.Dropout(dropout)
    self._bi_dropout = torch.nn.Dropout(bi_dropout)
    self._dropout2 = torch.nn.Dropout(0.1)
    self._sigmoid = torch.nn.Sigmoid()
    initializer(self)