Code example #1
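This test exercises ScalarMix with layer normalization enabled: the scalar weights and gamma are set by hand, and the expected output is recomputed in NumPy by normalizing each tensor with mean/std taken over the unmasked positions only.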
    def test_scalar_mix_layer_norm(self):
        # Any truthy value enables layer norm here; the test passes a marker string.
        mixture = ScalarMix(3, do_layer_norm="scalar_norm_reg")

        tensors = [torch.randn([3, 4, 5]) for _ in range(3)]
        numpy_mask = numpy.ones((3, 4), dtype="int32")
        numpy_mask[1, 2:] = 0
        mask = torch.from_numpy(numpy_mask).bool()

        weights = [0.1, 0.2, 0.3]
        for k in range(3):
            mixture.scalar_parameters[k].data[0] = weights[k]
        mixture.gamma.data[0] = 0.5
        result = mixture(tensors, mask)

        normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights))
        expected_result = numpy.zeros((3, 4, 5))
        for k in range(3):
            mean = numpy.mean(tensors[k].data.numpy()[numpy_mask == 1])
            std = numpy.std(tensors[k].data.numpy()[numpy_mask == 1])
            normed_tensor = (tensors[k].data.numpy() - mean) / (
                std + util.tiny_value_of_dtype(torch.float))
            expected_result += normed_tensor * normed_weights[k]
        expected_result *= 0.5

        numpy.testing.assert_almost_equal(expected_result,
                                          result.data.numpy(),
                                          decimal=6)
Code example #2
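This test checks that initial_scalar_parameters are stored verbatim and that trainable=False freezes them (requires_grad stays False).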
    def test_scalar_mix_trainable_with_initial_scalar_parameters(self):
        initial_scalar_parameters = [1.0, 2.0, 3.0]
        mixture = ScalarMix(
            3,
            initial_scalar_parameters=initial_scalar_parameters,
            trainable=False)
        for i, scalar_mix_parameter in enumerate(mixture.scalar_parameters):
            assert scalar_mix_parameter.requires_grad is False
            assert scalar_mix_parameter.item() == initial_scalar_parameters[i]
Code example #3
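A tagger constructor that uses ScalarMix to combine the layer outputs of an optional depsawr module: each layer is first projected to dep_dim with a NonLinear transform, and the mixed result is concatenated onto the word features.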
    def __init__(self,
                 vocab: Vocabulary,
                 word_embedding: Dict[str, Any],
                 depsawr: torch.nn.Module = None,
                 transform_dim: int = 0,
                 pos_dim: int = 50,
                 indicator_dim: int = 50,
                 encoder: Dict[str, Any] = None,
                 dropout: float = 0.33,
                 label_namespace: str = "labels",
                 top_k: int = 1,
                 **kwargs) -> None:
        super().__init__()
        self.word_embedding = build_word_embedding(
            num_embeddings=len(vocab['words']),
            vocab=vocab,
            dropout=dropout,
            **word_embedding)
        feat_dim: int = self.word_embedding.output_dim

        if transform_dim > 0:
            self.word_transform = NonLinear(feat_dim, transform_dim)
            feat_dim: int = transform_dim
        else:
            self.word_transform = None

        if depsawr:
            dep_dim = kwargs.pop('dep_dim', 300)
            self.depsawr_forward = depsawr.forward
            self.projections = ModuleList(
                [NonLinear(i, dep_dim) for i in depsawr.dims])
            # The second positional argument sets do_layer_norm=True.
            self.depsawr_mix = ScalarMix(len(depsawr.dims), True)
            feat_dim += dep_dim
        else:
            self.depsawr_forward = None

        self.pos_embedding = Embedding(len(vocab['upostag']), pos_dim, 0)
        self.indicator_embedding = Embedding(2, indicator_dim)
        feat_dim += (pos_dim + indicator_dim)

        if encoder is not None:
            self.encoder = build_encoder(feat_dim, dropout=dropout, **encoder)
            feat_dim = self.encoder.output_dim
        else:
            self.encoder = None

        self.tag_projection_layer = torch.nn.Linear(
            feat_dim, len(vocab[label_namespace]))
        self.word_dropout = WordDropout(dropout)
        self.crf = ConditionalRandomField(len(vocab[label_namespace]),
                                          include_start_end_transitions=False)
        self.top_k = top_k
        self.metric = SRLMetric(vocab[label_namespace]['_'])
Code example #4
File: scalar_mix_test.py Project: pelovett/allennlp
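The basic forward test: with hand-set scalar parameters and gamma, the output must equal the softmax-weighted sum of the inputs, scaled by gamma.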
    def test_scalar_mix_can_run_forward(self):
        mixture = ScalarMix(3)
        tensors = [torch.randn([3, 4, 5]) for _ in range(3)]
        for k in range(3):
            mixture.scalar_parameters[k].data[0] = 0.1 * (k + 1)
        mixture.gamma.data[0] = 0.5
        result = mixture(tensors)

        weights = [0.1, 0.2, 0.3]
        normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights))
        expected_result = sum(normed_weights[k] * tensors[k].data.numpy() for k in range(3))
        expected_result *= 0.5
        numpy.testing.assert_almost_equal(expected_result, result.data.numpy())
Code example #5
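An ESIM-based classifier that mixes four BERT layers with ScalarMix before feeding the result to the ESIM encoder.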
    def __init__(self,
                 bert_servant,
                 bert_batch_size=1,
                 rnn_size_in=(1024, 1024 + 300),
                 rnn_size_out=(300, 300),
                 max_l=300,
                 mlp_d=300,
                 num_of_class=3,
                 drop_r=0.5,
                 activation_type='gelu'):

        super(Model, self).__init__()
        self.bert_mix_scalar = ScalarMix(4)
        self.esim_layer = ESIM(rnn_size_in, rnn_size_out, max_l, mlp_d,
                               num_of_class, drop_r, activation_type)
        self.bert_servant = bert_servant
        self.bert_batch_size = bert_batch_size
Code example #6
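An older variant of the layer-norm test in example #1, written against the pre-0.4 PyTorch Variable API and using a hard-coded 1e-12 epsilon instead of util.tiny_value_of_dtype.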
    def test_scalar_mix_layer_norm(self):
        mixture = ScalarMix(3, do_layer_norm='scalar_norm_reg')

        tensors = [Variable(torch.randn([3, 4, 5])) for _ in range(3)]
        numpy_mask = numpy.ones((3, 4), dtype='int32')
        numpy_mask[1, 2:] = 0
        mask = Variable(torch.from_numpy(numpy_mask))

        weights = [0.1, 0.2, 0.3]
        for k in range(3):
            mixture.scalar_parameters[k].data[0] = weights[k]
        mixture.gamma.data[0] = 0.5
        result = mixture(tensors, mask)

        normed_weights = numpy.exp(weights) / numpy.sum(numpy.exp(weights))
        expected_result = numpy.zeros((3, 4, 5))
        for k in range(3):
            mean = numpy.mean(tensors[k].data.numpy()[numpy_mask == 1])
            std = numpy.std(tensors[k].data.numpy()[numpy_mask == 1])
            normed_tensor = (tensors[k].data.numpy() - mean) / (std + 1E-12)
            expected_result += (normed_tensor * normed_weights[k])
        expected_result *= 0.5

        numpy.testing.assert_almost_equal(expected_result, result.data.numpy())
Code example #7
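ScalarMix must raise ConfigurationError when the length of initial_scalar_parameters does not match mixture_size.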
    def test_scalar_mix_throws_error_on_incorrect_initial_scalar_parameters_length(
            self):
        with pytest.raises(ConfigurationError):
            ScalarMix(3, initial_scalar_parameters=[0.0, 0.0])
Code example #8
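ScalarMix must raise ConfigurationError when called with a different number of tensors than it was built for.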
    def test_scalar_mix_throws_error_on_incorrect_number_of_inputs(self):
        mixture = ScalarMix(3)
        tensors = [torch.randn([3, 4, 5]) for _ in range(5)]
        with pytest.raises(ConfigurationError):
            _ = mixture(tensors)
Code example #9
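A CRF tagger over RoBERTa: ScalarMix combines all num_hidden_layers + 1 hidden states (the embedding layer plus every transformer layer), and UPOS/lemma constraints are precomputed as per-label masks.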
    def __init__(self,
                 vocab: Vocabulary,
                 roberta_type: str,
                 train_roberta: bool = False,
                 encoder: Seq2SeqEncoder = None,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 include_start_end_transitions: bool = True,
                 dropout: Optional[float] = None,
                 use_upos_constraints: bool = True,
                 use_lemma_constraints: bool = True,
                 train_with_constraints: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.roberta_config = AutoConfig.from_pretrained(
            f"roberta-{roberta_type}")
        self.roberta_config.output_hidden_states = True
        self.roberta = AutoModel.from_pretrained(f"roberta-{roberta_type}",
                                                 config=self.roberta_config)
        self.scalar_mix = ScalarMix(self.roberta.config.num_hidden_layers + 1)

        for parameter in self.roberta.parameters():
            parameter.requires_grad = train_roberta

        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.train_with_constraints = train_with_constraints

        self.encoder = encoder
        if self.encoder is not None:
            encoder_output_dim = self.encoder.get_output_dim()
        else:
            encoder_output_dim = self.roberta.config.hidden_size
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = encoder_output_dim
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))
        self._label_namespace = label_namespace
        labels = self.vocab.get_index_to_token_vocabulary(
            self._label_namespace)
        constraints = streusle_allowed_transitions(labels)

        self.use_upos_constraints = use_upos_constraints
        self.use_lemma_constraints = use_lemma_constraints

        if self.use_lemma_constraints and not self.use_upos_constraints:
            raise ConfigurationError(
                "If lemma constraints are applied, UPOS constraints must be applied as well."
            )

        if self.use_upos_constraints:
            # Mapping from UPOS to the set of allowed LEXCATs.
            self._upos_to_allowed_lexcats: Dict[str, Set[str]] = \
                get_upos_allowed_lexcats(
                    stronger_constraints=self.use_lemma_constraints)
            # Mapping from lemma to a dict of [UPOS, list of additionally
            # allowed LEXCATs].
            self._lemma_to_allowed_lexcats: Dict[str, Dict[str, List[str]]] = \
                get_lemma_allowed_lexcats()

            # Use labels and the upos_to_allowed_lexcats to get a dict with
            # a mapping from UPOS to a mask with 1 at allowed label indices and 0 at
            # disallowed label indices.
            self._upos_to_label_mask: Dict[str, torch.Tensor] = {}
            for upos in ALL_UPOS:
                # Shape: (num_labels,)
                upos_label_mask = torch.zeros(
                    len(labels),
                    device=next(self.tag_projection_layer.parameters()).device)
                # Go through the labels and indices and fill in the values that are allowed.
                for label_index, label in labels.items():
                    if len(label.split("-")) == 1:
                        upos_label_mask[label_index] = 1
                        continue
                    label_lexcat = label.split("-")[1]
                    if not label.startswith("O-") and not label.startswith(
                            "o-"):
                        # Label does not start with O-/o-, always allowed.
                        upos_label_mask[label_index] = 1
                    elif label_lexcat in self._upos_to_allowed_lexcats[upos]:
                        # Label starts with O-/o-, but the lexcat is in allowed
                        # lexcats for the current upos.
                        upos_label_mask[label_index] = 1
                self._upos_to_label_mask[upos] = upos_label_mask

            # Use labels and the lemma_to_allowed_lexcats to get a dict with
            # a mapping from lemma to a mask with 1 at an _additionally_ allowed label index
            # and 0 at disallowed label indices. If lemma_to_label_mask has a 0, and upos_to_label_mask
            # has a 0, the lexcat is not allowed for the (upos, lemma). If either lemma_to_label_mask or
            # upos_to_label_mask has a 1, the lexcat is allowed for the (upos, lemma) pair.
            self._lemma_upos_to_label_mask: Dict[Tuple[str, str],
                                                 torch.Tensor] = {}
            for lemma in SPECIAL_LEMMAS:
                for upos_tag in ALL_UPOS:
                    # No additional lexcats for this (lemma, UPOS) pair; skip it.
                    if upos_tag not in self._lemma_to_allowed_lexcats[lemma]:
                        continue
                    # Shape: (num_labels,)
                    lemma_upos_label_mask = torch.zeros(
                        len(labels),
                        device=next(
                            self.tag_projection_layer.parameters()).device)
                    # Go through the labels and indices and fill in the values that are allowed.
                    for label_index, label in labels.items():
                        # For ~i, etc. tags. We don't deal with them here.
                        if len(label.split("-")) == 1:
                            continue
                        label_lexcat = label.split("-")[1]
                        if not label.startswith("O-") and not label.startswith(
                                "o-"):
                            # Label does not start with O-/o-, so we don't deal with it here
                            continue
                        if label_lexcat in self._lemma_to_allowed_lexcats[
                                lemma][upos_tag]:
                            # Label starts with O-/o-, but the lexcat is in allowed
                            # lexcats for the current upos.
                            lemma_upos_label_mask[label_index] = 1
                    self._lemma_upos_to_label_mask[(
                        lemma, upos_tag)] = lemma_upos_label_mask

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        if encoder is not None:
            check_dimensions_match(self.roberta.config.hidden_size,
                                   encoder.get_input_dim(),
                                   "roberta embedding dim",
                                   "encoder input dim")
        if feedforward is not None:
            # encoder_output_dim falls back to the RoBERTa hidden size when
            # encoder is None, so this check is safe in both cases.
            check_dimensions_match(encoder_output_dim,
                                   feedforward.get_input_dim(),
                                   "encoder output dim",
                                   "feedforward input dim")
        initializer(self)
Code example #10
File: dstqa.py Project: stainswei/dstqa
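The DSTQA dialogue-state tracker: when pre-computed ELMo embeddings are used, a trainable ScalarMix combines the three ELMo layers.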
  def __init__(self,
               vocab: Vocabulary,
               base_dim,
               loss_scale_by_num_values,
               use_pre_calc_elmo_embeddings,
               elmo_embedding_path,
               domain_slot_list_path,
               word_embeddings,
               token_indexers: Dict[str, TokenIndexer],
               text_field_embedder: TextFieldEmbedder,
               text_field_char_embedder: TextFieldEmbedder,
               symbol_embedder: TextFieldEmbedder,
               phrase_layer: Seq2SeqEncoder,
               class_prediction_layer: FeedForward,
               span_prediction_layer: FeedForward,
               span_start_encoder: FeedForward,
               span_end_encoder: FeedForward,
               span_label_predictor: FeedForward,
               initializer: InitializerApplicator,
               use_graph,
               bi_dropout: float = 0.2,
               dropout: float = 0.2) -> None:
    super().__init__(vocab)
    self._is_in_training_mode = False
    self._loss_scale_by_num_values = loss_scale_by_num_values
    self._use_pre_calc_elmo_embeddings = use_pre_calc_elmo_embeddings
    self._word_embeddings = word_embeddings
    self._is_use_elmo = (self._word_embeddings == "elmo")
    self._is_use_graph = use_graph
    if self._is_use_elmo and use_pre_calc_elmo_embeddings:
      self._dialog_elmo_embeddings = self.load_elmo_embeddings(elmo_embedding_path)
      self._dialog_scalar_mix = ScalarMix(mixture_size=3, trainable=True)

    (self._domains, self._ds_id2text, self._ds_text2id, self.value_file_path,
     self._ds_type, self._ds_use_value_list, num_ds_use_value,
     self._ds_masked) = self.read_domain_slot_list(domain_slot_list_path)
    self._value_id2text, self._value_text2id = self.load_value_list(domain_slot_list_path)
    self._span_id2text, self._class_id2text = dstqa_util.gen_id2text(self._ds_id2text, self._ds_type)
    self._token_indexers = token_indexers

    self._text_field_embedder = text_field_embedder
    self._text_field_char_embedder = text_field_char_embedder
    self._symbol_embedder = symbol_embedder

    self._ds_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dialog_dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_dialog_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._ds_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._dsv_attention = LinearMatrixAttention(base_dim, base_dim, 'x,y,x*y')
    self._agg_value = torch.nn.Linear(base_dim, base_dim)
    self._agg_nodes = torch.nn.Linear(base_dim, base_dim)
    self._graph_gamma = torch.nn.Linear(base_dim, 1)
    self._class_prediction_layer = class_prediction_layer
    self._span_prediction_layer = span_prediction_layer
    self._span_label_predictor = span_label_predictor
    self._span_start_encoder = span_start_encoder
    self._span_end_encoder = span_end_encoder
    self._phrase_layer = phrase_layer
    self._cross_entropy = CrossEntropyLoss(ignore_index=-1)
    self._accuracy = Accuracy(self._ds_id2text, self._ds_type)
    self._dropout = torch.nn.Dropout(dropout)
    self._bi_dropout = torch.nn.Dropout(bi_dropout)
    self._dropout2 = torch.nn.Dropout(0.1)
    self._sigmoid = torch.nn.Sigmoid()
    initializer(self)
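
For reference, a minimal self-contained sketch of the API all of these examples exercise (assuming torch and allennlp are installed; the tensor shapes are arbitrary):

import torch
from allennlp.modules.scalar_mix import ScalarMix

# Three tensors of identical shape, e.g. three layer outputs.
tensors = [torch.randn(2, 5, 8) for _ in range(3)]

# One learned scalar weight per input tensor, plus a global gamma.
mixture = ScalarMix(mixture_size=3)

# Output is gamma * sum_k softmax(scalars)[k] * tensors[k]; same shape as each input.
mixed = mixture(tensors)
print(mixed.shape)  # torch.Size([2, 5, 8])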