コード例 #1
0
    def compose_embedding(cls, sub_embs, metadata):
        """Build the two-part embedding for ContextualIntentSlot training.

        Part one concatenates the last utterance's word embedding with its
        word-level dictionary, character, and contextual-token features.
        Part two embeds the sequence of utterances (which includes the last
        utterance). The two parts are NOT concatenated with each other; the
        model consumes them individually.

        Args:
            sub_embs: mapping from ModelInput keys to sub-embedding modules.
            metadata: unused here; kept for interface compatibility.

        Returns:
            EmbeddingList with the concatenated last-utterance embedding and
            the un-concatenated sequence embedding.
        """
        last_utterance_keys = (
            ModelInput.TEXT,
            ModelInput.DICT,
            ModelInput.CHAR,
            ModelInput.CONTEXTUAL_TOKEN_EMBEDDING,
        )
        last_utterance_emb = EmbeddingList(
            embeddings=[sub_embs.get(key) for key in last_utterance_keys],
            concat=True,
        )
        return EmbeddingList(
            embeddings=[last_utterance_emb, sub_embs.get(ModelInput.SEQ)],
            concat=False,
        )
コード例 #2
0
    def create_embedding(cls, config, tensorizers):
        """Create a two-level embedding: the current utterance's word
        embedding (wrapped in a concat EmbeddingList) next to the sequence
        embedding, with the two kept un-concatenated."""
        word_emb = create_module(
            config.word_embedding,
            tensorizer=tensorizers["tokens"],
            init_from_saved_state=config.init_from_saved_state,
        )
        seq_emb = create_module(
            config.seq_embedding, tensorizer=tensorizers["seq_tokens"]
        )
        return EmbeddingList(
            [EmbeddingList([word_emb], concat=True), seq_emb], concat=False
        )
コード例 #3
0
 def compose_embedding(
     cls, sub_emb_module_dict: Dict[str, EmbeddingBase]
 ) -> EmbeddingList:
     """Wrap only the first sub-embedding; concatenation of input and
     output sequences stays disabled."""
     first_emb = next(iter(sub_emb_module_dict.values()))
     return EmbeddingList((first_emb,), concat=False)
コード例 #4
0
    def create_embedding(cls, config, tensorizers: Dict[str, Tensorizer]):
        """Build a word embedding concatenated with a byte embedding.

        Both inputs must read the same raw data column, since the byte
        embedding augments the word embedding of the same tokens.
        """
        assert config.inputs.tokens.column == config.inputs.token_bytes.column

        word_embedding = create_module(
            config.embedding,
            tensorizer=tensorizers["tokens"],
            init_from_saved_state=config.init_from_saved_state,
        )
        byte_embedding = create_module(
            config.byte_embedding, vocab_size=ByteTokenTensorizer.NUM_BYTES
        )
        return EmbeddingList([word_embedding, byte_embedding], concat=True)
コード例 #5
0
    def from_config(
        cls,
        model_config,
        feature_config=None,
        metadata: CommonMetadata = None,
        tensorizers: Dict[str, Tensorizer] = None,
    ):
        """Construct an RNNG parser from its config.

        Picks the compositional network from the config flag, builds the
        token embedding either from tensorizers (new-style data handling)
        or from feature_config/metadata (legacy path), and forwards the
        action vocabulary plus the action index metadata to the constructor.

        Raises:
            ValueError: if the compositional_type flag is not recognized.
        """
        comp_type = model_config.compositional_type
        lstm_dim = model_config.lstm.lstm_dim
        if comp_type == RNNGParser.Config.CompositionalType.SUM:
            p_compositional = CompositionalSummationNN(lstm_dim=lstm_dim)
        elif comp_type == RNNGParser.Config.CompositionalType.BLSTM:
            p_compositional = CompositionalNN(lstm_dim=lstm_dim)
        else:
            raise ValueError(
                "Cannot understand compositional flag {}".format(comp_type)
            )

        if tensorizers is not None:
            # New-style path: embedding and action metadata come from tensorizers.
            token_emb = create_module(
                model_config.embedding, tensorizer=tensorizers["tokens"]
            )
            embedding = EmbeddingList([token_emb], concat=True)
            actions_params = tensorizers["actions"]
            actions_vocab = actions_params.vocab
        else:
            # Legacy path: embedding from feature config, actions from metadata.
            embedding = Model.create_embedding(feature_config, metadata=metadata)
            actions_params = metadata
            actions_vocab = metadata.actions_vocab

        return cls(
            ablation=model_config.ablation,
            constraints=model_config.constraints,
            lstm_num_layers=model_config.lstm.num_layers,
            lstm_dim=model_config.lstm.lstm_dim,
            max_open_NT=model_config.max_open_NT,
            dropout=model_config.dropout,
            actions_vocab=actions_vocab,
            shift_idx=actions_params.shift_idx,
            reduce_idx=actions_params.reduce_idx,
            ignore_subNTs_roots=actions_params.ignore_subNTs_roots,
            valid_NT_idxs=actions_params.valid_NT_idxs,
            valid_IN_idxs=actions_params.valid_IN_idxs,
            valid_SL_idxs=actions_params.valid_SL_idxs,
            embedding=embedding,
            p_compositional=p_compositional,
        )
コード例 #6
0
ファイル: rnng_test.py プロジェクト: rutyrinott/pytext-1
    def setUp(self):
        """Build a small RNNGParser in training mode for the tests."""
        # Every action occurs exactly once, so Counter over the list gives
        # the same counts (and first-seen order) as incrementing per action.
        actions_counter = Counter(
            ["IN:A", "IN:B", "IN:UNSUPPORTED", "REDUCE", "SHIFT", "SL:C", "SL:D"]
        )
        actions_vocab = Vocab(actions_counter, specials=[])

        # Word + dict features, concatenated into a single input embedding.
        emb_module = EmbeddingList(
            embeddings=[
                WordEmbedding(
                    num_embeddings=5,
                    embedding_dim=20,
                    embeddings_weight=None,
                    init_range=[-1, 1],
                    unk_token_idx=4,
                    mlp_layer_dims=[],
                ),
                DictEmbedding(
                    num_embeddings=4, embed_dim=10, pooling_type=PoolingType.MEAN
                ),
            ],
            concat=True,
        )

        self.parser = RNNGParser(
            ablation=RNNGParser.Config.AblationParams(),
            constraints=RNNGParser.Config.RNNGConstraints(),
            lstm_num_layers=2,
            lstm_dim=20,
            max_open_NT=10,
            dropout=0.2,
            beam_size=3,
            top_k=3,
            actions_vocab=actions_vocab,
            shift_idx=4,
            reduce_idx=3,
            ignore_subNTs_roots=[2],
            valid_NT_idxs=[0, 1, 2, 5, 6],
            valid_IN_idxs=[0, 1, 2],
            valid_SL_idxs=[5, 6],
            embedding=emb_module,
            p_compositional=CompositionalNN(lstm_dim=20),
        )
        self.parser.train()
コード例 #7
0
    def create_embedding(cls, config, tensorizers: Dict[str, Tensorizer]):
        """Build a word embedding concatenated with a CNN-over-bytes
        character embedding.

        Both inputs must read the same raw data column, since the byte
        embedding augments the word embedding of the same tokens.
        """
        assert config.inputs.tokens.column == config.inputs.token_bytes.column

        word_embedding = create_module(
            config.embedding, tensorizer=tensorizers["tokens"]
        )
        byte_cfg = config.byte_embedding
        byte_embedding = CharacterEmbedding(
            ByteTokenTensorizer.NUM_BYTES,
            byte_cfg.embed_dim,
            byte_cfg.cnn.kernel_num,
            byte_cfg.cnn.kernel_sizes,
            byte_cfg.highway_layers,
            byte_cfg.projection_dim,
        )
        return EmbeddingList([word_embedding, byte_embedding], concat=True)
コード例 #8
0
    def _create_embeddings(
        cls, config: Config, tensorizers: Dict[str, Tensorizer]
    ) -> nn.ModuleList:
        """Create one embedding per input pair.

        For each pair in INPUTS_PAIR, builds the sub-embeddings that are
        both configured and tensorized. A lone sub-embedding is used as-is;
        multiple sub-embeddings are wrapped in a concat EmbeddingList.
        """
        embeddings = []
        for inputs in cls.INPUTS_PAIR:
            sub_embeddings = [
                cls._create_embedding(getattr(config, emb), tensorizers[input])
                for emb, input in zip(cls.EMBEDDINGS, inputs)
                if hasattr(config, emb) and input in tensorizers
            ]
            if len(sub_embeddings) == 1:
                embeddings.append(sub_embeddings[0])
            else:
                embeddings.append(
                    EmbeddingList(embeddings=sub_embeddings, concat=True)
                )
        return nn.ModuleList(embeddings)
コード例 #9
0
 def setUp(self):
     """Build a training RNNGModel and a traced RNNGInference model for tests."""
     contextual_emb_dim = 1
     # Word + dict + contextual-token features, concatenated into one input.
     emb_module = EmbeddingList(
         embeddings=[
             WordEmbedding(num_embeddings=103, embedding_dim=100),
             DictEmbedding(
                 num_embeddings=59, embed_dim=10, pooling_type=PoolingType.MEAN
             ),
             ContextualTokenEmbedding(contextual_emb_dim),
         ],
         concat=True,
     )
     # Training-mode parser; CPU-only so the test needs no GPU.
     # NOTE(review): the valid_NT/IN/SL index lists are hand-picked for a
     # 20-action space -- presumably mirroring a real ontology; verify them
     # against num_actions if either changes.
     self.training_model = RNNGModel(
         input_for_trace=RNNGModel.get_input_for_trace(contextual_emb_dim),
         embedding=emb_module,
         ablation=RNNGParser.Config.AblationParams(),
         constraints=RNNGParser.Config.RNNGConstraints(),
         lstm_num_layers=2,
         lstm_dim=32,
         max_open_NT=10,
         dropout=0.4,
         num_actions=20,
         shift_idx=0,
         reduce_idx=1,
         ignore_subNTs_roots=[8, 15],
         valid_NT_idxs=[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
         + [12, 13, 14, 15, 16, 17, 18, 19],
         valid_IN_idxs=[2, 4, 7, 8, 10, 12, 13, 14, 15],
         valid_SL_idxs=[3, 5, 6, 9, 11, 16, 17, 18, 19],
         embedding_dim=emb_module.embedding_dim,
         p_compositional=CompositionalNN(lstm_dim=32, device="cpu"),
         device="cpu",
     )
     self.training_model.train()
     # Inference wrapper over the traced embedding and scripted model, with
     # mock vocabs for word tokens, dict features, and actions respectively.
     self.inference_model = RNNGInference(
         self.training_model.trace_embedding(),
         self.training_model.jit_model,
         MockVocab(["<unk>", "foo", "bar"]),
         MockVocab(["<unk>", "a", "b"]),
         MockVocab(["SHIFT", "REDUCE", "IN:END_CALL", "SL:METHOD_CALL"]),
     )
     self.inference_model.eval()
コード例 #10
0
 def compose_embedding(cls, sub_embs, metadata):
     """Collect every sub-embedding into one EmbeddingList without
     concatenating them; metadata is unused."""
     all_sub_modules = sub_embs.values()
     return EmbeddingList(all_sub_modules, concat=False)