Code example #1
0
    def test_get_param_groups_for_optimizer(self):
        """EmbeddingList should expose exactly one optimizer param group
        that covers every parameter of its child embeddings."""
        w_emb = WordEmbedding(
            num_embeddings=5,
            embedding_dim=4,
            embeddings_weight=None,
            init_range=(-1, 1),
            unk_token_idx=4,
            pad_token_idx=3,
            mlp_layer_dims=[],
        )
        c_emb = CharacterEmbedding(
            num_embeddings=5, embed_dim=4, out_channels=2, kernel_sizes=[1, 2]
        )
        emb_list = EmbeddingList([w_emb, c_emb], concat=True)

        groups = emb_list.get_param_groups_for_optimizer()
        # All parameters should land in a single group.
        self.assertEqual(len(groups), 1)

        # The group must name every parameter the module itself reports.
        expected = {name for name, _ in emb_list.named_parameters()}
        self.assertSetEqual(set(groups[0].keys()), expected)
Code example #2
0
    class Config(RNNGParserBase.Config):
        """Parser configuration: tokenized-text input, annotation actions,
        and a word embedding."""

        class ModelInput(BaseModel.Config.ModelInput):
            # Tokens are read from the "tokenized_text" column.
            tokens: TokenTensorizer.Config = TokenTensorizer.Config(
                column="tokenized_text"
            )
            # Annotation actions numberized with default settings.
            actions: AnnotationNumberizer.Config = AnnotationNumberizer.Config()

        inputs: ModelInput = ModelInput()
        embedding: WordEmbedding.Config = WordEmbedding.Config()
Code example #3
0
    def test_generator(self):
        """ScriptedSequenceGenerator should produce a non-None hypothesis
        set for a freshly built seq2seq model."""
        model = Seq2SeqModel.from_config(
            Seq2SeqModel.Config(
                source_embedding=WordEmbedding.Config(embed_dim=512),
                target_embedding=WordEmbedding.Config(embed_dim=512),
            ),
            self._get_tensorizers(),
        )

        # Repack the example into the input dict the generator consumes
        # (tokens are transposed, matching the original test fixture).
        sample, _ = get_example_and_check()
        inputs = {"src_tokens": sample[0].t(), "src_lengths": sample[1]}

        generator = ScriptedSequenceGenerator(
            [model.model],
            model.trg_eos_index,
            ScriptedSequenceGenerator.Config(),
        )
        preds = generator.generate_hypo(inputs)

        self.assertIsNotNone(preds)
Code example #4
0
File: lmlstm.py  Project: xiachongbuyubing/pytext
    class Config(BaseModel.Config):
        """LM-LSTM model configuration: BOS/EOS-wrapped token input, a
        word embedding, a unidirectional BiLSTM representation, and an LM
        output layer."""

        class ModelInput(Model.Config.ModelInput):
            # Every sequence is wrapped with BOS and EOS markers.
            tokens: TokenTensorizer.Config = TokenTensorizer.Config(
                add_bos_token=True, add_eos_token=True
            )

        inputs: ModelInput = ModelInput()
        embedding: WordEmbedding.Config = WordEmbedding.Config()
        # BiLSTM module, but configured to run in one direction only.
        representation: BiLSTM.Config = BiLSTM.Config(bidirectional=False)
        decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
        output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
        tied_weights: bool = False
        stateful: bool = False
Code example #5
0
 def test_empty_mlp_layer_dims(self):
     """With no MLP layers configured, the module's reported output dim
     is just the raw embedding dim."""
     n_embeddings, emb_dim = 5, 4
     module = WordEmbedding(
         num_embeddings=n_embeddings,
         embedding_dim=emb_dim,
         embeddings_weight=None,
         init_range=[-1, 1],
         unk_token_idx=4,
         mlp_layer_dims=[],
     )
     self.assertEqual(module.embedding_dim, emb_dim)
Code example #6
0
 def _create_dummy_model(self):
     """Build a throwaway deprecated DocModel whose components save to
     this test's scratch paths."""
     model_config = DocModel_Deprecated.Config(
         representation=BiLSTMDocAttention.Config(
             save_path=self.representation_path
         ),
         decoder=MLPDecoder.Config(save_path=self.decoder_path),
     )
     feature_config = FeatureConfig(
         word_feat=WordEmbedding.Config(
             embed_dim=300, save_path=self.word_embedding_path
         ),
         save_path=self.embedding_path,
     )
     metadata = self._create_dummy_meta_data()
     return create_model(model_config, feature_config, metadata)
Code example #7
0
File: lmlstm.py  Project: amohamedwa/pytext-1
    class Config(BaseModel.Config):
        """LM-LSTM model configuration supporting either an LSTM or CNN
        representation, with a selectable Caffe2 export format."""

        class ModelInput(Model.Config.ModelInput):
            # Optional tokenizer; by default wraps sequences in BOS/EOS.
            tokens: Optional[TokenTensorizer.Config] = TokenTensorizer.Config(
                add_bos_token=True, add_eos_token=True
            )

        inputs: ModelInput = ModelInput()
        embedding: WordEmbedding.Config = WordEmbedding.Config()
        # Either representation works; default is a one-directional LSTM.
        representation: Union[BiLSTM.Config, CNN.Config] = BiLSTM.Config(
            bidirectional=False
        )
        decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
        output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
        tied_weights: bool = False
        stateful: bool = False
        caffe2_format: ExporterType = ExporterType.PREDICTOR
Code example #8
0
    def test_basic(self):
        """WordEmbedding with MLP layers should report the final MLP dim
        as its embedding_dim and emit (batch, seq, dim)-shaped output."""
        # Setup embedding with a two-layer MLP on top.
        n_embeddings, out_dim = 5, 6
        module = WordEmbedding(
            num_embeddings=n_embeddings,
            embedding_dim=4,
            embeddings_weight=None,
            init_range=[-1, 1],
            unk_token_idx=4,
            mlp_layer_dims=[3, out_dim],
        )
        # Reported dim reflects the last MLP layer, not the raw embedding.
        self.assertEqual(module.embedding_dim, out_dim)

        # Check output shape on a random token batch.
        batch, seq_len = 4, 6
        tokens = torch.randint(
            low=0, high=n_embeddings, size=[batch, seq_len]
        )
        out = module(tokens)
        self.assertEqual(list(out.size()), [batch, seq_len, out_dim])