def test_get_param_groups_for_optimizer(self):
    word_embedding = WordEmbedding(
        num_embeddings=5,
        embedding_dim=4,
        embeddings_weight=None,
        init_range=(-1, 1),
        unk_token_idx=4,
        pad_token_idx=3,
        mlp_layer_dims=[],
    )
    char_embedding = CharacterEmbedding(
        num_embeddings=5, embed_dim=4, out_channels=2, kernel_sizes=[1, 2]
    )
    embedding_list = EmbeddingList([word_embedding, char_embedding], concat=True)

    param_groups = embedding_list.get_param_groups_for_optimizer()
    self.assertEqual(len(param_groups), 1)

    param_names = set(param_groups[0].keys())
    expected_param_names = {name for name, _ in embedding_list.named_parameters()}
    self.assertSetEqual(param_names, expected_param_names)

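# A minimal usage sketch (not from the original suite): the test above shows
# that get_param_groups_for_optimizer() returns groups mapping parameter names
# to the module's parameters, so they can be repacked into the {"params": ...}
# dicts that torch.optim optimizers expect. `build_optimizer` is a
# hypothetical helper; only torch and the EmbeddingList API exercised by the
# test are assumed.
import torch

def build_optimizer(embedding_list, lr=1e-3):
    param_groups = embedding_list.get_param_groups_for_optimizer()
    torch_groups = [{"params": list(group.values())} for group in param_groups]
    return torch.optim.Adam(torch_groups, lr=lr)
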
class Config(RNNGParserBase.Config):
    class ModelInput(BaseModel.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config(
            column="tokenized_text"
        )
        actions: AnnotationNumberizer.Config = AnnotationNumberizer.Config()

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()

def test_generator(self):
    model = Seq2SeqModel.from_config(
        Seq2SeqModel.Config(
            source_embedding=WordEmbedding.Config(embed_dim=512),
            target_embedding=WordEmbedding.Config(embed_dim=512),
        ),
        self._get_tensorizers(),
    )
    sample, _ = get_example_and_check()
    # The generator expects time-major source tokens, hence the transpose.
    repacked_inputs = {"src_tokens": sample[0].t(), "src_lengths": sample[1]}
    scripted_generator = ScriptedSequenceGenerator(
        [model.model], model.trg_eos_index, ScriptedSequenceGenerator.Config()
    )
    scripted_preds = scripted_generator.generate_hypo(repacked_inputs)
    self.assertIsNotNone(scripted_preds)

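# A standalone sketch of the input packing used in test_generator, assuming
# batch-major (batch, seq_len) LongTensors as inputs; all values below are
# illustrative dummies, not taken from get_example_and_check().
import torch

batch_size, seq_len = 2, 5
src_tokens = torch.randint(0, 100, (batch_size, seq_len))
src_lengths = torch.full((batch_size,), seq_len, dtype=torch.long)
repacked_inputs = {
    "src_tokens": src_tokens.t(),  # transpose to time-major, as in the test
    "src_lengths": src_lengths,
}
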
class Config(BaseModel.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: TokenTensorizer.Config = TokenTensorizer.Config(
            add_bos_token=True, add_eos_token=True
        )

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: BiLSTM.Config = BiLSTM.Config(bidirectional=False)
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
    tied_weights: bool = False
    stateful: bool = False

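# An illustration of the tied_weights flag above: weight tying shares the
# input embedding matrix with the output projection, which requires the
# embedding width to match the projection's input width. This is a
# plain-torch sketch of the general technique, not PyText's implementation.
import torch.nn as nn

vocab_size, dim = 100, 32
embedding = nn.Embedding(vocab_size, dim)
projection = nn.Linear(dim, vocab_size, bias=False)
projection.weight = embedding.weight  # both layers now train one matrix
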
def test_empty_mlp_layer_dims(self):
    num_embeddings = 5
    embedding_dim = 4
    embedding_module = WordEmbedding(
        num_embeddings=num_embeddings,
        embedding_dim=embedding_dim,
        embeddings_weight=None,
        init_range=[-1, 1],
        unk_token_idx=4,
        mlp_layer_dims=[],
    )
    # Without an MLP on top, the output width is the raw embedding dimension.
    self.assertEqual(embedding_module.embedding_dim, embedding_dim)

def _create_dummy_model(self):
    return create_model(
        DocModel_Deprecated.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(
            word_feat=WordEmbedding.Config(
                embed_dim=300, save_path=self.word_embedding_path
            ),
            save_path=self.embedding_path,
        ),
        self._create_dummy_meta_data(),
    )

class Config(BaseModel.Config):
    class ModelInput(Model.Config.ModelInput):
        tokens: Optional[TokenTensorizer.Config] = TokenTensorizer.Config(
            add_bos_token=True, add_eos_token=True
        )

    inputs: ModelInput = ModelInput()
    embedding: WordEmbedding.Config = WordEmbedding.Config()
    representation: Union[BiLSTM.Config, CNN.Config] = BiLSTM.Config(
        bidirectional=False
    )
    decoder: Optional[MLPDecoder.Config] = MLPDecoder.Config()
    output_layer: LMOutputLayer.Config = LMOutputLayer.Config()
    tied_weights: bool = False
    stateful: bool = False
    caffe2_format: ExporterType = ExporterType.PREDICTOR

def test_basic(self):
    # Set up the embedding with a trailing MLP projection.
    num_embeddings = 5
    output_dim = 6
    embedding_module = WordEmbedding(
        num_embeddings=num_embeddings,
        embedding_dim=4,
        embeddings_weight=None,
        init_range=[-1, 1],
        unk_token_idx=4,
        mlp_layer_dims=[3, output_dim],
    )
    # The module reports the last MLP dim as its effective embedding_dim.
    self.assertEqual(embedding_module.embedding_dim, output_dim)

    # Check output shape.
    input_batch_size, input_len = 4, 6
    token_ids = torch.randint(
        low=0, high=num_embeddings, size=[input_batch_size, input_len]
    )
    output_embedding = embedding_module(token_ids)
    expected_output_dims = [input_batch_size, input_len, output_dim]
    self.assertEqual(list(output_embedding.size()), expected_output_dims)

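# A sketch tying test_empty_mlp_layer_dims and test_basic together: the
# module's effective output width is the last MLP layer dim when one is
# configured, otherwise the raw embedding dimension. `expected_output_dim`
# is a hypothetical helper restating the rule the two tests assert.
def expected_output_dim(embedding_dim, mlp_layer_dims):
    return mlp_layer_dims[-1] if mlp_layer_dims else embedding_dim

assert expected_output_dim(4, []) == 4      # test_empty_mlp_layer_dims
assert expected_output_dim(4, [3, 6]) == 6  # test_basic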