def test_batch_predict_caffe2_model(self):
    """Export a doc-classification model to Caffe2 and verify that batch
    prediction from the exported model reproduces the PyTorch model's
    scores under the default, a bounded, and an unbounded cache size."""
    with tempfile.NamedTemporaryFile() as snapshot_file, tempfile.NamedTemporaryFile() as caffe2_model_file:
        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")
        config = PyTextConfig(
            task=DocumentClassificationTask.Config(
                model=DocModel.Config(
                    inputs=DocModel.Config.ModelInput(
                        tokens=TokenTensorizer.Config(),
                        dense=FloatListTensorizer.Config(
                            column="dense", dim=1, error_check=True
                        ),
                        labels=LabelTensorizer.Config(),
                    )
                ),
                data=Data.Config(
                    source=TSVDataSource.Config(
                        train_filename=train_data,
                        eval_filename=eval_data,
                        test_filename=eval_data,
                        field_names=["label", "slots", "text", "dense"],
                    )
                ),
            ),
            version=21,
            save_snapshot_path=snapshot_file.name,
            export_caffe2_path=caffe2_model_file.name,
        )
        task = create_task(config.task)
        task.export(task.model, caffe2_model_file.name)
        model = task.model
        save(config, model, meta=None, tensorizers=task.data.tensorizers)
        pt_results = task.predict(task.data.data_source.test)

        def check_against_pytorch(caffe2_results):
            # Caffe2 scores must agree with PyTorch's to float precision.
            for pt_res, c2_res in zip(pt_results, caffe2_results):
                np.testing.assert_array_almost_equal(
                    pt_res["score"].tolist()[0],
                    [score[0] for score in c2_res.values()],
                )

        # Same three configurations the original exercised: default cache,
        # cache_size=2, and cache_size=-1.
        for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
            results = batch_predict_caffe2_model(
                snapshot_file.name, caffe2_model_file.name, **extra_kwargs
            )
            self.assertEqual(4, len(results))
            check_against_pytorch(results)
class Config(Task.Config):
    """Configuration for a document-classification task.

    Each field below is a nested config object with its own defaults, so an
    empty Config() is fully usable out of the box.
    """

    # Model architecture configuration.
    model: DocModel.Config = DocModel.Config()
    # Training-loop configuration.
    trainer: Trainer.Config = Trainer.Config()
    # Input feature configuration for the model.
    features: DocClassification.ModelInputConfig = (
        DocClassification.ModelInputConfig())
    # Target/label configuration.
    labels: DocClassification.TargetConfig = DocClassification.TargetConfig(
    )
    # Data loading / batching configuration.
    data_handler: DocClassificationDataHandler.Config = (
        DocClassificationDataHandler.Config())
    # Metric computation and reporting configuration.
    metric_reporter: ClassificationMetricReporter.Config = (
        ClassificationMetricReporter.Config())
def DISABLED_test_freeze_word_embedding(self):
    """Freezing only the word feature should stop gradients on the word
    embedding weights while its MLP projection and the dict-feature
    embedding remain trainable."""
    feature_config = FeatureConfig(
        word_feat=WordFeatConfig(freeze=True, mlp_layer_dims=[4]),
        dict_feat=DictFeatConfig(),
    )
    model = create_model(
        DocModel.Config(), feature_config, metadata=mock_metadata()
    )

    # Word embedding weights are frozen ...
    word_module = model.embedding[0]
    for weight in word_module.word_embedding.parameters():
        self.assertFalse(weight.requires_grad)
    # ... but its MLP projection still trains.
    for weight in word_module.mlp.parameters():
        self.assertTrue(weight.requires_grad)

    # Dict-feature embedding is untouched by the word-level freeze.
    dict_module = model.embedding[1]
    for weight in dict_module.parameters():
        self.assertTrue(weight.requires_grad)
class Config(NewTask.Config):
    """Task configuration pairing a document model with a choice of
    classification or pure-loss metric reporting."""

    # Model architecture; defaults to a document-classification model.
    model: BaseModel.Config = DocModel.Config()
    # Either full classification metrics or loss-only reporting.
    metric_reporter: Union[ClassificationMetricReporter.Config,
                           PureLossMetricReporter.
                           Config] = ClassificationMetricReporter.Config()
def example_config(cls):
    """Return a sample task config: a bagging ensemble wrapping one DocModel."""
    ensemble = BaggingDocEnsembleModel.Config(models=[DocModel.Config()])
    return cls.Config(model=ensemble)
def example_config(cls):
    """Return a sample config with doc + word labels over a one-member
    bagging ensemble."""
    label_configs = [DocLabelConfig(), WordLabelConfig()]
    ensemble = BaggingDocEnsemble.Config(models=[DocModel.Config()])
    return cls.Config(labels=label_configs, model=ensemble)
def DISABLED_test_freeze_all_embedding(self):
    """With freeze=True on the whole FeatureConfig, no embedding parameter
    should require gradients."""
    model = create_model(
        DocModel.Config(), FeatureConfig(freeze=True), metadata=mock_metadata()
    )
    # Every parameter of every embedding module must be frozen.
    self.assertTrue(
        all(not p.requires_grad for p in model.embedding.parameters())
    )
class Config(NewTask.Config):
    """Task configuration: a document model reported on with standard
    classification metrics."""

    # Model architecture; defaults to a document-classification model.
    model: BaseModel.Config = DocModel.Config()
    # Accuracy/precision/recall-style classification metric reporting.
    metric_reporter: ClassificationMetricReporter.Config = (
        ClassificationMetricReporter.Config()
    )
def test_load_save(self):
    """Round-trip model modules through save_modules() and load_path and
    verify the loaded parameters equal the saved ones, while a freshly
    initialised model differs from both.

    Fix over the original: the saved/loaded/random parameter-comparison
    loop was triplicated verbatim for embedding, representation, and
    decoder; it is now a single helper driven by the module name.
    """
    # Minimal metadata stub: a tiny text vocab plus a 3-class label vocab.
    text_field_meta = FieldMeta()
    text_field_meta.vocab = VocabStub()
    text_field_meta.vocab_size = 4
    text_field_meta.unk_token_idx = 1
    text_field_meta.pad_token_idx = 0
    text_field_meta.pretrained_embeds_weight = None
    label_meta = FieldMeta()
    label_meta.vocab = VocabStub()
    label_meta.vocab_size = 3
    metadata = CommonMetadata()
    metadata.features = {DatasetFieldName.TEXT_FIELD: text_field_meta}
    metadata.target = label_meta

    # Model whose sub-modules get written out to the test's paths.
    saved_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(save_path=self.embedding_path),
        metadata,
    )
    saved_model.save_modules()
    # Model that reloads those same sub-modules from disk.
    loaded_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                load_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(load_path=self.decoder_path),
        ),
        FeatureConfig(load_path=self.embedding_path),
        metadata,
    )
    # Control: randomly initialised, neither saved nor loaded.
    random_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(),
            decoder=MLPDecoder.Config(),
        ),
        FeatureConfig(),
        metadata,
    )

    def assert_saved_equals_loaded_but_not_random(module_name):
        # Saved and loaded parameters must match pairwise; the random
        # module must differ from both. zip_longest (rather than zip)
        # makes a parameter-count mismatch fail loudly on a None instead
        # of being silently truncated.
        for p_saved, p_loaded, p_random in itertools.zip_longest(
            getattr(saved_model, module_name).parameters(),
            getattr(loaded_model, module_name).parameters(),
            getattr(random_model, module_name).parameters(),
        ):
            self.assertTrue(p_saved.equal(p_loaded))
            self.assertFalse(p_random.equal(p_saved))
            self.assertFalse(p_random.equal(p_loaded))

    for module_name in ("embedding", "representation", "decoder"):
        assert_saved_equals_loaded_but_not_random(module_name)