Example #1
    def test_batch_predict_caffe2_model(self):
        # Build a DocumentClassificationTask config, export the model to
        # Caffe2 and save a PyText snapshot, then check that batch prediction
        # through the exported model matches task.predict for several
        # cache_size settings.
        with tempfile.NamedTemporaryFile() as snapshot_file, tempfile.NamedTemporaryFile() as caffe2_model_file:
            train_data = tests_module.test_file("train_data_tiny.tsv")
            eval_data = tests_module.test_file("test_data_tiny.tsv")
            config = PyTextConfig(
                task=DocumentClassificationTask.Config(
                    model=DocModel.Config(
                        inputs=DocModel.Config.ModelInput(
                            tokens=TokenTensorizer.Config(),
                            dense=FloatListTensorizer.Config(
                                column="dense", dim=1, error_check=True
                            ),
                            labels=LabelTensorizer.Config(),
                        )
                    ),
                    data=Data.Config(
                        source=TSVDataSource.Config(
                            train_filename=train_data,
                            eval_filename=eval_data,
                            test_filename=eval_data,
                            field_names=["label", "slots", "text", "dense"],
                        )
                    ),
                ),
                version=21,
                save_snapshot_path=snapshot_file.name,
                export_caffe2_path=caffe2_model_file.name,
            )
            task = create_task(config.task)
            task.export(task.model, caffe2_model_file.name)
            model = task.model
            save(config, model, meta=None, tensorizers=task.data.tensorizers)

            pt_results = task.predict(task.data.data_source.test)

            def assert_caffe2_results_correct(caffe2_results):
                for pt_res, res in zip(pt_results, caffe2_results):
                    np.testing.assert_array_almost_equal(
                        pt_res["score"].tolist()[0],
                        [score[0] for score in res.values()],
                    )

            results = batch_predict_caffe2_model(
                snapshot_file.name, caffe2_model_file.name
            )
            self.assertEqual(4, len(results))
            assert_caffe2_results_correct(results)

            results = batch_predict_caffe2_model(
                snapshot_file.name, caffe2_model_file.name, cache_size=2
            )
            self.assertEqual(4, len(results))
            assert_caffe2_results_correct(results)

            results = batch_predict_caffe2_model(
                snapshot_file.name, caffe2_model_file.name, cache_size=-1
            )
            self.assertEqual(4, len(results))
            assert_caffe2_results_correct(results)
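
For standalone use, the same helper takes the snapshot path and the exported Caffe2 path directly. A minimal sketch, assuming both files were already produced as in the test above and that the helper is importable from pytext.workflow (import path assumed; file paths are hypothetical):

    from pytext.workflow import batch_predict_caffe2_model

    # Hypothetical paths; the snapshot comes from save() and the Caffe2 file
    # from task.export(), as in the test above. An optional cache_size keyword
    # is exercised there as well.
    results = batch_predict_caffe2_model(
        "/tmp/model.snapshot",
        "/tmp/model.caffe2",
    )
    print(len(results))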
Example #2
class Config(Task.Config):
    model: DocModel.Config = DocModel.Config()
    trainer: Trainer.Config = Trainer.Config()
    features: DocClassification.ModelInputConfig = (
        DocClassification.ModelInputConfig()
    )
    labels: DocClassification.TargetConfig = DocClassification.TargetConfig()
    data_handler: DocClassificationDataHandler.Config = (
        DocClassificationDataHandler.Config()
    )
    metric_reporter: ClassificationMetricReporter.Config = (
        ClassificationMetricReporter.Config()
    )
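
Nested defaults like these can be overridden field by field at construction time. A minimal sketch (the epochs value is illustrative and assumes Trainer.Config exposes an epochs field):

    # Replace only the trainer config; every other field keeps its default.
    config = Config(
        trainer=Trainer.Config(epochs=5),  # illustrative value; field assumed
    )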
Example #3
    def DISABLED_test_freeze_word_embedding(self):
        # freeze=True on the word features should stop gradients for the word
        # embedding itself, while its MLP and the dict embedding stay trainable.
        model = create_model(
            DocModel.Config(),
            FeatureConfig(
                word_feat=WordFeatConfig(freeze=True, mlp_layer_dims=[4]),
                dict_feat=DictFeatConfig(),
            ),
            metadata=mock_metadata(),
        )
        # word embedding
        for param in model.embedding[0].word_embedding.parameters():
            self.assertFalse(param.requires_grad)
        for param in model.embedding[0].mlp.parameters():
            self.assertTrue(param.requires_grad)

        # dict feat embedding
        for param in model.embedding[1].parameters():
            self.assertTrue(param.requires_grad)
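
The assertions above reduce to a standard PyTorch mechanism: freezing a module means clearing requires_grad on its parameters so the optimizer skips them. A minimal sketch of that mechanism, independent of PyText:

    import torch.nn as nn

    embedding = nn.Embedding(num_embeddings=100, embedding_dim=16)
    for param in embedding.parameters():
        param.requires_grad = False  # frozen: gradients are not computed
    assert not any(p.requires_grad for p in embedding.parameters())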
Example #4
class Config(NewTask.Config):
    model: BaseModel.Config = DocModel.Config()
    metric_reporter: Union[
        ClassificationMetricReporter.Config, PureLossMetricReporter.Config
    ] = ClassificationMetricReporter.Config()
Example #5
    @classmethod
    def example_config(cls):
        return cls.Config(
            model=BaggingDocEnsembleModel.Config(models=[DocModel.Config()])
        )
Example #6
    @classmethod
    def example_config(cls):
        return cls.Config(
            labels=[DocLabelConfig(), WordLabelConfig()],
            model=BaggingDocEnsemble.Config(models=[DocModel.Config()]),
        )
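
The ensemble configs in Examples #5 and #6 simply carry a list of sub-model configs, so widening the bag is a matter of list length. A minimal sketch (three identical default DocModel configs, purely illustrative):

    ensemble = BaggingDocEnsemble.Config(
        models=[DocModel.Config() for _ in range(3)]  # three bagged models
    )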
Example #7
    def DISABLED_test_freeze_all_embedding(self):
        # freeze=True at the FeatureConfig level should disable gradients on
        # every embedding parameter.
        model = create_model(
            DocModel.Config(),
            FeatureConfig(freeze=True),
            metadata=mock_metadata(),
        )
        for param in model.embedding.parameters():
            self.assertFalse(param.requires_grad)
Example #8
class Config(NewTask.Config):
    model: BaseModel.Config = DocModel.Config()
    metric_reporter: ClassificationMetricReporter.Config = (
        ClassificationMetricReporter.Config()
    )
Example #9
    def test_load_save(self):
        # Save one model's modules to disk, load them into a second model,
        # and verify parameter equality; a third, randomly initialised model
        # must differ from both.
        text_field_meta = FieldMeta()
        text_field_meta.vocab = VocabStub()
        text_field_meta.vocab_size = 4
        text_field_meta.unk_token_idx = 1
        text_field_meta.pad_token_idx = 0
        text_field_meta.pretrained_embeds_weight = None
        label_meta = FieldMeta()
        label_meta.vocab = VocabStub()
        label_meta.vocab_size = 3
        metadata = CommonMetadata()
        metadata.features = {DatasetFieldName.TEXT_FIELD: text_field_meta}
        metadata.target = label_meta

        saved_model = create_model(
            DocModel.Config(
                representation=BiLSTMDocAttention.Config(
                    save_path=self.representation_path),
                decoder=MLPDecoder.Config(save_path=self.decoder_path),
            ),
            FeatureConfig(save_path=self.embedding_path),
            metadata,
        )
        saved_model.save_modules()

        loaded_model = create_model(
            DocModel.Config(
                representation=BiLSTMDocAttention.Config(
                    load_path=self.representation_path),
                decoder=MLPDecoder.Config(load_path=self.decoder_path),
            ),
            FeatureConfig(load_path=self.embedding_path),
            metadata,
        )

        random_model = create_model(
            DocModel.Config(representation=BiLSTMDocAttention.Config(),
                            decoder=MLPDecoder.Config()),
            FeatureConfig(),
            metadata,
        )

        # Loaded and saved modules should be equal. Neither should be equal to
        # a randomly initialised model.

        for p1, p2, p3 in itertools.zip_longest(
                saved_model.embedding.parameters(),
                loaded_model.embedding.parameters(),
                random_model.embedding.parameters(),
        ):
            self.assertTrue(p1.equal(p2))
            self.assertFalse(p3.equal(p1))
            self.assertFalse(p3.equal(p2))

        for p1, p2, p3 in itertools.zip_longest(
                saved_model.representation.parameters(),
                loaded_model.representation.parameters(),
                random_model.representation.parameters(),
        ):
            self.assertTrue(p1.equal(p2))
            self.assertFalse(p3.equal(p1))
            self.assertFalse(p3.equal(p2))

        for p1, p2, p3 in itertools.zip_longest(
                saved_model.decoder.parameters(),
                loaded_model.decoder.parameters(),
                random_model.decoder.parameters(),
        ):
            self.assertTrue(p1.equal(p2))
            self.assertFalse(p3.equal(p1))
            self.assertFalse(p3.equal(p2))
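
Distilled, the round trip above depends only on the save_path/load_path fields of each module config. A minimal sketch for the decoder alone, using a temporary directory (the filename is hypothetical, metadata is assumed to be built as in the test, and unmentioned fields keep their defaults):

    import os
    import tempfile

    tmpdir = tempfile.mkdtemp()
    decoder_path = os.path.join(tmpdir, "decoder.pt")  # hypothetical filename

    # Write the decoder's state to disk; embedding and representation paths
    # work the same way via FeatureConfig and the representation config.
    saved = create_model(
        DocModel.Config(decoder=MLPDecoder.Config(save_path=decoder_path)),
        FeatureConfig(),
        metadata,
    )
    saved.save_modules()

    # Rebuild a second model whose decoder loads that saved state.
    loaded = create_model(
        DocModel.Config(decoder=MLPDecoder.Config(load_path=decoder_path)),
        FeatureConfig(),
        metadata,
    )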