Example #1
    def test_fit_predict_dump_load(self):
        """Fits a model, tests that it predicts correctly, dumps and loads it, then tests that it still predicts"""

        model = Model(self.pipeline)

        # Test attempting to predict before fitting
        with self.assertRaises(RuntimeError):
            model.predict('Lorem ipsum dolor sit amet.')

        model.fit(self.dataset,
                  groundtruth_directory=self.groundtruth_2_directory)
        # Test X and y data are set
        self.assertTrue(model.X_data)
        self.assertTrue(model.y_data)

        # Test that there is at least one prediction
        resulting_ann = model.predict(
            'To exclude the possibility that alterations in PSSD might be a consequence of changes in the volume of reference, we used a subset of the vibratome sections'
        )
        self.assertIsInstance(resulting_ann, Annotations)
        self.assertTrue(resulting_ann)

        # Test prediction over directory
        resulting_dataset = model.predict(
            self.dataset.data_directory,
            prediction_directory=self.prediction_directory)
        self.assertIsInstance(resulting_dataset, Dataset)
        self.assertEqual(len(self.dataset), len(resulting_dataset))

        # Test that groundtruth is written
        groundtruth_dataset = Dataset(self.groundtruth_2_directory)
        expected = [d.file_name for d in self.dataset]
        actual = [d.file_name for d in groundtruth_dataset]
        self.assertListEqual(expected, actual)

        # Test that the groundtruth ann files have content
        for ann in groundtruth_dataset.generate_annotations():
            self.assertTrue(ann)

        # Test pickling a model
        pickle_path = os.path.join(self.prediction_directory, 'test.pkl')
        model.dump(pickle_path)
        new_model = Model(self.pipeline)
        new_model.load(pickle_path)

        # Test that there is at least one prediction
        resulting_ann = new_model.predict(
            'To exclude the possibility that alterations in PSSD might be a consequence of changes in the volume of reference, we used a subset of the vibratome sections'
        )
        self.assertIsInstance(resulting_ann, Annotations)
        self.assertTrue(resulting_ann)
Example #2
    def test_predict(self):
        """
        predict() has different functionality depending on what is passed to it; therefore this test
        ensures that each type of input is handled correctly
        """

        # Init the Model
        pipe = TestingPipeline(entities=self.entities)
        sample_model_path = os.path.join(test_dir, 'sample_models',
                                         'sample_test_pipe.pkl')
        model = Model(pipe)
        model.load(sample_model_path)

        # Test passing a Dataset
        dataset_output = model.predict(self.dataset)
        self.assertIsInstance(dataset_output, Dataset)
        self.assertEqual(len(dataset_output), len(self.dataset))

        # Test passing a directory
        directory_output = model.predict(self.dataset.data_directory)
        self.assertIsInstance(directory_output, Dataset)
        self.assertEqual(len(directory_output), len(self.dataset))

        # Test passing a string
        string_output = model.predict('This is a sample string.')
        self.assertIsInstance(string_output, Annotations)

        # Test that the predictions are written to the expected location when no path is provided
        expected_dir = os.path.join(self.dataset.data_directory, 'predictions')
        self.assertTrue(os.path.isdir(expected_dir))

        # Delete that directory
        shutil.rmtree(expected_dir)

        # Test predicting to a specific directory
        model.predict(self.dataset.data_directory,
                      prediction_directory=self.prediction_directory_2)
        expected_files = os.listdir(self.prediction_directory_2)
        self.assertEqual(6, len(expected_files))
Example #3
    def test_prediction_with_testing_pipeline(self):
        """Tests that a model created with the BiLSTM+CRF can be fitted and used to predict"""
        pipeline = LstmSystematicReviewPipeline(
            entities=self.entities,
            word_embeddings=word_embeddings,
            cuda_device=cuda_device)

        model = Model(pipeline)
        model.fit(self.dataset)
        resulting_dataset = model.predict(
            self.dataset, prediction_directory=self.prediction_directory)
        self.assertIsInstance(resulting_dataset, Dataset)
        # Test that there is at least one prediction
        if not any(resulting_dataset.generate_annotations()):
            warn("The model did not generate any predictions")
Example #4
    def test_cross_validate_fit_predict(self):
        """Tests that a model created with BERT can be fitted and used to predict, with and without the CRF layer"""
        pipeline = BertPipeline(entities=self.entities,
                                pretrained_model='bert-base-cased',
                                batch_size=self.batch_size,
                                cuda_device=cuda_device)

        pipeline_crf = BertPipeline(entities=self.entities,
                                    pretrained_model='bert-base-cased',
                                    batch_size=self.batch_size,
                                    cuda_device=cuda_device,
                                    using_crf=True)

        for pipe in [pipeline, pipeline_crf]:
            model = Model(pipe)
            model.cross_validate(self.dataset, 2)
            model.fit(self.dataset)
            resulting_dataset = model.predict(
                self.dataset, prediction_directory=self.prediction_directory)
            self.assertIsInstance(resulting_dataset, Dataset)
            # Test that there is at least one prediction
            if not any(resulting_dataset.generate_annotations()):
                warn("The model did not generate any predictions")
Example #5
pipeline = SystematicReviewPipeline(entities=entities, use_metamap=True)
# n_jobs: the number of cores to utilize during feature extraction when training the model.
# Note: this is done by forking, not threading, and hence uses a large amount of memory.
model = Model(pipeline, n_jobs=1)

# Write information about the model before training
with open(model_directory + "/model_information.txt", 'w') as model_info:
    model_info.write("Entities: [%s]\n" % ", ".join(entities))
    model_info.write("Training Files: %i\n" %
                     len(train_dataset.get_data_files()))
    model_info.write(model_notes + "\n")
    model_info.write(str(model))

model.fit(train_dataset)

# dump fitted model
current_time = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y_%m_%d_%H.%M.%S')
model.dump(model_directory + "/tac_2018_%s_%s.pkl" %
           (model_name, current_time))

# Predict over the evaluation dataset using the model trained above, then store those
# predictions in the given output directory.
model.predict(evaluation_dataset,
              prediction_directory=os.path.join(model_directory,
                                                'predictions'))

# Perform sequence-stratified cross validation using the trained model.
# Note that all extracted features are stored in memory while this runs.
model.cross_validate(training_dataset=train_dataset)
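
A minimal follow-up sketch (reusing pipeline, model_directory, model_name, and current_time from the script above): reload the dumped pickle into a fresh Model and check that it still predicts, mirroring the round-trip shown in Example #1.

# Sketch: reload the fitted model from the pickle dumped above and confirm it still predicts.
# Assumes pipeline, model_directory, model_name, and current_time are still in scope.
reloaded_model = Model(pipeline)
reloaded_model.load(model_directory + "/tac_2018_%s_%s.pkl" %
                    (model_name, current_time))
sample_annotations = reloaded_model.predict('This is a sample string.')
print(sample_annotations)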