Example #1
    def test_cross_validate(self):
        """Ensures that changes made in the package do not prevent cross_validate from running to completion"""
        model = Model(self.pipeline)

        # Test that invalid fold counts raise ValueError
        for num in [-1, 0, 1]:
            with self.assertRaises(ValueError):
                model.cross_validate(self.dataset, num)

        try:
            resulting_data = model.cross_validate(self.dataset, 2)
            # Logging the results makes it easier to verify that cross validation produced the expected output
            logging.debug(resulting_data)
        except Exception:
            self.fail("cross_validate raised an exception when run with valid arguments")
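The ValueError assertions above imply that cross_validate checks its fold count before doing any work. A minimal sketch of such a guard; the function name and message are assumptions, not medaCy's actual implementation:

def _validate_num_folds(num_folds):
    # Hypothetical guard mirroring the behavior the test asserts:
    # k-fold cross validation needs at least two folds
    if num_folds <= 1:
        raise ValueError("num_folds must be greater than 1, got %d" % num_folds)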
Example #2
    def test_cross_validate_create_groundtruth_predictions(self):
        """
        Tests that during cross validation, the medaCy groundtruth (that is, the version of the training dataset
        used by medaCy) is written as well as the predictions that are created for each fold
        """
        model = Model(self.pipeline)
        model.cross_validate(self.dataset,
                             num_folds=2,
                             prediction_directory=self.prediction_directory_3,
                             groundtruth_directory=self.groundtruth_directory)

        prediction_dataset = Dataset(self.prediction_directory_3)
        groundtruth_dataset = Dataset(self.groundtruth_directory)

        for d in [prediction_dataset, groundtruth_dataset]:
            self.assertIsInstance(d, Dataset)

        original_file_names = {d.file_name for d in self.dataset}
        prediction_file_names = {d.file_name for d in prediction_dataset}
        groundtruth_file_names = {d.file_name for d in groundtruth_dataset}

        for n in [prediction_file_names, groundtruth_file_names]:
            self.assertSetEqual(n, original_file_names)

        # Container for all Annotations in all files in all folds
        all_anns_all_folds_actual = Annotations([])

        # Test that fold groundtruth is written to file
        for fold_name in ["fold_1", "fold_2"]:
            fold_dataset = Dataset(groundtruth_dataset.data_directory /
                                   fold_name)
            for d in fold_dataset:
                fold_ann = Annotations(d.ann_path)
                groundtruth_ann = groundtruth_dataset[d.file_name]
                # Test that the entities in the fold groundtruth are a subset of the whole for that file
                self.assertTrue(set(fold_ann) <= set(groundtruth_ann))
                all_anns_all_folds_actual |= fold_ann

        # Container for all annotations pulled directly from the groundtruth dataset
        all_groundtruth_tuples = Annotations([])
        for ann in groundtruth_dataset.generate_annotations():
            all_groundtruth_tuples |= ann

        expected = set(all_groundtruth_tuples)
        actual = set(all_anns_all_folds_actual)
        self.assertSetEqual(expected, actual)
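The subset and union checks above treat Annotations instances as set-like collections of entity tuples. The invariant being asserted can be illustrated with plain sets; the (label, start, end, text) tuples below are invented for illustration:

# Each fold's groundtruth is a subset of the whole, and the folds together
# reconstruct the whole groundtruth exactly
fold_1 = {("Drug", 0, 7, "aspirin")}
fold_2 = {("Dose", 12, 17, "81 mg")}
groundtruth = fold_1 | fold_2

assert fold_1 <= groundtruth and fold_2 <= groundtruth
combined = set()
for fold in (fold_1, fold_2):
    combined |= fold
assert combined == groundtruth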
Example #3
    def test_cross_validate_fit_predict(self):
        """Tests that a model created with BERT can be fitted and used to predict, with and without the CRF layer"""
        pipeline = BertPipeline(entities=self.entities,
                                pretrained_model='bert-base-cased',
                                batch_size=self.batch_size,
                                cuda_device=cuda_device)

        pipeline_crf = BertPipeline(entities=self.entities,
                                    pretrained_model='bert-base-cased',
                                    batch_size=self.batch_size,
                                    cuda_device=cuda_device,
                                    using_crf=True)

        for pipe in [pipeline, pipeline_crf]:
            model = Model(pipe)
            model.cross_validate(self.dataset, 2)
            model.fit(self.dataset)
            resulting_dataset = model.predict(
                self.dataset, prediction_directory=self.prediction_directory)
            self.assertIsInstance(resulting_dataset, Dataset)
            # Test that there is at least one prediction
            if not any(resulting_dataset.generate_annotations()):
                warn("The model did not generate any predictions")
Example #4
pipeline = SystematicReviewPipeline(entities=entities, use_metamap=True)
# n_jobs sets the number of cores to utilize during feature extraction when training the model.
# Note: parallelization is done by forking, not threading, and hence uses a large amount of memory.
model = Model(pipeline, n_jobs=1)

# Write information about the model before training
with open(os.path.join(model_directory, "model_information.txt"), 'w') as model_info:
    model_info.write("Entities: [%s]\n" % ", ".join(entities))
    model_info.write("Training Files: %i\n" %
                     len(train_dataset.get_data_files()))
    model_info.write(model_notes + "\n")
    model_info.write(str(model))

model.fit(train_dataset)

# Dump the fitted model to disk with a timestamp
current_time = datetime.datetime.now().strftime('%Y_%m_%d_%H.%M.%S')
model.dump(os.path.join(model_directory,
                        "tac_2018_%s_%s.pkl" % (model_name, current_time)))

# Predict over the evaluation dataset using the model trained above, then store
# the predictions in the given output directory
model.predict(evaluation_dataset,
              prediction_directory=os.path.join(model_directory,
                                                'predictions'))

# Perform sequence-stratified cross validation on the training dataset.
# Note that all extracted features are held in memory while this runs.
model.cross_validate(training_dataset=train_dataset)
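In a later session, the dumped model can be restored and reused. A sketch, assuming Model.load is the counterpart to the Model.dump call above:

# Rebuild the pipeline/model, then restore the pickled weights dumped earlier
model = Model(pipeline, n_jobs=1)
model.load(os.path.join(model_directory,
                        "tac_2018_%s_%s.pkl" % (model_name, current_time)))
model.predict(evaluation_dataset,
              prediction_directory=os.path.join(model_directory, 'predictions'))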