예제 #1
0
파일: dataset.py 프로젝트: yushu-liu/medaCy
    def compute_counts(self):
        """
        Sums the per-file entity and relation counts of every document in this dataset.

        For each data file yielded by iterating over this object, an Annotations object
        is built from the file's ``ann_path`` and its own ``compute_counts()`` result is
        folded into the running totals key by key.

        :return: a dictionary with the keys 'entities' and 'relations', each mapping a
                 key reported by the per-file counts to its total over all files.
        """
        totals = {'entities': {}, 'relations': {}}

        for data_file in self:
            file_counts = Annotations(data_file.ann_path).compute_counts()
            # Entities are merged before relations, matching the per-file result layout.
            for category in ('entities', 'relations'):
                running = totals[category]
                for key, amount in file_counts[category].items():
                    # Keys not seen in earlier files start from zero.
                    running[key] = running.get(key, 0) + amount

        return totals
예제 #2
0
 def test_compute_counts(self):
     """Checks that compute_counts() on the first annotation file returns a dict."""
     # Path of the first annotation file inside the dataset's data directory.
     ann_path = join(self.dataset.get_data_directory(), self.ann_files[0])
     counts = Annotations(ann_path, annotation_type='ann').compute_counts()
     self.assertIsInstance(counts, dict)