def test_batch_processing(self):
        """Process a batch of dataset/output pairs under one directory.

        Reads the real task name from each output file's user-defined
        metadata, builds one loader per pair, runs the matching processor,
        and checks that exactly two reports are produced.
        """
        sys_out_dir = os.path.join(self.artifact_path, "CL-mt5base", "xnli")

        datasets = [
            os.path.join(sys_out_dir, "datasets", file)
            for file in os.listdir(os.path.join(sys_out_dir, "datasets"))
        ]

        outputs = [
            os.path.join(sys_out_dir, "outputs", file)
            for file in os.listdir(os.path.join(sys_out_dir, "outputs"))
        ]

        file_type = FileType.json
        # The true task is declared inside each output file; a dummy task is
        # only needed to construct a loader that can read that metadata.
        task_dummy = TaskType.text_classification
        tasks = []
        for dataset, output in zip(datasets, outputs):
            loader = get_custom_dataset_loader(
                task_dummy,
                dataset,
                output,
                dataset_file_type=file_type,
                output_file_type=file_type,
            )
            if not loader.user_defined_metadata_configs:
                raise ValueError(
                    f"user_defined_metadata_configs in system output {output} hasn't "
                    "been specified or task name should be specified")
            tasks.append(loader.user_defined_metadata_configs['task_name'])

        # Get loaders using real `task` and `file_type`
        loaders = [
            get_custom_dataset_loader(
                task,
                dataset,
                output,
                dataset_file_type=file_type,
                output_file_type=file_type,
            ) for dataset, output, task in zip(datasets, outputs, tasks)
        ]
        system_outputs = [loader.load() for loader in loaders]

        # Run analysis
        reports = []
        for loader, system_output, task in zip(loaders, system_outputs, tasks):
            # BUG FIX: build a fresh metadata dict per system. The original
            # created one dict before the loop and update()d it each
            # iteration, so metadata keys from one system leaked into the
            # processing of every subsequent system.
            metadata = dict(loader.user_defined_metadata_configs)
            report = get_processor(task).process(metadata=metadata,
                                                 sys_output=system_output)
            reports.append(report)

        self.assertEqual(len(reports), 2)
Esempio n. 2
0
    def test_generate_system_analysis(self):
        """End-to-end run of the word-segmentation processor on CoNLL data."""
        seg_loader = get_custom_dataset_loader(
            TaskType.word_segmentation,
            self.conll_dataset,
            self.conll_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.conll,
            FileType.conll,
        )
        samples = seg_loader.load()

        analysis_metadata = {
            "task_name": TaskType.word_segmentation.value,
            "metric_names": ["F1Score"],
        }

        sys_info = get_processor(TaskType.word_segmentation).process(
            analysis_metadata, samples)

        # Both fine-grained buckets and overall metrics must be populated.
        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 3
0
 def test_load_tsv(self):
     """Load a TSV dataset + text output pair and spot-check one sample."""
     pair_loader = get_custom_dataset_loader(
         TaskType.text_pair_classification,
         self.tsv_dataset,
         self.txt_output,
         Source.local_filesystem,
         Source.local_filesystem,
         FileType.tsv,
         FileType.text,
     )
     samples = pair_loader.load()
     self.assertEqual(len(samples), 5)
     expected_sample = {
         'text1': 'This church choir sings to the masses as they sing joyous '
         + 'songs from the book at a church.',
         'text2': 'The church is filled with song.',
         'true_label': 'entailment',
         'id': '1',
         'predicted_label': 'entailment',
     }
     self.assertEqual(samples[1], expected_sample)
    def test_e2e(self):
        """Full pipeline for aspect-based sentiment classification.

        Loads a TSV dataset with an in-memory text output, checks one
        parsed sample, then runs the processor end to end.
        """
        absa_loader = get_custom_dataset_loader(
            TaskType.aspect_based_sentiment_classification,
            self.tsv_dataset,
            self.txt_output,
            Source.local_filesystem,
            Source.in_memory,
            FileType.tsv,
            FileType.text,
        )
        samples = absa_loader.load()
        self.assertEqual(len(samples), 100)
        first_expected = {
            'aspect': 'Boot time',
            'text': 'Boot time  is super fast, around anywhere from 35 seconds to '
            + '1 minute.',
            'true_label': 'positive',
            'id': '0',
            'predicted_label': 'positive',
        }
        self.assertEqual(samples[0], first_expected)

        absa_metadata = {
            "task_name": TaskType.aspect_based_sentiment_classification,
            "metric_names": ["Accuracy", "F1Score"],
        }
        sys_info = get_processor(
            TaskType.aspect_based_sentiment_classification
        ).process(absa_metadata, samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 5
0
    def test_generate_system_analysis(self):
        """NER end-to-end run, plus a check that training-set-dependent
        features stay disabled when no training dataset is provided."""
        ner_loader = get_custom_dataset_loader(
            TaskType.named_entity_recognition,
            self.conll_dataset,
            self.conll_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.conll,
            FileType.conll,
        )
        samples = ner_loader.load()

        ner_metadata = {
            "task_name": TaskType.named_entity_recognition.value,
            "metric_names": ["F1Score"],
        }
        sys_info = get_processor(TaskType.named_entity_recognition).process(
            ner_metadata, samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)

        # ------ Deep Test --------
        # "span_econ" and "span_efre" depend on training-set statistics and
        # must not be active without a training dataset.
        active = sys_info.results.fine_grained.keys()
        self.assertTrue("span_econ" not in active
                        and "span_efre" not in active)
Esempio n. 6
0
    def test_no_user_defined_features(self):
        """KG link prediction without custom features: loader metadata stays
        empty and processing with explicit metric configs succeeds."""
        kg_loader = get_custom_dataset_loader(
            TaskType.kg_link_tail_prediction,
            self.test_data,
            self.dataset_no_custom_feature,
            dataset_file_type=FileType.json,
            output_file_type=FileType.json,
        )
        loaded = kg_loader.load()
        self.assertEqual(loaded.metadata, FileLoaderMetadata())

        kg_metadata = {
            "task_name": TaskType.kg_link_tail_prediction.value,
            "dataset_name": "fb15k-237-subset",
            "metric_configs": [
                HitsConfig(name='Hits4', hits_k=4),  # k is adjustable here
                MeanReciprocalRankConfig(name='MRR'),
                MeanRankConfig(name='MR'),
            ],
        }

        sys_info = get_processor(TaskType.kg_link_tail_prediction.value).process(
            kg_metadata, loaded.samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 7
0
    def test_sort_buckets_by_value(self):
        """Buckets sorted by performance value must be strictly decreasing."""
        kg_loader = get_custom_dataset_loader(
            TaskType.kg_link_tail_prediction,
            self.test_data,
            self.dataset_no_custom_feature,
        )
        loaded = kg_loader.load()
        self.assertEqual(loaded.metadata, FileLoaderMetadata())

        sort_metadata = {
            "task_name": TaskType.kg_link_tail_prediction.value,
            "dataset_name": "fb15k-237",
            "metric_configs": [
                HitsConfig(name='Hits4', hits_k=4),
                MeanReciprocalRankConfig(name='MRR'),
                MeanRankConfig(name='MR'),
            ],
            "sort_by": "performance_value",
            "sort_by_metric": "first",
        }

        sys_info = get_processor(TaskType.kg_link_tail_prediction.value).process(
            sort_metadata, loaded.samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)

        buckets = sys_info.results.fine_grained['symmetry']
        if len(buckets) <= 1:  # nothing to compare with a single bucket
            return
        # Each adjacent pair must be in strictly decreasing order.
        for left, right in zip(buckets, buckets[1:]):
            self.assertGreater(left.performances[0].value,
                               right.performances[0].value)
    def test_multiple_qa_customized_feature(self):
        """User-defined features in a multiple-choice QA output must be
        loaded as lists and survive processing."""
        fig_qa_dataset = os.path.join(self.artifact_path, "dataset_fig_qa.json")
        fig_qa_output = os.path.join(self.artifact_path,
                                     "output_fig_qa_customized_features.json")
        qa_loader = get_custom_dataset_loader(
            TaskType.qa_multiple_choice,
            fig_qa_dataset,
            fig_qa_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        loaded = qa_loader.load()
        first = loaded.samples[0]
        self.assertIsInstance(first["commonsense_category"], list)
        self.assertEqual(first["commonsense_category"], ["obj", "cul"])

        qa_metadata = {
            "task_name": TaskType.qa_multiple_choice.value,
            "dataset_name": "fig_qa",
            "metric_names": ["Accuracy"],
            # don't forget this, otherwise the user-defined features will be ignored
            "user_defined_features_configs": loaded.metadata.custom_features,
        }

        sys_info = get_processor(TaskType.qa_multiple_choice.value).process(
            qa_metadata, loaded.samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
    def test_no_user_defined_features(self):
        """A file with no custom features yields empty loader metadata and
        still processes with a single Hits metric config."""
        plain_path = os.path.join(self.artifact_path, "no_custom_feature.json")
        kg_loader = get_custom_dataset_loader(
            TaskType.kg_link_tail_prediction,
            plain_path,
            plain_path,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        loaded = kg_loader.load()
        self.assertEqual(loaded.metadata, FileLoaderMetadata())

        kg_metadata = {
            "task_name": TaskType.kg_link_tail_prediction.value,
            "dataset_name": "fb15k-237-subset",
            "metric_configs": [HitsConfig(name='Hits4', hits_k=4)],
        }

        sys_info = get_processor(TaskType.kg_link_tail_prediction.value).process(
            kg_metadata, loaded.samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
    def test_custom_features(self):
        """Custom features declared in a JSON output must appear both in the
        loaded samples and in the fine-grained results."""
        mt_loader = get_custom_dataset_loader(
            TaskType.machine_translation,
            self.tsv_dataset,
            self.json_output_with_features,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.tsv,
            FileType.json,
        )
        loaded = mt_loader.load()
        self.assertEqual(len(loaded), 4)
        expected_first = {
            'source': 'Ak sa chcete dostať ešte hlbšie, môžete si všimnúť '
            + 'trhlinky.',
            'reference': 'Now just to get really deep in , you can really get to '
            + 'the cracks .',
            'id': '0',
            'hypothesis': 'If you want to get a deeper , you can see the forces .',
            'num_capital_letters': 1,
        }
        self.assertEqual(loaded[0], expected_first)

        # Pass the loader metadata (including the custom feature configs)
        # straight through to the processor as a plain dict.
        sys_info = get_processor(TaskType.machine_translation.value).process(
            dataclasses.asdict(loaded.metadata), loaded.samples)
        self.assertIn('num_capital_letters', sys_info.results.fine_grained)
    def test_generate_system_analysis(self):
        """Machine-translation end-to-end analysis with BLEU."""
        mt_loader = get_custom_dataset_loader(
            TaskType.machine_translation,
            self.tsv_dataset,
            self.txt_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.tsv,
            FileType.text,
        )
        samples = mt_loader.load()

        mt_metadata = {
            "task_name": TaskType.machine_translation.value,
            "dataset_name": "ted_multi",
            "metric_names": ["bleu"],
        }

        sys_info = get_processor(TaskType.machine_translation.value).process(
            mt_metadata, samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 12
0
    def test_extractive_qa_en(self):
        """English XQuAD extractive QA: sample structure plus overall
        ExactMatch/F1 close to the known reference values."""
        en_dataset = os.path.join(self.artifact_path, "dataset-xquad-en.json")
        en_output = os.path.join(self.artifact_path, "output-xquad-en.json")
        qa_loader = get_custom_dataset_loader(
            TaskType.qa_extractive,
            en_dataset,
            en_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        samples = qa_loader.load()
        self.assertEqual(len(samples), 1190)
        first = samples[0]
        self.assertEqual(first["predicted_answers"], {"text": "308"})
        self.assertEqual(first["id"], "0")
        self.assertEqual(first["answers"], {
            "answer_start": [-1],
            "text": ["308"],
        })
        self.assertEqual(
            first["question"],
            "How many points did the Panthers defense surrender ?")
        self.assertTrue(first["context"].startswith("The Panthers"))

        qa_metadata = {
            "task_name": TaskType.qa_extractive,
            "dataset_name": "squad",
            "metric_names": ["F1ScoreQA", "ExactMatchQA"],
        }

        sys_info = get_processor(TaskType.qa_extractive).process(
            qa_metadata, samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
        get_logger('test').info(f'OVERALL={sys_info.results.overall}')
        # Reference scores for this fixture, checked to 2 decimal places.
        self.assertAlmostEqual(
            sys_info.results.overall["ExactMatch"].value,
            0.6974789915966386,
            2,
            "almost equal",
        )
        self.assertAlmostEqual(
            sys_info.results.overall["F1"].value,
            0.8235975260931867,
            2,
            "almost equal",
        )
Esempio n. 13
0
 def test_simple_example(self):
     """Minimal example: load a KG dataset, process it, write the report."""
     data_path = self.dataset_no_custom_feature
     kg_task = TaskType.kg_link_tail_prediction
     loaded = get_custom_dataset_loader(kg_task, data_path, data_path).load()
     # Run the processor and dump the resulting report to the current dir.
     sys_info = get_processor(TaskType.kg_link_tail_prediction.value).process(
         metadata={}, sys_output=loaded.samples)
     sys_info.write_to_directory('./')
 def test_load_json(self):
     """A JSON dataset/output pair for multiple-choice QA loads 4 samples."""
     qa_loader = get_custom_dataset_loader(
         TaskType.qa_multiple_choice,
         self.json_dataset,
         self.json_output,
         Source.local_filesystem,
         Source.local_filesystem,
         FileType.json,
         FileType.json,
     )
     self.assertEqual(len(qa_loader.load()), 4)
Esempio n. 15
0
 def test_load_custom_dataset_tsv(self):
     """Default loader settings handle a TSV dataset with text output."""
     tc_loader = get_custom_dataset_loader(  # use defaults
         TaskType.text_classification,
         self.tsv_dataset,
         self.txt_output,
     )
     samples = tc_loader.load()
     self.assertEqual(len(samples), 10)
     expected = {
         "text": "a weird and wonderful comedy .",
         "true_label": "positive",
         "id": "6",
         "predicted_label": "positive",
     }
     self.assertEqual(samples[6], expected)
Esempio n. 16
0
    def test_extractive_qa_zh(self):
        """Chinese XQuAD extractive QA with explicit source/target language."""
        zh_dataset = os.path.join(self.artifact_path, "dataset-xquad-zh.json")
        zh_output = os.path.join(self.artifact_path, "output-xquad-zh.json")
        qa_loader = get_custom_dataset_loader(
            TaskType.qa_extractive,
            zh_dataset,
            zh_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        samples = qa_loader.load()

        zh_metadata = {
            "task_name": TaskType.qa_extractive.value,
            "dataset_name": "squad",
            "metric_names": ["F1Score", "ExactMatch"],
            "source_language": "zh",
            "target_language": "zh",
        }

        sys_info = get_processor(TaskType.qa_extractive).process(
            zh_metadata, samples)
        get_logger('test').info(
            f'--------- sys_info.metric_configs {sys_info.metric_configs}')

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
        # Reference scores for this fixture, checked to 2 decimal places.
        self.assertAlmostEqual(
            sys_info.results.overall["ExactMatch"].value,
            0.6285714285714286,
            2,
            "almost equal",
        )
        self.assertAlmostEqual(
            sys_info.results.overall["F1"].value,
            0.7559651817716333,
            2,
            "almost equal",
        )
Esempio n. 17
0
    def test_process_metadata_in_output_file(self):
        """Metadata embedded in the output file is picked up by the loader
        and usable as processor metadata.

        Bug fix: the original asserted ``assertNotEqual(data.metadata,
        FileLoaderMetadata)`` — comparing against the *class*, which is
        never equal to an instance, so the check was vacuous. Compare
        against an empty instance ``FileLoaderMetadata()`` as the sibling
        tests do.
        """
        loader = get_custom_dataset_loader(
            TaskType.text_classification,
            self.json_dataset,
            self.json_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        data = loader.load()
        # Metadata must actually have been populated from the output file.
        self.assertNotEqual(data.metadata, FileLoaderMetadata())
        metadata = dataclasses.asdict(data.metadata)
        processor = get_processor(TaskType.text_classification)

        sys_info = processor.process(metadata, data.samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 18
0
 def test_load_custom_dataset_json(self):
     """JSON loading populates metadata and parses all samples."""
     tc_loader = get_custom_dataset_loader(
         TaskType.text_classification,
         self.json_dataset,
         self.json_output,
         dataset_file_type=FileType.json,
         output_file_type=FileType.json,
     )
     loaded = tc_loader.load()
     self.assertNotEqual(loaded.metadata, FileLoaderMetadata())
     self.assertEqual(len(loaded), 7)
     expected = {
         'text': 'guaranteed to move anyone who ever , , or rolled .',
         'true_label': 'positive',
         'id': '6',
         'predicted_label': 'positive',
     }
     self.assertEqual(loaded[6], expected)
Esempio n. 19
0
    def test_process(self):
        """In-memory loading: file contents are passed as strings."""
        tc_metadata = {
            "task_name": TaskType.text_classification,
            "metric_names": ["Accuracy", "F1Score"],
        }
        tc_loader = get_custom_dataset_loader(
            TaskType.text_classification,
            load_file_as_str(self.tsv_dataset),
            load_file_as_str(self.txt_output),
            Source.in_memory,
            Source.in_memory,
            FileType.tsv,
            FileType.text,
        )
        sys_info = get_processor(TaskType.text_classification).process(
            tc_metadata, tc_loader.load())

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
    def test_generate_system_analysis(self):
        """Multiple-choice QA end-to-end analysis with Accuracy."""
        qa_loader = get_custom_dataset_loader(
            TaskType.qa_multiple_choice,
            self.json_dataset,
            self.json_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        samples = qa_loader.load()

        qa_metadata = {
            "task_name": TaskType.qa_multiple_choice.value,
            "dataset_name": "fig_qa",
            "metric_names": ["Accuracy"],
        }

        sys_info = get_processor(TaskType.qa_multiple_choice.value).process(
            qa_metadata, samples)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 21
0
    def test_snli(self):
        """Text-pair classification (SNLI-style) end-to-end run.

        Bug fix: the metadata previously declared
        ``TaskType.text_classification`` while both the loader and the
        processor used ``text_pair_classification``; the task name in the
        metadata now matches the task actually being run.
        """
        metadata = {
            "task_name": TaskType.text_pair_classification.value,
            "metric_names": ["Accuracy"],
        }
        loader = get_custom_dataset_loader(
            TaskType.text_pair_classification,
            self.tsv_dataset,
            self.txt_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.tsv,
            FileType.text,
        )
        data = loader.load()
        processor = get_processor(TaskType.text_pair_classification)

        sys_info = processor.process(metadata, data)

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 22
0
    def test_generate_system_analysis(self):
        """System details read from a JSON file must survive processing.

        Bug fix: the original ended with ``assertIsNotNone(obj, {...})``,
        where the second argument is the failure *message*, not an expected
        value — so the dict was never compared. Use an explicit equality
        assertion against the expected system details instead.
        """
        path_system_details = os.path.join(
            test_artifacts_path, "test_system_details.json"
        )
        dataset_data = os.path.join(
            test_artifacts_path, "text_classification", "dataset.tsv"
        )
        output_data = os.path.join(
            test_artifacts_path, "text_classification", "output.txt"
        )

        with open(path_system_details) as fin:
            system_details = json.load(fin)

        metadata = {
            "task_name": TaskType.text_classification,
            "metric_names": ["Accuracy"],
            "system_details": system_details,
        }

        loader = get_custom_dataset_loader(
            TaskType.text_classification,
            dataset_data,
            output_data,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.tsv,
            FileType.text,
        )
        data = loader.load()
        processor = get_processor(TaskType.text_classification)

        sys_info = processor.process(metadata, data)

        self.assertIsNotNone(sys_info.system_details)
        self.assertEqual(
            sys_info.system_details,
            {"learning_rate": 0.0001, "number_of_layers": 10},
        )
 def test_load_tsv(self):
     """An MT TSV dataset + text output pair loads with expected fields."""
     mt_loader = get_custom_dataset_loader(
         TaskType.machine_translation,
         self.tsv_dataset,
         self.txt_output,
         Source.local_filesystem,
         Source.local_filesystem,
         FileType.tsv,
         FileType.text,
     )
     samples = mt_loader.load()
     self.assertEqual(len(samples), 4)
     expected = {
         'source': 'Ak sa chcete dostať ešte hlbšie, môžete si všimnúť '
         + 'trhlinky.',
         'reference': 'Now just to get really deep in , you can really get to '
         + 'the cracks .',
         'id': '0',
         'hypothesis': 'If you want to get a deeper , you can see the forces .',
     }
     self.assertEqual(samples[0], expected)
Esempio n. 24
0
    def test_process_training_set_dependent_features(self):
        """Processing with a named dataset ("ag_news") succeeds, enabling
        training-set-dependent statistics."""
        tc_metadata = {
            "task_name": TaskType.text_classification.value,
            "metric_names": ["Accuracy", "F1Score"],
            "dataset_name": "ag_news",
            "reload_stat": False,
        }
        tc_loader = get_custom_dataset_loader(
            TaskType.text_classification,
            self.json_dataset,
            self.json_output,
            Source.local_filesystem,
            Source.local_filesystem,
            FileType.json,
            FileType.json,
        )
        sys_info = get_processor(TaskType.text_classification).process(
            tc_metadata, tc_loader.load())

        self.assertIsNotNone(sys_info.results.fine_grained)
        self.assertGreater(len(sys_info.results.overall), 0)
Esempio n. 25
0
 def test_with_user_defined_features(self):
     """A dataset declaring one custom feature exposes it on every sample."""
     kg_loader = get_custom_dataset_loader(  # use defaults
         TaskType.kg_link_tail_prediction,
         self.test_data,
         self.dataset_with_custom_feature,
     )
     loaded = kg_loader.load()
     self.assertEqual(len(loaded.metadata.custom_features), 1)
     self.assertEqual(len(loaded), 10)
     expected_keys = {
         "id",
         "true_head",
         "true_link",
         'true_head_decipher',
         'true_tail_decipher',
         "true_tail",
         "predict",
         "predictions",
         "rel_type",
         "true_rank",
     }
     self.assertEqual(set(loaded[0].keys()), expected_keys)