Beispiel #1
0
 def default_output_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default output file loaders, keyed by file type.

     Only a JSON loader is provided. Each field is stored under the same
     name it is read from.
     """
     # (field name, expected type) pairs for the JSON loader.
     json_schema = (
         ("predict", str),
         ("predictions", list),
         ("true_rank", int),
     )
     json_fields = [
         FileLoaderField(name, name, dtype) for name, dtype in json_schema
     ]
     return {FileType.json: JSONFileLoader(json_fields)}
Beispiel #2
0
 def default_output_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default output file loaders, keyed by file type.

     The CoNLL loader maps source field ``1`` to ``pred_tags``. The JSON
     loader keeps ``tokens`` as-is and maps ``predicted_tags`` to
     ``pred_tags``.
     """
     conll_loader = CoNLLFileLoader([FileLoaderField(1, "pred_tags", str)])
     json_loader = JSONFileLoader([
         FileLoaderField("tokens", "tokens", list),
         FileLoaderField("predicted_tags", "pred_tags", list),
     ])
     return {FileType.conll: conll_loader, FileType.json: json_loader}
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     JSON and Datalab inputs share one schema: a ``text`` string and an
     ``edits`` dict, each kept under its source name.
     """

     def make_fields() -> list:
         # Build fresh field objects on every call so the two loaders
         # never share instances.
         return [
             FileLoaderField(key, key, dtype)
             for key, dtype in (('text', str), ('edits', dict))
         ]

     return {
         FileType.json: JSONFileLoader(make_fields()),
         FileType.datalab: DatalabFileLoader(make_fields()),
     }
Beispiel #4
0
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     Both loaders produce the target fields ``tokens`` and ``true_tags``.
     The CoNLL loader reads them from source fields ``0`` and ``1`` as
     ``str``; the Datalab loader reads ``tokens`` and ``tags`` as ``list``.
     """
     tokens_target, tags_target = "tokens", "true_tags"
     conll_loader = CoNLLFileLoader([
         FileLoaderField(0, tokens_target, str),
         FileLoaderField(1, tags_target, str),
     ])
     datalab_loader = DatalabFileLoader([
         FileLoaderField("tokens", tokens_target, list),
         FileLoaderField("tags", tags_target, list),
     ])
     return {
         FileType.conll: conll_loader,
         FileType.datalab: datalab_loader,
     }
Beispiel #5
0
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     Plain-text input uses a default-constructed ``TextFileLoader``. JSON
     and Datalab inputs each read a single ``text`` string field.
     """
     loaders = {FileType.text: TextFileLoader()}
     # The structured formats share the same one-field schema.
     for file_type, loader_cls in (
         (FileType.json, JSONFileLoader),
         (FileType.datalab, DatalabFileLoader),
     ):
         loaders[file_type] = loader_cls(
             [FileLoaderField("text", 'text', str)])
     return loaders
Beispiel #6
0
    def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
        """Return the default dataset file loaders, keyed by file type.

        Both the JSON and the Datalab loader emit five target fields:
        ``true_head``, ``true_link`` and ``true_tail`` read directly from the
        input, plus ``true_head_decipher`` and ``true_tail_decipher``, which
        are read from the same head/tail source fields but passed through a
        ``KGMapPreprocessor`` built on the entity dictionary.

        Note:
            Each call obtains the entity dictionary through
            ``cache_api.cache_online_file`` and parses it from disk.
        """
        file_path = cache_api.cache_online_file(
            'http://phontron.com/download/explainaboard/pre_computed/kg/entity2wikidata.json',  # noqa
            'pre_computed/kg/entity2wikidata.json',
        )
        # Decode explicitly as UTF-8: JSON interchange is UTF-8, while the
        # default for open() depends on the platform locale.
        with open(file_path, 'r', encoding='utf-8') as file:
            entity_dic = json.load(file)

        map_preprocessor = KGMapPreprocessor(
            resources={"dictionary": entity_dic})

        def build_fields(head_key: str, link_key: str, tail_key: str) -> list:
            # One schema, parameterized by the source keys of each format.
            return [
                FileLoaderField(head_key, "true_head", str),
                FileLoaderField(head_key,
                                "true_head_decipher",
                                str,
                                parser=map_preprocessor),
                FileLoaderField(link_key, "true_link", str),
                FileLoaderField(tail_key, "true_tail", str),
                FileLoaderField(tail_key,
                                "true_tail_decipher",
                                str,
                                parser=map_preprocessor),
            ]

        return {
            FileType.json:
            JSONFileLoader(
                build_fields("gold_head", "gold_predicate", "gold_tail")),
            FileType.datalab:
            DatalabFileLoader(build_fields("head", "link", "tail")),
        }
 def default_output_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default output file loaders, keyed by file type.

     Only a JSON loader is provided; it reads one ``predicted_answers``
     dict field and keeps its name.
     """
     answers_key = "predicted_answers"
     answers_field = FileLoaderField(answers_key, answers_key, dict)
     return {FileType.json: JSONFileLoader([answers_field])}
Beispiel #8
0
 def test_tsv_validation(self):
     # Building a TSV loader with the string source "0" and
     # use_idx_as_id=True is expected to raise ValueError.
     with self.assertRaises(ValueError):
         TSVFileLoader(
             [FileLoaderField("0", "field0", str)],
             use_idx_as_id=True,
         )
Beispiel #9
0
 def default_output_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default output file loaders, keyed by file type.

     Both the text and the JSON loader produce a single ``hypothesis``
     string field.
     """
     hypothesis = "hypothesis"
     text_loader = TextFileLoader(hypothesis, str)
     json_loader = JSONFileLoader(
         [FileLoaderField(hypothesis, hypothesis, str)])
     return {FileType.text: text_loader, FileType.json: json_loader}
Beispiel #10
0
 def test_conll_loader(self):
     # The two NER fixtures (named for tab vs. space delimiters) must load
     # to equal results, for the gold-tag loader and the predicted-tag
     # loader alike.
     ner_dir = os.path.join(test_artifacts_path, "ner")
     tabs_path = os.path.join(ner_dir, "dataset.tsv")
     spaces_path = os.path.join(ner_dir, "dataset-space.tsv")

     true_loader = CoNLLFileLoader([
         FileLoaderField(0, 'tokens', str),
         FileLoaderField(1, 'true_tags', str),
     ])
     pred_loader = CoNLLFileLoader([FileLoaderField(1, 'pred_tags', str)])

     for loader in (true_loader, pred_loader):
         from_tabs = loader.load(tabs_path, Source.local_filesystem)
         from_spaces = loader.load(spaces_path, Source.local_filesystem)
         self.assertEqual(from_tabs, from_spaces)
Beispiel #11
0
 def _statistics_func(self, samples: Iterator, sys_info: SysOutputInfo):
     """Collect source- and target-language vocabularies from ``samples``.

     Args:
         samples: The samples to scan. Text is looked up in each sample via
             ``FileLoader.find_field`` using the
             ``('translation', <language>)`` source key.
         sys_info: System information supplying ``source_language``,
             ``target_language``, ``source_tokenizer`` and
             ``target_tokenizer``.

     Returns:
         A dict with the keys ``'source_vocab'`` and ``'target_vocab'``,
         each holding the result of ``accumulate_vocab_from_samples``.

     Raises:
         ValueError: If the source or the target language is ``None``.
     """
     if sys_info.source_language is None or sys_info.target_language is None:
         raise ValueError(
             'source or target languages must be specified to load '
             f'translation data, but source={sys_info.source_language} '
             f', target={sys_info.target_language}'
         )

     # ``samples`` is handed to two separate vocabulary passes below. A
     # one-shot iterator would be drained by the first pass (presumably
     # each pass iterates it fully), leaving the target vocabulary empty,
     # so materialize it. Re-iterable containers are left untouched.
     if iter(samples) is samples:
         samples = list(samples)

     src = FileLoaderField(('translation', sys_info.source_language), '', str)
     trg = FileLoaderField(('translation', sys_info.target_language), '', str)
     return {
         'source_vocab': accumulate_vocab_from_samples(
             samples,
             lambda x: FileLoader.find_field(x, src),
             unwrap(sys_info.source_tokenizer),
         ),
         'target_vocab': accumulate_vocab_from_samples(
             samples,
             lambda x: FileLoader.find_field(x, trg),
             unwrap(sys_info.target_tokenizer),
         ),
     }
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     JSON and Datalab inputs share one schema: ``context`` (str),
     ``options`` (list), ``question_mark`` (str) and ``answers`` (dict),
     each kept under its source name.
     """
     schema = (
         ("context", str),
         ("options", list),
         ("question_mark", str),
         ("answers", dict),
     )

     def fresh_fields() -> list:
         # New field objects per loader; nothing is shared between them.
         return [FileLoaderField(key, key, dtype) for key, dtype in schema]

     return {
         FileType.json: JSONFileLoader(fresh_fields()),
         FileType.datalab: DatalabFileLoader(fresh_fields()),
     }
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     Every loader produces the string fields ``text1``, ``text2`` and
     ``true_label``; only the source keys differ between formats.
     """
     targets = ("text1", "text2", "true_label")

     def fields_from(sources) -> list:
         # Pair each format-specific source key with its shared target name.
         return [
             FileLoaderField(source, target, str)
             for source, target in zip(sources, targets)
         ]

     return {
         FileType.tsv:
         TSVFileLoader(fields_from((0, 1, 2))),
         FileType.json:
         JSONFileLoader(fields_from(("text1", "text2", "true_label"))),
         FileType.datalab:
         DatalabFileLoader(fields_from(("text1", "text2", "label"))),
     }
Beispiel #14
0
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     Every loader produces the string fields ``aspect``, ``text`` and
     ``true_label``. The TSV loader reads them from source fields 0-2;
     JSON and Datalab read them from keys of the same names.
     """
     names = ("aspect", "text", "true_label")

     def named_fields() -> list:
         # Source key and target name coincide for the keyed formats.
         return [FileLoaderField(name, name, str) for name in names]

     return {
         FileType.tsv:
         TSVFileLoader([
             FileLoaderField(position, name, str)
             for position, name in enumerate(names)
         ]),
         FileType.json:
         JSONFileLoader(named_fields()),
         FileType.datalab:
         DatalabFileLoader(named_fields()),
     }
Beispiel #15
0
 def test_load_in_memory_tsv(self):
     # Load dataset and output from in-memory strings, then check the
     # sample count and the keys of the first merged sample.
     output_path = os.path.join(
         test_artifacts_path, "text_classification", "output.txt")
     dataset_text = load_file_as_str(self.dataset)
     output_text = load_file_as_str(output_path)

     tsv_loader = TSVFileLoader(
         [FileLoaderField(0, "field0", str)], use_idx_as_id=True)
     text_loader = TextFileLoader("output", str)

     loader = Loader(
         dataset_data=dataset_text,
         output_data=output_text,
         dataset_source=Source.in_memory,
         output_source=Source.in_memory,
         dataset_file_type=FileType.tsv,
         output_file_type=FileType.text,
         dataset_file_loader=tsv_loader,
         output_file_loader=text_loader,
     )
     samples = list(loader.load())
     self.assertEqual(len(samples), 10)
     self.assertEqual(set(samples[0].keys()), {"id", "field0", "output"})
Beispiel #16
0
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     Each loader emits two string fields named by ``cls.OUTPUT_FIELDS[0]``
     and ``cls.OUTPUT_FIELDS[1]``. The TSV loader reads them from source
     fields 0 and 1; JSON and Datalab read them from the keys
     ``cls.JSON_FIELDS[0]`` and ``cls.JSON_FIELDS[1]``.
     """
     # Only the first two entries are used, exactly as indexed here.
     positions = (0, 1)

     def keyed_fields() -> list:
         return [
             FileLoaderField(cls.JSON_FIELDS[i], cls.OUTPUT_FIELDS[i], str)
             for i in positions
         ]

     return {
         FileType.tsv:
         TSVFileLoader([
             FileLoaderField(i, cls.OUTPUT_FIELDS[i], str)
             for i in positions
         ]),
         FileType.json:
         JSONFileLoader(keyed_fields()),
         FileType.datalab:
         DatalabFileLoader(keyed_fields()),
     }
Beispiel #17
0
 def default_dataset_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default dataset file loaders, keyed by file type.

     JSON and Datalab inputs share one schema: ``context`` and ``question``
     are strings loaded with ``strip_before_parsing=False``, and
     ``answers`` is loaded with the field's default settings. All three
     keep their source names.
     """

     def qa_fields() -> list:
         unstripped = [
             FileLoaderField(name, name, str, strip_before_parsing=False)
             for name in ("context", "question")
         ]
         # "answers" passes neither a dtype nor a strip option.
         return [*unstripped, FileLoaderField("answers", "answers")]

     return {
         FileType.json: JSONFileLoader(qa_fields()),
         FileType.datalab: DatalabFileLoader(qa_fields()),
     }
 def default_output_file_loaders(cls) -> dict[FileType, FileLoader]:
     """Return the default output file loaders, keyed by file type.

     Only a JSON loader is provided; it reads one ``predicted_edits`` dict
     field and keeps its name.
     """
     edits_key = "predicted_edits"
     json_loader = JSONFileLoader(
         [FileLoaderField(edits_key, edits_key, dict)])
     return {FileType.json: json_loader}
Beispiel #19
0
 def test_text_file_loader_validate(self):
     # Adding extra fields to this text loader is expected to raise
     # ValueError.
     loader = TextFileLoader(target_name="prediction", dtype=int)
     with self.assertRaises(ValueError):
         loader.add_fields([FileLoaderField("test", "test", str)])