Exemple #1
0
 def test_flatten(self):
     """Check that flattening a DatasetDict collapses nested columns.

     Every split starts with a nested column ``a -> b -> c`` plus a plain
     ``foo`` column. After ``flatten_()`` each split is expected to expose
     the dotted column ``"a.b.c"`` next to ``"foo"``, both in the column
     names and in the features.
     """
     # Fixture: ten identical rows with a nested "a" column and an int "foo".
     nested_row = {"b": {"c": ["text"]}}
     nested_features = Features({
         "a": {"b": Sequence({"c": Value("string")})},
         "foo": Value("int64"),
     })
     split = Dataset.from_dict(
         {"a": [nested_row] * 10, "foo": [1] * 10},
         features=nested_features,
     )

     # The same split object backs both entries of the dict.
     dsets = DatasetDict({"train": split, "test": split})
     dsets.flatten_()

     flat_columns = ["a.b.c", "foo"]
     expected_columns = {name: flat_columns for name in ("train", "test")}
     self.assertDictEqual(dsets.column_names, expected_columns)

     train_features = dsets["train"].features
     self.assertListEqual(list(train_features.keys()), flat_columns)

     expected_features = Features({
         "a.b.c": Sequence(Value("string")),
         "foo": Value("int64"),
     })
     self.assertDictEqual(train_features, expected_features)
Exemple #2
0
 def _create_dummy_dataset_dict(self, multiple_columns=False) -> DatasetDict:
     """Build a DatasetDict with a "train" and a "test" dummy split.

     Args:
         multiple_columns: Forwarded unchanged to
             ``self._create_dummy_dataset`` for each split.

     Returns:
         A ``DatasetDict`` whose "train" and "test" entries are each the
         result of a separate ``self._create_dummy_dataset`` call.
     """
     splits = {}
     # "train" is created before "test", one helper call per split.
     for split_name in ("train", "test"):
         splits[split_name] = self._create_dummy_dataset(multiple_columns=multiple_columns)
     return DatasetDict(splits)
 def _create_dummy_dataset_dict(self) -> DatasetDict:
     """Build a DatasetDict with a "train" and a "test" dummy split.

     Returns:
         A ``DatasetDict`` whose "train" and "test" entries are each the
         result of a separate ``self._create_dummy_dataset()`` call.
     """
     # One helper call per split, "train" first.
     train_split = self._create_dummy_dataset()
     test_split = self._create_dummy_dataset()
     return DatasetDict({"train": train_split, "test": test_split})