def test_flatten(self): dset_split = Dataset.from_dict( { "a": [{ "b": { "c": ["text"] } }] * 10, "foo": [1] * 10 }, features=Features({ "a": { "b": Sequence({"c": Value("string")}) }, "foo": Value("int64") }), ) dset = DatasetDict({"train": dset_split, "test": dset_split}) dset.flatten_() self.assertDictEqual(dset.column_names, { "train": ["a.b.c", "foo"], "test": ["a.b.c", "foo"] }) self.assertListEqual(list(dset["train"].features.keys()), ["a.b.c", "foo"]) self.assertDictEqual( dset["train"].features, Features({ "a.b.c": Sequence(Value("string")), "foo": Value("int64") }))
def _create_dummy_dataset_dict(self, multiple_columns=False) -> DatasetDict: return DatasetDict( { "train": self._create_dummy_dataset(multiple_columns=multiple_columns), "test": self._create_dummy_dataset(multiple_columns=multiple_columns), } )
def _create_dummy_dataset_dict(self) -> DatasetDict: return DatasetDict({ "train": self._create_dummy_dataset(), "test": self._create_dummy_dataset() })