def test_dataset_schema_bug():
    """Re-open freshly flushed datasets with the schema they were created with.

    Covers a flat schema that mixes a ``Primitive`` with a plain dtype
    string, and a nested schema holding ``Tensor`` fields. The test passes
    as long as none of the ``Dataset`` calls raises.
    """
    sample_shape = (100, )

    # Case 1: flat schema mixing a Primitive object and a dtype string.
    flat_path = "./data/schema_bug"
    flat_schema = {"abc": Primitive("int32"), "def": "int64"}
    written = Dataset(flat_path, schema=flat_schema, shape=sample_shape)
    written.flush()
    reopened = Dataset(flat_path, schema=flat_schema, shape=sample_shape)

    # Case 2: nested schema with Tensor fields under a sub-dictionary.
    nested_path = "./data/schema_bug_2"
    nested_schema = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    written = Dataset(nested_path, schema=nested_schema, shape=sample_shape)
    written.flush()
    reopened = Dataset(nested_path, schema=nested_schema, shape=sample_shape)
def test_primitive_repr():
    """Check that ``repr`` of a ``Primitive`` built from ``int`` is ``'int64'`` in quotes."""
    primitive = Primitive(int)
    assert repr(primitive) == "'int64'"
def test_primitive_str():
    """Check that ``str`` of a ``Primitive`` built from ``"int64"`` is ``'int64'`` in quotes."""
    primitive = Primitive("int64")
    assert str(primitive) == "'int64'"
)
        # Pair every sentence with its label so each sample is a
        # (sentence, label) tuple.
        sentences = list(df.sentence.values)
        labels = list(df.label.values)
        data = list(zip(sentences, labels))

        @transform(schema=self.schema)
        def load_transform(sample):
            # sample[0] is the sentence and sample[1] its label, following
            # the zip order used to build ``data``.
            return {"sentence": sample[0], "labels": sample[1]}

        ds = load_transform(data)
        # Store the transformed data under this object's tag and hand the
        # result of ``store`` back to the caller.
        return ds.store(self.tag)


def main(url, tag, schema):
    """Build a ``Retrieve`` and run its fetch, unpack and push steps in order.

    Args:
        url: Forwarded unchanged as the first argument of ``Retrieve``.
        tag: Forwarded unchanged as the second argument of ``Retrieve``.
        schema: Forwarded unchanged as the third argument of ``Retrieve``.
    """
    R = Retrieve(url, tag, schema)
    R.fetch()
    R.unpack()
    R.push()


if __name__ == "__main__":
    # Script entry point: fixed URL, tag and schema for the CoLA upload.
    url = "https://nyu-mll.github.io/CoLA/cola_public_1.1.zip"
    tag = "activeloop/CoLA"
    # The schema keys match the keys emitted by ``load_transform``.
    schema = {
        "sentence": Text(shape=(None, ), max_shape=(500, )),
        "labels": Primitive(dtype="int64"),
    }
    main(url, tag, schema)