def test_run_write_carbon_binary():
    """Round-trip ten rows containing a binary column through the SDK.

    Each row carries the raw bytes of ``carbondatalogo.jpg`` in the
    ``binaryField`` column. The rows are written with ``CarbonWriter``, read
    back with ``CarbonReader``, and the number of rows read is asserted to be
    ten. The binary column of the first row is additionally dumped to
    ``image.jpg`` inside the output folder.

    The writer, the reader and the temporary output folder are released in
    ``finally`` blocks so that a failing assertion does not leak them.
    """
    json_schema = "[{stringField:string},{shortField:short},{intField:int},{binaryField:binary}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    if os.path.exists(path):
        shutil.rmtree(path)

    jpg_path = IMAGE_DATA_PATH + "/carbondatalogo.jpg"

    # The fixture is only read, so plain 'rb' is enough ('rb+' would demand
    # write permission). Reading it before the writer exists means a missing
    # fixture cannot leave an open writer behind.
    with open(jpg_path, mode='rb') as file_object:
        content = file_object.read()

    try:
        writer = (
            CarbonWriter()
            .builder()
            .outputPath(path)
            .withCsvInput(json_schema)
            .writtenBy("pycarbon")
            .build()
        )
        try:
            # Imported after the writer is built, as in the original flow,
            # but only once instead of on every iteration.
            from jnius import autoclass
            array_list_class = autoclass("java.util.ArrayList")

            for i in range(10):
                data_list = array_list_class()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                data_list.add(content)
                writer.write(data_list.toArray())
        finally:
            writer.close()

        reader = (
            CarbonReader()
            .builder()
            .withFolder(path)
            .withBatch(1000)
            .build()
        )
        try:
            from jnius.jnius import ByteArray

            row_count = 0
            while reader.hasNext():
                for row in reader.readNextBatchRow():
                    row_count += 1
                    # Only the first row's image is dumped to disk.
                    if row_count != 1:
                        continue
                    for column in row:
                        if isinstance(column, ByteArray) and len(column) > 1000:
                            with open(path + "/image.jpg", 'wb') as image_file:
                                image_file.write(column.tostring())

            assert 10 == row_count
        finally:
            reader.close()
    finally:
        # Best-effort cleanup: it must not mask the real test failure.
        shutil.rmtree(path, ignore_errors=True)
def test_run_write_carbon_binary_base64_encode_decodeInJava_many_files():
    """Write base64-encoded images plus their text files and read them back.

    Every ``.jpg`` file returned by ``SDKUtil.listFiles`` for the ``flowers``
    fixture folder is base64-encoded in Python and written together with the
    content of the ``.txt`` file of the same name. The writer is configured
    with the load option ``binary_decoder=base64`` (presumably so the Java
    side decodes the payload, as the test name suggests -- confirm against
    the SDK). The test then reads the data back, dumps large binary columns
    to ``image<N>.jpg`` in the output folder, and asserts that exactly three
    rows were read.

    The writer, the reader and the temporary output folder are released in
    ``finally`` blocks so that a failing assertion does not leak them.
    """
    json_schema = "[{stringField:string},{shortField:short},{intField:int},{binaryField:binary},{txtField:string}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    if os.path.exists(path):
        shutil.rmtree(path)

    jpg_dir = IMAGE_DATA_PATH + "/flowers"

    from jnius import autoclass

    sdk_util_class = autoclass("org.apache.carbondata.sdk.file.utils.SDKUtil")
    jpg_files = sdk_util_class.listFiles(jpg_dir, '.jpg')

    try:
        writer = (
            CarbonWriter()
            .builder()
            .outputPath(path)
            .withCsvInput(json_schema)
            .writtenBy("pycarbon")
            .withLoadOption("binary_decoder", "base64")
            .withPageSizeInMb(1)
            .build()
        )
        try:
            # Loop-invariant class lookup, resolved once.
            array_list_class = autoclass("java.util.ArrayList")

            for i in range(jpg_files.size()):
                jpg_file = jpg_files.get(i)

                # Fixtures are only read; 'rb'/'r' avoid requiring write
                # permission on the test data.
                with open(jpg_file, mode='rb') as image_file:
                    content = image_file.read()

                with open(str(jpg_file).replace('.jpg', '.txt'), mode='r') as text_file:
                    txt = text_file.read()

                data_list = array_list_class()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                data_list.add(base64.b64encode(content))
                data_list.add(txt)
                writer.write(data_list.toArray())
        finally:
            writer.close()

        reader = (
            CarbonReader()
            .builder()
            .withFolder(path)
            .withBatch(1000)
            .build()
        )
        try:
            from jnius.jnius import ByteArray

            row_count = 0
            while reader.hasNext():
                for row in reader.readNextBatchRow():
                    row_count += 1
                    # Images are only dumped for the first 19 rows.
                    if row_count >= 20:
                        continue
                    for column in row:
                        if isinstance(column, ByteArray) and len(column) > 1000:
                            with open(path + "/image" + str(row_count) + ".jpg", 'wb') as image_file:
                                image_file.write(column.tostring())

            assert 3 == row_count
        finally:
            reader.close()
    finally:
        # Best-effort cleanup: it must not mask the real test failure.
        shutil.rmtree(path, ignore_errors=True)
def test_run_write_carbon():
    """Write rows twice to the same folder and verify row count and schema.

    Steps:
      1. Write ten three-column rows with ``CarbonWriter``.
      2. Read them back with ``CarbonReader`` and assert ten rows.
      3. Read the schema via ``CarbonSchemaReader.readSchema(path)`` and
         assert three fields.
      4. Write ten more rows to the same folder.
      5. Read the schema again with ``getAsBuffer=False`` and
         ``validateSchema=True`` and assert three fields.

    The writer, the reader and the temporary output folder are released in
    ``finally`` blocks so that a failing assertion does not leak them.
    """
    json_schema = "[{stringField:string},{shortField:short},{intField:int}]"
    path = "/tmp/data/writeCarbon" + str(time.time())

    if os.path.exists(path):
        shutil.rmtree(path)

    def write_ten_rows():
        # Both write passes are identical, so they share this helper.
        writer = (
            CarbonWriter()
            .builder()
            .outputPath(path)
            .withCsvInput(json_schema)
            .writtenBy("pycarbon")
            .build()
        )
        try:
            # Imported after the writer is built, as in the original flow,
            # but only once per pass instead of once per row.
            from jnius import autoclass
            array_list_class = autoclass("java.util.ArrayList")

            for i in range(10):
                data_list = array_list_class()
                data_list.add("pycarbon")
                data_list.add(str(i))
                data_list.add(str(i * 10))
                writer.write(data_list.toArray())
        finally:
            writer.close()

    try:
        write_ten_rows()

        reader = (
            CarbonReader()
            .builder()
            .withFolder(path)
            .withBatch(1000)
            .build()
        )
        try:
            row_count = 0
            while reader.hasNext():
                row_count += len(reader.readNextBatchRow())

            assert 10 == row_count
        finally:
            reader.close()

        schema = CarbonSchemaReader().readSchema(path)
        assert 3 == schema.getFieldsLength()

        write_ten_rows()

        schema = CarbonSchemaReader().readSchema(getAsBuffer=False, path=path, validateSchema=True)
        assert 3 == schema.getFieldsLength()
    finally:
        # Best-effort cleanup: it must not mask the real test failure.
        shutil.rmtree(path, ignore_errors=True)