def test_write_two_images_mindrecord():
    """test two images to mindrecord"""
    if os.path.exists(CV_FILE_NAME + ".db"):
        os.remove(CV_FILE_NAME + ".db")
    if os.path.exists(CV_FILE_NAME):
        os.remove(CV_FILE_NAME)
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    data = get_two_bytes_data(MAP_FILE_NAME)
    cv_schema_json = {"img_data": {"type": "bytes"},
                      "label_data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "two_images_schema")
    writer.write_raw_data(data)
    writer.commit()
    assert os.path.exists(CV_FILE_NAME)
    assert os.path.exists(CV_FILE_NAME + ".db")
    read(CV_FILE_NAME, 2)

    if os.path.exists(CV_FILE_NAME + ".db"):
        os.remove(CV_FILE_NAME + ".db")
    if os.path.exists(CV_FILE_NAME):
        os.remove(CV_FILE_NAME)

def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8): """Create MindRecord file.""" mindrecord_dir = config.mindrecord_dir mindrecord_path = os.path.join(mindrecord_dir, prefix) writer = FileWriter(mindrecord_path, file_num) if dataset == "coco": images, image_path_dict, image_anno_dict = create_coco_label(is_training) else: images, image_path_dict, image_anno_dict = filter_valid_data(config.image_dir, config.anno_path) ssd_json = { "img_id": {"type": "int32", "shape": [1]}, "image": {"type": "bytes"}, "annotation": {"type": "int32", "shape": [-1, 5]}, } writer.add_schema(ssd_json, "ssd_json") for img_id in images: image_path = image_path_dict[img_id] with open(image_path, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[img_id], dtype=np.int32) img_id = np.array([img_id], dtype=np.int32) row = {"img_id": img_id, "image": img, "annotation": annos} writer.write_raw_data([row]) writer.commit()
def data_to_mindrecord_byte_image(image_dir, anno_path, mindrecord_dir, prefix, file_num):
    """Create MindRecord file by image_dir and anno_path."""
    mindrecord_path = os.path.join(mindrecord_dir, prefix)
    writer = FileWriter(mindrecord_path, file_num)
    image_files, image_anno_dict = filter_valid_data(image_dir, anno_path)

    yolo_json = {
        "image": {"type": "bytes"},
        "annotation": {"type": "int64", "shape": [-1, 5]},
    }
    writer.add_schema(yolo_json, "yolo_json")

    for image_name in image_files:
        image_path = os.path.join(image_dir, image_name)
        with open(image_path, 'rb') as f:
            img = f.read()
        # cast explicitly so the array matches the declared int64 schema type
        annos = np.array(image_anno_dict[image_name], dtype=np.int64)
        row = {"image": img, "annotation": annos}
        writer.write_raw_data([row])
    writer.commit()

def test_cv_file_writer_no_raw():
    """test cv file writer without raw data."""
    writer = FileWriter(NLP_FILE_NAME)
    data = list(get_nlp_data("../data/mindrecord/testAclImdbData/pos",
                             "../data/mindrecord/testAclImdbData/vocab.txt",
                             10))
    nlp_schema_json = {"input_ids": {"type": "int64", "shape": [1, -1]},
                       "input_mask": {"type": "int64", "shape": [1, -1]},
                       "segment_ids": {"type": "int64", "shape": [1, -1]}}
    writer.add_schema(nlp_schema_json, "no_raw_schema")
    writer.write_raw_data(data)
    writer.commit()

    reader = FileReader(NLP_FILE_NAME)
    count = 0
    for index, x in enumerate(reader.get_next()):
        count += 1
        assert len(x) == 3
        logger.info("#item{}: {}".format(index, x))
    assert count == 10
    reader.close()

    os.remove(NLP_FILE_NAME)
    os.remove("{}.db".format(NLP_FILE_NAME))

def create_diff_page_size_cv_mindrecord(files_num):
    """tutorial for cv dataset writer."""
    if os.path.exists(CV1_FILE_NAME):
        os.remove(CV1_FILE_NAME)
    if os.path.exists("{}.db".format(CV1_FILE_NAME)):
        os.remove("{}.db".format(CV1_FILE_NAME))
    writer = FileWriter(CV1_FILE_NAME, files_num)
    writer.set_page_size(1 << 26)  # 64MB
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int32"},
                      "data": {"type": "bytes"}}
    data = [{"file_name": "001.jpg",
             "label": 43,
             "data": bytes('0xffsafdafda', encoding='utf-8')}]
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

def test_cv_file_writer_shard_num_10():
    """test file writer when shard num equals 10."""
    writer = FileWriter(CV_FILE_NAME, 10)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(10)]
    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))

def test_cv_file_writer_without_data():
    """test cv file writer without data."""
    writer = FileWriter(CV_FILE_NAME, 1)
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.commit()

    reader = FileReader(CV_FILE_NAME)
    count = 0
    for index, x in enumerate(reader.get_next()):
        count += 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 0
    reader.close()

    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))

def test_file_writer_raw_data_038():
    """test write raw data without verify."""
    shard_num = 11
    writer = FileWriter("test_file_writer_raw_data_", shard_num)
    data_raw = get_data("../data/mindrecord/testImageNetData/")
    schema_json = {"file_name": {"type": "string"},
                   "label": {"type": "number"},
                   "data": {"type": "bytes"}}
    writer.add_schema(schema_json, "img_schema")
    writer.add_index(["file_name"])
    for _ in range(shard_num):
        writer.write_raw_data(data_raw, False)
    writer.commit()

    file_name = ""
    if shard_num > 1:
        file_name = '99' if shard_num > 99 else str(shard_num - 1)
    reader = FileReader("test_file_writer_raw_data_" + file_name)
    i = 0
    for _, _ in enumerate(reader.get_next()):
        i += 1
    assert i == shard_num * 10
    reader.close()

    if shard_num == 1:
        os.remove("test_file_writer_raw_data_")
        os.remove("test_file_writer_raw_data_.db")
        return
    for x in range(shard_num):
        n = str(x)
        if shard_num > 10:
            n = '0' + str(x) if x < 10 else str(x)
        if os.path.exists("test_file_writer_raw_data_{}".format(n)):
            os.remove("test_file_writer_raw_data_{}".format(n))
        if os.path.exists("test_file_writer_raw_data_{}.db".format(n)):
            os.remove("test_file_writer_raw_data_{}.db".format(n))

def file_based_convert_examples_to_features(self, input_file, output_file):
    """Convert a set of `InputExample`s to a MindDataset file."""
    examples = self._read_tsv(input_file)

    writer = FileWriter(file_name=output_file, shard_num=1)
    nlp_schema = {
        "input_ids": {"type": "int64", "shape": [-1]},
        "input_mask": {"type": "int64", "shape": [-1]},
        "segment_ids": {"type": "int64", "shape": [-1]},
        "label_ids": {"type": "int64", "shape": [-1]},
    }
    writer.add_schema(nlp_schema, "preprocessed classification dataset")
    data = []
    for index, example in enumerate(examples):
        if index % 10000 == 0:
            logging.info("Writing example %d of %d" % (index, len(examples)))
        record = self._convert_example_to_record(example, self.max_seq_len, self.tokenizer)
        sample = {
            "input_ids": np.array(record.input_ids, dtype=np.int64),
            "input_mask": np.array(record.input_mask, dtype=np.int64),
            "segment_ids": np.array(record.segment_ids, dtype=np.int64),
            "label_ids": np.array([record.label_id], dtype=np.int64),
        }
        data.append(sample)
    writer.write_raw_data(data)
    writer.commit()

def write_to_mindrecord(data, path, shard_num=1):
    """generate mindrecord"""
    if not os.path.isabs(path):
        path = os.path.abspath(path)
    writer = FileWriter(path, shard_num)
    data_schema = {"src_tokens": {"type": "int32", "shape": [-1]},
                   "src_tokens_length": {"type": "int32", "shape": [-1]},
                   "label_idx": {"type": "int32", "shape": [-1]}}
    writer.add_schema(data_schema, "fasttext")
    for item in data:
        item['src_tokens'] = np.array(item['src_tokens'], dtype=np.int32)
        item['src_tokens_length'] = np.array(item['src_tokens_length'], dtype=np.int32)
        item['label_idx'] = np.array(item['label_idx'], dtype=np.int32)
        writer.write_raw_data([item])
    writer.commit()

def test_cv_minddataset_writer_tutorial():
    """tutorial for cv dataset writer."""
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    for x in paths:
        if os.path.exists("{}".format(x)):
            os.remove("{}".format(x))
        if os.path.exists("{}.db".format(x)):
            os.remove("{}.db".format(x))
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    data = get_data(CV_DIR_NAME)
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int32"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))

def test_cv_file_writer_loop_and_read():
    """tutorial for cv dataset loop writer."""
    writer = FileWriter(CV2_FILE_NAME, FILES_NUM)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    for row in data:
        writer.write_raw_data([row])
    writer.commit()

    reader = FileReader(CV2_FILE_NAME + "0")
    count = 0
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 3
        count += 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 10
    reader.close()

    paths = ["{}{}".format(CV2_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))

def create_text_mindrecord():
    # method to create mindrecord with string data, used to generate
    # testTextMindRecord/test.mindrecord
    from mindspore.mindrecord import FileWriter

    mindrecord_file_name = "test.mindrecord"
    data = [{"english": "This is a text file.",
             "chinese": "今天天气太好了我们一起去外面玩吧"},
            {"english": "Be happy every day.",
             "chinese": "男默女泪"},
            {"english": "Good luck to everyone.",
             "chinese": "江州市长江大桥参加了长江大桥的通车仪式"},
            ]
    writer = FileWriter(mindrecord_file_name)
    schema = {"english": {"type": "string"},
              "chinese": {"type": "string"},
              }
    writer.add_schema(schema)
    writer.write_raw_data(data)
    writer.commit()

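# A minimal read-back sketch (assumed usage): FileReader iterates the rows
# written above, and both fields come back as strings.
def read_text_mindrecord_example(file_name="test.mindrecord"):
    from mindspore.mindrecord import FileReader
    reader = FileReader(file_name)
    for index, row in enumerate(reader.get_next()):
        # each row is a dict with the "english" and "chinese" string fields
        print("#item{}: {}".format(index, row["english"]))
    reader.close()
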
def add_and_remove_nlp_file():
    """add/remove nlp file"""
    paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    for x in paths:
        if os.path.exists("{}".format(x)):
            os.remove("{}".format(x))
        if os.path.exists("{}.db".format(x)):
            os.remove("{}.db".format(x))
    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
    data = list(get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10))
    nlp_schema_json = {"id": {"type": "string"},
                       "label": {"type": "int32"},
                       "rating": {"type": "float32"},
                       "input_ids": {"type": "int64", "shape": [-1]},
                       "input_mask": {"type": "int64", "shape": [1, -1]},
                       "segment_ids": {"type": "int64", "shape": [2, -1]}}
    writer.set_header_size(1 << 14)
    writer.set_page_size(1 << 15)
    writer.add_schema(nlp_schema_json, "nlp_schema")
    writer.add_index(["id", "rating"])
    writer.write_raw_data(data)
    writer.commit()
    yield "yield_nlp_data"
    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))

def write_mindrecord_tutorial():
    writer = FileWriter(MINDRECORD_FILE_NAME)
    data = get_data("./ImageNetDataSimulation")
    schema_json = {"file_name": {"type": "string"},
                   "label": {"type": "int64"},
                   "data": {"type": "bytes"}}
    writer.add_schema(schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    reader = FileReader(MINDRECORD_FILE_NAME)
    count = 0
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 3
        count += 1
        # print("#item {}: {}".format(index, x))
    assert count == 20
    reader.close()

def convert_to_mindrecord(features, labels, mindrecord_path):
    schema_json = {"id": {"type": "int32"},
                   "label": {"type": "int32"},
                   "feature": {"type": "int32", "shape": [-1]}}

    if not os.path.exists(mindrecord_path):
        os.makedirs(mindrecord_path)
    else:
        print(mindrecord_path, 'exists. Please make sure it is empty!')
    file_name = os.path.join(mindrecord_path, 'style.mindrecord')
    print('writing mindrecord into', file_name)

    def get_imdb_data(features, labels):
        data_list = []
        for i, (label, feature) in enumerate(zip(labels, features)):
            data_json = {"id": i,
                         "label": int(label),
                         "feature": feature.reshape(-1)}
            data_list.append(data_json)
        return data_list

    writer = FileWriter(file_name, shard_num=4)
    data = get_imdb_data(features, labels)
    writer.add_schema(schema_json, "style_schema")
    writer.add_index(["id", "label"])
    writer.write_raw_data(data)
    writer.commit()
    print('done')

def test_nlp_file_writer_tutorial():
    """tutorial for nlp file writer."""
    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
    data = list(get_nlp_data("../data/mindrecord/testAclImdbData/pos",
                             "../data/mindrecord/testAclImdbData/vocab.txt",
                             10))
    nlp_schema_json = {"id": {"type": "string"},
                       "label": {"type": "int32"},
                       "rating": {"type": "float32"},
                       "input_ids": {"type": "int64", "shape": [1, -1]},
                       "input_mask": {"type": "int64", "shape": [1, -1]},
                       "segment_ids": {"type": "int64", "shape": [1, -1]}}
    writer.add_schema(nlp_schema_json, "nlp_schema")
    writer.add_index(["id", "rating"])
    writer.write_raw_data(data)
    writer.commit()

def init_writer(mr_schema):
    """init writer"""
    print("Init writer ...")
    mr_writer = FileWriter(args.mindrecord_file, args.mindrecord_partitions)

    # set the header size
    if args.mindrecord_header_size_by_bit != 24:
        header_size = 1 << args.mindrecord_header_size_by_bit
        mr_writer.set_header_size(header_size)

    # set the page size
    if args.mindrecord_page_size_by_bit != 25:
        page_size = 1 << args.mindrecord_page_size_by_bit
        mr_writer.set_page_size(page_size)

    # create the schema
    mr_writer.add_schema(mr_schema, "mindrecord_graph_schema")

    # open file and set header
    mr_writer.open_and_set_header()

    return mr_writer

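# A minimal usage sketch (assumed caller, hypothetical names): the writer
# returned by init_writer already has its schema and header set, so the
# caller only streams rows and commits. `build_graph_rows` is a stand-in
# for whatever produces row dicts matching `mr_schema`.
def convert_graph_example(mr_schema):
    mr_writer = init_writer(mr_schema)
    for batch in build_graph_rows():  # hypothetical generator of row dicts
        mr_writer.write_raw_data(batch)
    mr_writer.commit()
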
def test_cv_file_writer_absolute_path():
    """test cv file writer when file name is absolute path."""
    file_name = "/tmp/" + str(uuid.uuid4())
    writer = FileWriter(file_name, FILES_NUM)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    paths = ["{}{}".format(file_name, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))

def data_to_mindrecord_byte_image(is_training=True, prefix="deeptext.mindrecord", file_num=8): """Create MindRecord file.""" mindrecord_dir = config.mindrecord_dir mindrecord_path = os.path.join(mindrecord_dir, prefix) writer = FileWriter(mindrecord_path, file_num) image_files, image_anno_dict = create_label(is_training) deeptext_json = { "image": { "type": "bytes" }, "annotation": { "type": "int32", "shape": [-1, 6] }, } writer.add_schema(deeptext_json, "deeptext_json") for image_name in image_files: with open(image_name, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[image_name], dtype=np.int32) row = {"image": img, "annotation": annos} writer.write_raw_data([row]) writer.commit()
def test_cv_file_writer_no_blob():
    """test cv file writer without blob data."""
    writer = FileWriter(CV_FILE_NAME, 1)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "int64"}}
    writer.add_schema(cv_schema_json, "no_blob_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    reader = FileReader(CV_FILE_NAME)
    count = 0
    for index, x in enumerate(reader.get_next()):
        count += 1
        assert len(x) == 2
        logger.info("#item{}: {}".format(index, x))
    assert count == 10
    reader.close()

    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))

def voc_data_to_mindrecord(mindrecord_dir, is_training, prefix="ssd.mindrecord", file_num=8): """Create MindRecord file by image_dir and anno_path.""" mindrecord_path = os.path.join(mindrecord_dir, prefix) writer = FileWriter(mindrecord_path, file_num) images, image_path_dict, image_anno_dict = create_voc_label(is_training) ssd_json = { "img_id": { "type": "int32", "shape": [1] }, "image": { "type": "bytes" }, "annotation": { "type": "int32", "shape": [-1, 5] }, } writer.add_schema(ssd_json, "ssd_json") for img_id in images: image_path = image_path_dict[img_id] with open(image_path, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[img_id], dtype=np.int32) img_id = np.array([img_id], dtype=np.int32) row = {"img_id": img_id, "image": img, "annotation": annos} writer.write_raw_data([row]) writer.commit()
def _convert_to_mindrecord(data_home, features, labels, weight_np=None, training=True):
    """convert imdb dataset to mindrecord dataset"""
    if weight_np is not None:
        np.savetxt(os.path.join(data_home, 'weight.txt'), weight_np)

    # write mindrecord
    schema_json = {"id": {"type": "int32"},
                   "label": {"type": "int32"},
                   "feature": {"type": "int32", "shape": [-1]}}

    data_dir = os.path.join(data_home, "aclImdb_train.mindrecord")
    if not training:
        data_dir = os.path.join(data_home, "aclImdb_test.mindrecord")

    def get_imdb_data(features, labels):
        data_list = []
        for i, (label, feature) in enumerate(zip(labels, features)):
            data_json = {"id": i,
                         "label": int(label),
                         "feature": feature.reshape(-1)}
            data_list.append(data_json)
        return data_list

    writer = FileWriter(data_dir, shard_num=4)
    data = get_imdb_data(features, labels)
    writer.add_schema(schema_json, "nlp_schema")
    writer.add_index(["id", "label"])
    writer.write_raw_data(data)
    writer.commit()

def add_and_remove_cv_file():
    """add/remove cv file"""
    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    try:
        for x in paths:
            if os.path.exists("{}".format(x)):
                os.remove("{}".format(x))
            if os.path.exists("{}.db".format(x)):
                os.remove("{}.db".format(x))
        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
        data = get_data(CV_DIR_NAME)
        cv_schema_json = {"id": {"type": "int32"},
                          "file_name": {"type": "string"},
                          "label": {"type": "int32"},
                          "data": {"type": "bytes"}}
        writer.add_schema(cv_schema_json, "img_schema")
        writer.add_index(["file_name", "label"])
        writer.write_raw_data(data)
        writer.commit()
        yield "yield_cv_data"
    except Exception as error:
        for x in paths:
            os.remove("{}".format(x))
            os.remove("{}.db".format(x))
        raise error
    else:
        for x in paths:
            os.remove("{}".format(x))
            os.remove("{}.db".format(x))

def _build_mindrecord(self, mindrecord_path):
    writer = FileWriter(file_name=mindrecord_path, shard_num=self.shard_num)
    writer.add_schema(seg_schema, "seg_schema")
    data = []
    cnt = 0
    print('number of samples:', self.num_images)
    for idx in range(len(self.images)):
        sample_ = {'file_name': os.path.basename(self.images[idx])}
        with open(self.images[idx], 'rb') as f:
            sample_['data'] = f.read()
        # map classes to indices and re-encode the mask as a PNG byte stream
        white_io = BytesIO()
        mask = Image.open(self.masks[idx])
        mask = Image.fromarray(self._class_to_index(np.array(mask)).astype('uint8'))
        mask.save(white_io, 'PNG')
        sample_['label'] = white_io.getvalue()
        data.append(sample_)
        cnt += 1
        # flush accumulated samples to the writer every 10 rows
        if cnt % 10 == 0:
            writer.write_raw_data(data)
            data = []
    if data:
        writer.write_raw_data(data)
    writer.commit()
    print('number of samples written:', cnt)

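# A minimal read-back sketch (assumed usage): the "label" field written above
# holds a PNG byte stream, so it must be decoded again on the way out.
def read_seg_sample_example(mindrecord_file):
    from mindspore.mindrecord import FileReader
    reader = FileReader(mindrecord_file)
    for row in reader.get_next():
        image_bytes = row['data']                 # raw image file bytes
        mask = Image.open(BytesIO(row['label']))  # decode the PNG mask
        print(row['file_name'], mask.size, len(image_bytes))
        break
    reader.close()
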
def convert_yolo_data_to_mindrecord():
    '''convert_yolo_data_to_mindrecord'''
    writer = FileWriter(mindrecord_file_name, mindrecord_num)
    yolo_json = {
        "image": {"type": "bytes"},
        "annotation": {"type": "float64", "shape": [-1, 6]}
    }

    print('Loading train data...')
    image_files, anno_files = prepare_file_paths()
    dataset_size = len(anno_files)
    assert dataset_size == len(image_files)
    logger.info("#size of dataset: {}".format(dataset_size))
    data = []
    for i in range(dataset_size):
        data.append(get_data(image_files[i], anno_files[i]))

    print('Writing train data to mindrecord...')
    writer.add_schema(yolo_json, "yolo_json")
    if not data:
        raise ValueError("There is no data to write to mindrecord.")
    writer.write_raw_data(data)
    writer.commit()

def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8): """Create MindRecord file.""" mindrecord_dir = config.mindrecord_dir mindrecord_path = os.path.join(mindrecord_dir, prefix) writer = FileWriter(mindrecord_path, file_num) if dataset == "coco": image_files, image_anno_dict = create_coco_label(is_training) else: image_files, image_anno_dict = filter_valid_data( config.IMAGE_DIR, config.ANNO_PATH) fasterrcnn_json = { "image": { "type": "bytes" }, "annotation": { "type": "int32", "shape": [-1, 6] }, } writer.add_schema(fasterrcnn_json, "fasterrcnn_json") for image_name in image_files: with open(image_name, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[image_name], dtype=np.int32) row = {"image": img, "annotation": annos} writer.write_raw_data([row]) writer.commit()
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="maskrcnn.mindrecord", file_num=8): """Create MindRecord file.""" mindrecord_dir = config.mindrecord_dir mindrecord_path = os.path.join(mindrecord_dir, prefix) writer = FileWriter(mindrecord_path, file_num) if dataset == "coco": image_files, image_anno_dict, masks, masks_shape = create_coco_label(is_training) else: print("Error unsupported other dataset") return maskrcnn_json = { "image": {"type": "bytes"}, "annotation": {"type": "int32", "shape": [-1, 6]}, "mask": {"type": "bytes"}, "mask_shape": {"type": "int32", "shape": [-1]}, } writer.add_schema(maskrcnn_json, "maskrcnn_json") image_files_num = len(image_files) for ind, image_name in enumerate(image_files): with open(image_name, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[image_name], dtype=np.int32) mask = masks[image_name] mask_shape = masks_shape[image_name] row = {"image": img, "annotation": annos, "mask": mask, "mask_shape": mask_shape} if (ind + 1) % 10 == 0: print("writing {}/{} into mindrecord".format(ind + 1, image_files_num)) writer.write_raw_data([row]) writer.commit()
def data_to_mindrecord_byte_image(image_files, image_anno_dict, dst_dir, prefix="cptn_mlt.mindrecord", file_num=1): """Create MindRecord file.""" mindrecord_path = os.path.join(dst_dir, prefix) writer = FileWriter(mindrecord_path, file_num) ctpn_json = { "image": { "type": "bytes" }, "annotation": { "type": "int32", "shape": [-1, 5] }, } writer.add_schema(ctpn_json, "ctpn_json") for image_name in image_files: with open(image_name, 'rb') as f: img = f.read() annos = np.array(image_anno_dict[image_name], dtype=np.int32) print("img name is {}, anno is {}".format(image_name, annos)) row = {"image": img, "annotation": annos} writer.write_raw_data([row]) writer.commit()
def test_shard_4_raw_data_1():
    """test file writer when shard_num equals 4 and number of samples equals 1."""
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    schema_json = {"file_name": {"type": "string"},
                   "label": {"type": "number"}}
    writer.add_schema(schema_json, "img_schema")
    writer.add_index(["label"])
    data = [{"file_name": "001.jpg", "label": 1}]
    writer.write_raw_data(data)
    writer.commit()

    reader = FileReader(CV_FILE_NAME + "0")
    count = 0
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 2
        count += 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 1
    reader.close()

    paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
             for x in range(FILES_NUM)]
    for x in paths:
        os.remove("{}".format(x))
        os.remove("{}.db".format(x))