def benchmark():
    """
    Exercise a tub with a write / delete / read-back cycle.

    Recreates the benchmark directory, writes 1000 single-field records,
    deletes 100 randomly chosen record indexes, prints every record yielded
    by iterating the tub, and finally closes the tub.

    :return:                    None
    """
    # Point this at a non SSD storage location
    target = Path('/media/rahulrav/Cruzer/benchmark')

    # Start from a clean directory
    target_posix = target.absolute().as_posix()
    if os.path.exists(target_posix):
        shutil.rmtree(target)

    total_writes = 1000
    tub = Tub(target.as_posix(), ['input'], ['int'], max_catalog_len=1000)

    for value in range(total_writes):
        tub.write_record({'input': value})

    # numpy integers are converted to plain ints before being handed over
    for raw_index in np.random.randint(0, total_writes, 100):
        tub.delete_record(int(raw_index))

    for entry in tub:
        print('Record %s' % entry)

    tub.close()
def convert_to_tub_v2(paths, output_path):
    """
    Convert legacy tubs into the new tub format.

    Every legacy record is read from its JSON file, its image is opened and
    the record is written to a ``Tub`` created at ``output_path``. Gaps in the
    legacy image numbering are padded with placeholder records which are then
    marked as deleted. Records that fail to convert are reported together
    with a traceback and skipped.

    :param paths:               legacy tub path, or a list of legacy tub paths
    :param output_path:         new tub output path
    :return:                    None
    """
    empty_record = {'__empty__': True}
    if isinstance(paths, str):
        paths = [paths]
    legacy_tubs = [LegacyTub(path) for path in paths]
    print(f'Total number of tubs: {len(legacy_tubs)}')

    for legacy_tub in legacy_tubs:
        # add input and type for empty records recording
        inputs = legacy_tub.inputs + ['__empty__']
        types = legacy_tub.types + ['boolean']
        output_tub = Tub(output_path, inputs, types,
                         list(legacy_tub.meta.items()))
        record_paths = legacy_tub.gather_records()
        bar = IncrementalBar('Converting', max=len(record_paths))
        # Legacy index of the last record written; None until the first one.
        previous_index = None
        for record_path in record_paths:
            # Best effort: a record that cannot be converted is reported and
            # skipped so the rest of the tub is still converted.
            try:
                contents = Path(record_path).read_text()
                record = json.loads(contents)
                image_path = record['cam/image_array']
                ms = record['milliseconds']
                # The legacy index is the number before the first '_' in the
                # image file name.
                current_index = int(image_path.split('_')[0])
                image_path = os.path.join(legacy_tub.path, image_path)
                image_data = Image.open(image_path)
                record['cam/image_array'] = image_data

                # Compare against None explicitly: a previous index of 0 is a
                # real record, not "no previous record".
                if previous_index is not None:
                    # Add empty record nodes for every missing legacy index
                    # and mark them deleted. The range is empty when the
                    # records are continuous.
                    delete_list = []
                    for _ in range(previous_index + 1, current_index):
                        idx = output_tub.manifest.current_index
                        output_tub.write_record(empty_record, ms)
                        delete_list.append(idx)
                    if delete_list:
                        output_tub.delete_records(delete_list)

                # Always write the valid record itself, including the first
                # one that follows a gap.
                output_tub.write_record(record, ms)
                previous_index = current_index
                bar.next()
            except Exception as exception:
                print(f'Ignoring record path {record_path}\n', exception)
                traceback.print_exc()
        # writing session id into manifest metadata
        output_tub.close()