import shutil
import tempfile
import unittest

# Assumed import path for the v2 Tub under test.
from donkeycar.parts.tub_v2 import Tub


class TestTub(unittest.TestCase):

    def setUp(self):
        self._path = tempfile.mkdtemp()
        inputs = ['input']
        types = ['int']
        self.tub = Tub(self._path, inputs, types)

    def test_basic_tub_operations(self):
        # A fresh tub contains no records.
        entries = list(self.tub)
        self.assertEqual(len(entries), 0)

        write_count = 10
        delete_indexes = [0, 8]

        # Write records, then delete a couple of them.
        records = [{'input': i} for i in range(write_count)]
        for record in records:
            self.tub.write_record(record)

        for index in delete_indexes:
            self.tub.delete_records(index)

        # Iteration and len() both skip the deleted records.
        count = 0
        for record in self.tub:
            print('Record %s' % record)
            count += 1

        self.assertEqual(count, write_count - len(delete_indexes))
        self.assertEqual(len(self.tub), write_count - len(delete_indexes))

    def tearDown(self):
        shutil.rmtree(self._path)
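# A minimal way to run the test case above directly, a sketch assuming this
# module is executed as a script; `python -m unittest` against the test module
# works equally well.
if __name__ == '__main__':
    unittest.main()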
import json
import os
import traceback
from pathlib import Path

from PIL import Image
from progress.bar import IncrementalBar

# Assumed import paths: LegacyTub is the v1 datastore, Tub the v2 datastore.
from donkeycar.parts.datastore import Tub as LegacyTub
from donkeycar.parts.tub_v2 import Tub


def convert_to_tub_v2(paths, output_path):
    """
    Convert from legacy tubs to the new v2 tub format.

    :param paths:        legacy tub paths
    :param output_path:  new tub output path
    :return:             None
    """
    empty_record = {'__empty__': True}
    if type(paths) is str:
        paths = [paths]
    legacy_tubs = [LegacyTub(path) for path in paths]
    print(f'Total number of tubs: {len(legacy_tubs)}')

    for legacy_tub in legacy_tubs:
        # add input and type for empty records recording
        inputs = legacy_tub.inputs + ['__empty__']
        types = legacy_tub.types + ['boolean']
        output_tub = Tub(output_path, inputs, types,
                         list(legacy_tub.meta.items()))
        record_paths = legacy_tub.gather_records()
        bar = IncrementalBar('Converting', max=len(record_paths))
        previous_index = None
        for record_path in record_paths:
            try:
                contents = Path(record_path).read_text()
                record = json.loads(contents)
                image_path = record['cam/image_array']
                ms = record['milliseconds']
                current_index = int(image_path.split('_')[0])
                image_path = os.path.join(legacy_tub.path, image_path)
                image_data = Image.open(image_path)
                record['cam/image_array'] = image_data
                # first record or they are continuous, just append
                if not previous_index or current_index == previous_index + 1:
                    output_tub.write_record(record, ms)
                    previous_index = current_index
                # otherwise fill the gap with empty records
                else:
                    # Skipping over previous record here because it has
                    # already been written.
                    previous_index += 1
                    # Adding empty record nodes, and marking them deleted
                    # until the next valid record.
                    delete_list = []
                    while previous_index < current_index:
                        idx = output_tub.manifest.current_index
                        output_tub.write_record(empty_record, ms)
                        delete_list.append(idx)
                        previous_index += 1
                    output_tub.delete_records(delete_list)
                bar.next()
            except Exception as exception:
                print(f'Ignoring record path {record_path}\n', exception)
                traceback.print_exc()

        # writing session id into manifest metadata
        output_tub.close()
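# Usage sketch for convert_to_tub_v2(). The tub directories below are
# hypothetical placeholders; pass one legacy tub path (or a list of them)
# plus the directory where the converted v2 tub should be written.
if __name__ == '__main__':
    legacy_paths = ['./data/tub_1', './data/tub_2']   # hypothetical legacy tubs
    convert_to_tub_v2(legacy_paths, './data/tub_v2')  # hypothetical output path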
def post(self, tub_id):
    tub_path = os.path.join(self.data_path, tub_id)
    tub = Tub(tub_path)
    old_clips = self.clips_of_tub(tub_path)
    new_clips = tornado.escape.json_decode(self.request.body)

    import itertools
    old_frames = list(itertools.chain(*old_clips))
    old_indexes = set()
    for frame in old_frames:
        old_indexes.add(frame['_index'])

    new_frames = list(itertools.chain(*new_clips['clips']))
    new_indexes = set()
    for frame in new_frames:
        new_indexes.add(frame['_index'])

    frames_to_delete = [
        index for index in old_indexes if index not in new_indexes
    ]
    tub.delete_records(frames_to_delete)
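# Sketch of the request body the post() handler above expects, inferred from
# how it reads new_clips['clips']: a JSON object whose 'clips' key holds a list
# of clips, each clip being a list of frame dicts carrying an '_index'. Frames
# present in the tub's old clips but absent from the posted clips are deleted.
# The index values below are hypothetical.
import json

example_body = {
    'clips': [
        [{'_index': 0}, {'_index': 1}],
        [{'_index': 5}, {'_index': 6}],
    ]
}
# A client would send json.dumps(example_body) as the POST body; the handler
# recovers the same structure via tornado.escape.json_decode().
print(json.dumps(example_body, indent=2))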
import os
import shutil
from pathlib import Path

import numpy as np

# Assumed import path for the v2 Tub being benchmarked.
from donkeycar.parts.tub_v2 import Tub


def benchmark():
    # Change to a non-SSD storage path
    path = Path('/media/rahulrav/Cruzer/benchmark')

    # Recreate paths
    if os.path.exists(path.absolute().as_posix()):
        shutil.rmtree(path)

    inputs = ['input']
    types = ['int']
    tub = Tub(path.as_posix(), inputs, types, max_catalog_len=1000)

    # Write a batch of records, delete a random subset, then read back the rest.
    write_count = 1000
    for i in range(write_count):
        record = {'input': i}
        tub.write_record(record)

    deletions = np.random.randint(0, write_count, 100)
    tub.delete_records(deletions)

    for record in tub:
        print('Record %s' % record)

    tub.close()
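# Minimal timing harness for benchmark(), a sketch using only the standard
# library. It assumes benchmark() is defined in (or imported into) this module
# and that the storage path inside it exists on the machine running the test.
import time

if __name__ == '__main__':
    start = time.perf_counter()
    benchmark()
    elapsed = time.perf_counter() - start
    print('benchmark() completed in %.2f seconds' % elapsed)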