def test_dont_cleanup_recent_orphaned_snapshots(mongo_host, library, data, dry_run, fw_pointers_config):
    """ Snapshot refs created within the last 24h must never be cleaned up,
    with or without --force: the parent pointer on the version survives. """
    with FwPointersCtx(fw_pointers_config):
        # Backdate the snapshot id by just under a day so it still counts as "recent".
        today = dt.utcnow() - dtd(hours=12, seconds=1)
        _id = bson.ObjectId.from_datetime(today)
        library.write('symbol', data, prune_previous_version=False)
        with patch("bson.ObjectId", return_value=_id):
            library.snapshot('snap_name')
        # Delete the snapshot documents, orphaning the snapshot refs on the version.
        # (delete_many returns a truthy DeleteResult.)
        assert library._collection.snapshots.delete_many({})
        if dry_run:
            # Dry run: nothing is touched.
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            assert repr(library.read('symbol').data) == repr(data)
            # Parent (snapshot) refs untouched.
            assert len(library._collection.versions.find_one({})['parent'])
        else:
            # Forced run: the refs are too recent, so they still must not be removed.
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            # Data still available (write with prune_previous_version will do the cleanup)
            assert repr(library.read('symbol').data) == repr(data)
            # Parent (snapshot) refs survive even the forced cleanup.
            assert len(library._collection.versions.find_one({})['parent'])
def test_cleanup_orphaned_chunks(mongo_host, library, data, dry_run, fw_pointers_config):
    """Day-old orphaned chunks are purged only by a forced (non-dry) run."""
    with FwPointersCtx(fw_pointers_config):
        stamp = dt.utcnow() - dtd(days=1, seconds=1)
        version_id = bson.ObjectId.from_datetime(stamp)
        with patch("bson.ObjectId", return_value=version_id):
            library.write('symbol', data, prune_previous_version=False)

        # Chunk count produced by the write, before we orphan them.
        n_chunks = mongo_count(library._collection)

        # Drop the version document; its chunks are now orphaned.
        library._collection.versions.delete_one({'_id': version_id})

        if dry_run:
            # Dry run must not delete anything.
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) == n_chunks
        else:
            # Forced run purges every orphaned chunk.
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) == 0
def test_save_and_resave_reuses_chunks(library, fw_pointers_cfg):
    """Rewriting a grown, head-modified array reuses most existing chunks."""
    with FwPointersCtx(fw_pointers_cfg), patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
        arr = np.random.rand(1024)
        library.write('MYARR', arr)
        assert np.all(arr == library.read('MYARR').data)
        chunks_after_first_write = mongo_count(library._collection)
        assert chunks_after_first_write == 9

        # Grow the array and mutate its head - this is a rewrite, not an append.
        arr = np.concatenate([arr, np.random.rand(10)])
        arr[0] = arr[1] = arr[2] = 0
        library.write('MYARR', arr)
        assert np.all(arr == library.read('MYARR').data)

        # The rewrite shares chunks with version 1 rather than doubling them.
        assert mongo_count(library._collection) == 11
        if fw_pointers_cfg in (FwPointersCfg.DISABLED, FwPointersCfg.HYBRID):
            # We hit the update (rather than upsert) code path
            assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7
        if fw_pointers_cfg in (FwPointersCfg.HYBRID, FwPointersCfg.ENABLED):
            assert len(library._versions.find_one({'symbol': 'MYARR', 'version': 2})[FW_POINTERS_REFS_KEY]) == 9
def test_cleanup_orphaned_chunk_doesnt_break_versions(mongo_host, library, data, fw_pointers_config):
    """Chunks referenced by more than one version must survive a forced cleanup."""
    with FwPointersCtx(fw_pointers_config):
        stamp = dt.utcnow() - dtd(days=1, seconds=1)
        old_id = bson.ObjectId.from_datetime(stamp)
        with patch("bson.ObjectId", return_value=old_id):
            library.write('symbol', data, prune_previous_version=False)

        # Write a second, fully self-standing version: force the write path
        # (not append) so removing one version cannot break the other.
        with patch('arctic.store._ndarray_store._APPEND_COUNT', 0):
            library.write('symbol', data, prune_previous_version=False)

        # Remove version 1 without cleanup, plus its version document.
        library._delete_version('symbol', 1)
        library._collection.versions.delete_one({'_id': old_id})
        assert repr(library.read('symbol').data) == repr(data)

        # Forced cleanup must leave the surviving version's chunks intact.
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert repr(library.read('symbol').data) == repr(data)

        # Deleting the symbol drops the remaining version documents.
        library.delete('symbol')
        assert mongo_count(library._collection.versions) == 0
def test_empty_append_concat_and_rewrite_2(library, fw_pointers_cfg):
    """Appending past the append limit still round-trips string data intact."""
    with FwPointersCtx(fw_pointers_cfg):
        letters = np.array(["a", "b", "c"])
        library.write('MYARR', letters)
        # One append beyond _APPEND_COUNT forces a re-write/compaction.
        for _ in range(_APPEND_COUNT + 1):
            library.append('MYARR', letters)
        stored = library.read('MYARR').data
        assert np.all(stored == np.hstack([letters] * (_APPEND_COUNT + 2)))
def test_empty_append_promotes_dtype(library, fw_pointers_cfg):
    """Appending an empty array leaves the stored data unchanged."""
    with FwPointersCtx(fw_pointers_cfg):
        strings = np.array(["a", "b", "c"])
        nothing = np.array([])
        library.write('MYARR', strings)
        library.append('MYARR', nothing)
        assert np.all(library.read('MYARR').data == strings)
def test_append_simple_ndarray_promoting_types(library, fw_pointers_cfg):
    """Appends that alternate int64/float64 promote the stored array to float64."""
    with FwPointersCtx(fw_pointers_cfg):
        library.write('MYARR', np.ones(100, dtype='int64'))
        # Mixed-dtype appends, the last one large enough to span segments.
        for length, kind in ((100, 'float64'), (100, 'int64'), (205, 'float64')):
            library.append('MYARR', np.ones(length, dtype=kind))
        stored = library.read('MYARR').data
        assert np.all(np.ones(505, dtype='float64') == stored)
def test_multiple_write(library, fw_pointers_cfg):
    """Every write/append produces a distinct version retrievable via as_of."""
    with FwPointersCtx(fw_pointers_cfg):
        full_arr = np.empty(1000, dtype=[('abc', 'int64')])
        first_arr = np.empty(900, dtype=[('abc', 'int64')])

        library.write('MYARR', first_arr)
        v1 = library.read('MYARR').version
        library.write('MYARR', full_arr[:900])
        v2 = library.read('MYARR').version
        library.append('MYARR', full_arr[-100:])
        v3 = library.read('MYARR').version

        # Latest data is the full array; each historical version is intact.
        assert np.all(full_arr == library.read('MYARR').data)
        assert np.all(full_arr == library.read('MYARR', as_of=v3).data)
        assert np.all(first_arr == library.read('MYARR', as_of=v1).data)
        assert np.all(full_arr[:900] == library.read('MYARR', as_of=v2).data)
def test_append_ndarray_with_field_shape(library, fw_pointers_cfg):
    """Appending records whose sub-array field dtype differs promotes the result."""
    with FwPointersCtx(fw_pointers_cfg):
        def make_records(b_dtype, length=10):
            # Records with a scalar 'A' field and a 2-vector 'B' field.
            recs = np.empty(length, dtype=[('A', 'int64'), ('B', b_dtype, (2,))])
            recs['A'] = 1
            recs['B'] = 2
            return recs

        library.write('MYARR', make_records('float64'))
        library.append('MYARR', make_records('int64'))
        stored = library.read('MYARR').data
        # The int64 'B' values are promoted to float64 in the combined array.
        assert np.all(make_records('float64', 20) == stored)
def test_save_append_read_ndarray(library, fw_pointers_cfg):
    """Write a >16MB structured array, then append twice; each state round-trips.

    The initial array is sized above 16MB so the appends exercise the
    multi-chunk storage path.
    """
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        # `//` keeps the arange argument an int (the Python-2-era `/` here made
        # arange produce float64 data that .view() silently reinterpreted).
        ndarr = np.arange(30 * 1024 * 1024 // dtype.itemsize).view(dtype=dtype)
        # tobytes() replaces the tostring() alias removed in NumPy 2.0.
        assert len(ndarr.tobytes()) > 16 * 1024 * 1024
        library.write('MYARR', ndarr)

        sliver = np.arange(30).view(dtype=dtype)
        library.append('MYARR', sliver)
        saved_arr = library.read('MYARR').data
        assert np.all(np.concatenate([ndarr, sliver]) == saved_arr)

        # A second append on top of the first must also round-trip.
        library.append('MYARR', sliver)
        saved_arr = library.read('MYARR').data
        assert np.all(np.concatenate([ndarr, sliver, sliver]) == saved_arr)
def test_append_after_failed_append(library, fw_pointers_cfg):
    """An orphaned chunk left by a failed append must not corrupt a later append.

    Deletes v2 without cleanup to simulate a half-finished append, then checks
    that a fresh append builds a correct v3 while v1 stays readable.
    """
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        # `//` keeps the arange argument an int; the Python-2-era `/` yielded a
        # float, making arange produce float64 data silently reinterpreted by .view().
        ndarr = np.arange(30 // dtype.itemsize).view(dtype=dtype)
        v1 = library.write('MYARR', ndarr)

        sliver = np.arange(3, 4).view(dtype=dtype)
        v2 = library.append('MYARR', sliver)
        # simulate a failed append - intentionally leave an orphaned chunk lying around here
        library._delete_version('MYARR', v2.version, do_cleanup=False)

        sliver2 = np.arange(3, 5).view(dtype=dtype)
        v3 = library.append('MYARR', sliver2)

        # v1 is untouched, and v3 contains only the data from the successful append.
        assert np.all(ndarr == library.read('MYARR', as_of=v1.version).data)
        assert np.all(np.concatenate([ndarr, sliver2]) == library.read('MYARR', as_of=v3.version).data)
def test_cleanup_orphaned_chunks_ignores_recent(mongo_host, library, data, dry_run, fw_pointers_config):
    """ We don't cleanup any chunks in the range of today. That's just asking for trouble """
    with FwPointersCtx(fw_pointers_config):
        half_day_ago = dt.utcnow() - dtd(hours=12)
        recent_id = bson.ObjectId.from_datetime(half_day_ago)
        with patch("bson.ObjectId", return_value=recent_id):
            library.write('symbol', data, prune_previous_version=False)

        n_chunks = mongo_count(library._collection)
        library._collection.versions.delete_one({'_id': recent_id})

        # The orphaned chunks are too recent to collect: the count is
        # unchanged whether or not -f is passed.
        extra_args = () if dry_run else ('-f',)
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, *extra_args)
        assert mongo_count(library._collection) == n_chunks
def test_save_append_delete_append(library, fw_pointers_cfg):
    """A version deleted without cleanup must not leak its chunks into a rewrite."""
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        # `//` keeps the arange argument an int; the Python-2-era `/` yielded a
        # float, making arange produce float64 data silently reinterpreted by .view().
        ndarr = np.arange(30 // dtype.itemsize).view(dtype=dtype)
        v1 = library.write('MYARR', ndarr)

        sliver = np.arange(30).view(dtype=dtype)
        v2 = library.append('MYARR', sliver)
        # intentionally leave an orphaned chunk lying around here
        library._delete_version('MYARR', v2.version, do_cleanup=False)

        sliver2 = np.arange(start=10, stop=40).view(dtype=dtype)
        # we can't append here, as the latest version is now out of sync with version_nums.
        # This gets translated to a do_append by the handler anyway.
        v3 = library.write('MYARR', np.concatenate([ndarr, sliver2]))

        assert np.all(ndarr == library.read('MYARR', as_of=v1.version).data)
        # Check that we don't get the orphaned chunk from v2 back again.
        assert np.all(np.concatenate([ndarr, sliver2]) == library.read('MYARR', as_of=v3.version).data)
def test_cleanup_noop(mongo_host, library, data, dry_run, fw_pointers_config):
    """ Check that we do / don't cleanup chunks based on the dry-run """
    with FwPointersCtx(fw_pointers_config):
        write_time = dt.utcnow() - dtd(days=1, seconds=1)
        version_id = bson.ObjectId.from_datetime(write_time)
        with patch("bson.ObjectId", return_value=version_id):
            library.write('symbol', data, prune_previous_version=False)

        n_chunks = mongo_count(library._collection)

        # The version document still exists, so there is nothing to clean
        # up regardless of whether -f is passed.
        extra_args = () if dry_run else ('-f',)
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, *extra_args)
        assert mongo_count(library._collection) == n_chunks
        assert repr(library.read('symbol').data) == repr(data)
def test_append_read_large_ndarray(library, fw_pointers_cfg):
    """Many tiny appends compact to exactly the state of one bulk write.

    Writes a >16MB base array, appends 120 two-record slivers (enough to
    trigger two re-compactions), then compares metadata against a symbol
    written in one shot.
    """
    with FwPointersCtx(fw_pointers_cfg):
        dtype = np.dtype([('abc', 'int64')])
        # `//` keeps the arange argument an int (the Python-2-era `/` here made
        # arange produce float64 data that .view() silently reinterpreted).
        ndarr = np.arange(50 * 1024 * 1024 // dtype.itemsize).view(dtype=dtype)
        # tobytes() replaces the tostring() alias removed in NumPy 2.0.
        assert len(ndarr.tobytes()) > 16 * 1024 * 1024
        library.write('MYARR1', ndarr)

        # Exactly enough appends to trigger 2 re-compacts, so the result should be identical
        # to writing the whole array at once
        ndarr2 = np.arange(240).view(dtype=dtype)
        for n in np.split(ndarr2, 120):
            library.append('MYARR1', n)

        saved_arr = library.read('MYARR1').data
        assert np.all(np.concatenate([ndarr, ndarr2]) == saved_arr)

        library.write('MYARR2', np.concatenate([ndarr, ndarr2]))
        version1 = library._read_metadata('MYARR1')
        version2 = library._read_metadata('MYARR2')
        # The compacted symbol's bookkeeping matches the bulk-written one.
        assert version1['append_count'] == version2['append_count']
        assert version1['append_size'] == version2['append_size']
        assert version1['segment_count'] == version2['segment_count']
        assert version1['up_to'] == version2['up_to']
def test_save_read_big_1darray(library, fw_pointers_cfg):
    """A large flattened 1-D float array round-trips unchanged."""
    with FwPointersCtx(fw_pointers_cfg):
        big = np.random.rand(5326, 6020).ravel()
        library.write('MYARR', big)
        assert np.all(big == library.read('MYARR').data)
def test_save_read_massive_2darray(library, fw_pointers_cfg):
    """A very wide (1 x 320000) 2-D array round-trips unchanged."""
    with FwPointersCtx(fw_pointers_cfg):
        wide = np.random.rand(1, 320000)
        library.write('MYARR', wide)
        assert np.all(wide == library.read('MYARR').data)