def test_cleanup_orphaned_snapshots_nop(mongo_host, library, data, dry_run):
    """
    Check that we do / don't cleanup chunks based on the dry-run
    """
    yesterday = dt.utcnow() - dtd(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    library.write('symbol', data, prune_previous_version=False)
    with patch("bson.ObjectId", return_value=_id):
        library.snapshot('snap_name')

    # No cleanup on dry-run
    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) > 0
        assert mongo_count(library._collection.versions)
        assert repr(library.read('symbol').data) == repr(data)
        # Nothing done
        assert len(library._collection.versions.find_one({})['parent'])
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) > 0
        assert mongo_count(library._collection.versions)
        # Data still available (write with prune_previous_version will do the cleanup)
        assert repr(library.read('symbol').data) == repr(data)
        # Nothing done
        assert len(library._collection.versions.find_one({})['parent'])

def test_save_and_resave_reuses_chunks(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
            ndarr = np.random.rand(1024)
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)
            orig_chunks = mongo_count(library._collection)
            assert orig_chunks == 9

            # Concatenate more values
            ndarr = np.concatenate([ndarr, np.random.rand(10)])
            # And change the original values - we're not a simple append
            ndarr[0] = ndarr[1] = ndarr[2] = 0
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)

            # Should contain the original chunks, but not double the number of chunks
            new_chunks = mongo_count(library._collection)
            assert new_chunks == 11

            if fw_pointers_cfg in (FwPointersCfg.DISABLED, FwPointersCfg.HYBRID):
                # We hit the update (rather than upsert) code path
                assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7

            if fw_pointers_cfg in (FwPointersCfg.HYBRID, FwPointersCfg.ENABLED):
                assert len(library._versions.find_one({'symbol': 'MYARR',
                                                       'version': 2})[FW_POINTERS_REFS_KEY]) == 9

def test_dont_cleanup_recent_orphaned_snapshots(mongo_host, library, data, dry_run, fw_pointers_config):
    """
    Check that we do / don't cleanup chunks based on the dry-run
    """
    with FwPointersCtx(fw_pointers_config):
        today = dt.utcnow() - dtd(hours=12, seconds=1)
        _id = bson.ObjectId.from_datetime(today)
        library.write('symbol', data, prune_previous_version=False)
        with patch("bson.ObjectId", return_value=_id):
            library.snapshot('snap_name')

        # Remove the snapshot document to orphan the snapshotted version
        assert library._collection.snapshots.delete_many({})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            assert repr(library.read('symbol').data) == repr(data)
            # Nothing done
            assert len(library._collection.versions.find_one({})['parent'])
        else:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            # Data still available (write with prune_previous_version will do the cleanup)
            assert repr(library.read('symbol').data) == repr(data)
            # Snapshot not cleaned up - it's too recent
            assert len(library._collection.versions.find_one({})['parent'])

def test_cleanup_orphaned_chunks(mongo_host, library, data, dry_run, fw_pointers_config):
    """
    Check that we do / don't cleanup chunks based on the dry-run
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Number of chunks
        chunk_count = mongo_count(library._collection)

        # Remove the version document; should cleanup
        library._collection.versions.delete_one({'_id': _id})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host)
            assert mongo_count(library._collection) == chunk_count
        else:
            run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
            assert mongo_count(library._collection) == 0

def test_cleanup_orphaned_versions_integration(library):
    _id = ObjectId.from_datetime(dt(2013, 1, 1))
    with patch('bson.ObjectId', return_value=_id):
        with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
            mt.write(symbol, ts1)
    assert mongo_count(library._versions, filter={'parent': {'$size': 1}}) == 1
    library._cleanup_orphaned_versions(False)
    assert mongo_count(library._versions, filter={'parent': {'$size': 1}}) == 1

def test_bson_leak_objects_delete(library):
    blob = {'foo': dt(2015, 1, 1), 'object': Arctic}

    library.write('BLOB', blob)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 1

    library.delete('BLOB')
    assert mongo_count(library._collection) == 0
    assert mongo_count(library._collection.versions) == 0

def check_written(self, collection, symbol, version):
    # Currently only called from methods which guarantee 'base_version_id' is not populated.
    # Make it nonetheless safe for the general case.
    parent_id = version_base_or_id(version)

    # Check all the chunks are in place
    seen_chunks = mongo_count(collection, filter={'symbol': symbol, 'parent': parent_id})

    if seen_chunks != version['segment_count']:
        segments = [x['segment'] for x in collection.find({'symbol': symbol, 'parent': parent_id},
                                                          projection={'segment': 1})]
        raise pymongo.errors.OperationFailure("Failed to write all the Chunks. Saw %s expecting %s. "
                                              "Parent: %s \n segments: %s" %
                                              (seen_chunks, version['segment_count'], parent_id, segments))

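# A minimal standalone sketch (not part of the store API; the helper name
# '_written_chunks_match' is hypothetical) restating the invariant that
# check_written() above enforces: every chunk written for a version must be
# findable under the version's (base) parent id before the write is trusted.
def _written_chunks_match(collection, symbol, version):
    expected = version['segment_count']
    found = mongo_count(collection, filter={'symbol': symbol,
                                            'parent': version_base_or_id(version)})
    return found == expected
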
def test_cleanup_orphaned_chunk_doesnt_break_versions(mongo_host, library, data, fw_pointers_config):
    """
    Check that a chunk pointed to by more than one version isn't inadvertently cleared
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Re-write the data again. Write a whole new version rather than going down
        # the append path - we want two self-standing versions; the removal of one
        # shouldn't break the other.
        with patch('arctic.store._ndarray_store._APPEND_COUNT', 0):
            library.write('symbol', data, prune_previous_version=False)
        library._delete_version('symbol', 1)
        library._collection.versions.delete_one({'_id': _id})
        assert repr(library.read('symbol').data) == repr(data)

        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert repr(library.read('symbol').data) == repr(data)
        library.delete('symbol')
        assert mongo_count(library._collection.versions) == 0

def test_date_range_end_not_in_range(tickstore_lib):
    DUMMY_DATA = [
        {'a': 1., 'b': 2., 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))},
        {'b': 3., 'c': 4., 'index': dt(2013, 1, 2, 10, 1, tzinfo=mktz('Europe/London'))},
    ]

    tickstore_lib._chunk_size = 1
    tickstore_lib.write('SYM', DUMMY_DATA)
    with patch.object(tickstore_lib._collection, 'find', side_effect=tickstore_lib._collection.find) as f:
        df = tickstore_lib.read('SYM', date_range=DateRange(20130101, dt(2013, 1, 2, 9, 0)), columns=None)
        assert_array_equal(df['b'].values, np.array([2.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1

def test_cleanup_orphaned_chunks_ignores_recent(mongo_host, library, data, dry_run):
    """
    We don't cleanup any chunks in the range of today. That's just asking for trouble
    """
    yesterday = dt.utcnow() - dtd(hours=12)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('symbol', data, prune_previous_version=False)

    chunk_count = mongo_count(library._collection)
    library._collection.versions.delete_one({'_id': _id})

    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) == chunk_count
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) == chunk_count

def test_date_range(tickstore_lib):
    tickstore_lib.write('SYM', DUMMY_DATA)
    df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
    assert_array_equal(df['a'].values, np.array([1, np.nan, np.nan]))
    assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
    assert_array_equal(df['c'].values, np.array([np.nan, 4., 6.]))

    tickstore_lib.delete('SYM')

    # Chunk every 3 ticks and let's have some fun
    tickstore_lib._chunk_size = 3
    tickstore_lib.write('SYM', DUMMY_DATA)

    with patch('pymongo.collection.Collection.find', side_effect=tickstore_lib._collection.find) as f:
        df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1

    # Test the different open-closed behaviours
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_CLOSED), columns=None)
    assert_array_equal(df['b'].values, np.array([7., 9.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_OPEN), columns=None)
    assert_array_equal(df['b'].values, np.array([7.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_CLOSED), columns=None)
    assert_array_equal(df['b'].values, np.array([9.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_OPEN), columns=None)
    assert_array_equal(df['b'].values, np.array([]))

def test_cleanup_noop(mongo_host, library, data, dry_run):
    """
    Check that we do / don't cleanup chunks based on the dry-run
    """
    yesterday = dt.utcnow() - dtd(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('symbol', data, prune_previous_version=False)

    # Number of chunks
    chunk_count = mongo_count(library._collection)

    # No cleanup on dry-run
    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) == chunk_count
        assert repr(library.read('symbol').data) == repr(data)
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) == chunk_count
        assert repr(library.read('symbol').data) == repr(data)

def test_save_and_resave_reuses_chunks(library):
    with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
        ndarr = np.random.rand(1024)
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)
        orig_chunks = mongo_count(library._collection)
        assert orig_chunks == 9

        # Concatenate more values
        ndarr = np.concatenate([ndarr, np.random.rand(10)])
        # And change the original values - we're not a simple append
        ndarr[0] = ndarr[1] = ndarr[2] = 0
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)

        # Should contain the original chunks, but not double the number of chunks
        new_chunks = mongo_count(library._collection)
        assert new_chunks == 11

        # We hit the update (rather than upsert) code path
        assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7

def _fast_check_corruption(collection, sym, v, check_count, check_last_segment, check_append_safe):
    if v is None:
        logging.warning("Symbol {} with version {} not found, so can't be corrupted.".format(sym, v))
        return False

    if not check_count and not check_last_segment:
        raise ValueError("_fast_check_corruption must be called with at least one of "
                         "check_count and check_last_segment set to True")

    # If the version marked the symbol as deleted, it will force writes/appends to start
    # from a new base: not corrupted.
    if isinstance(v.get('metadata'), dict) and v['metadata'].get('deleted'):
        return False

    if check_append_safe:
        # Check whether appending to the symbol version can potentially corrupt the data (history branch).
        # Inspect all segments, don't limit to v['up_to']. No newer append segments after v should exist.
        spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id'])}
    else:
        # Only verify the segment count for the current symbol version; don't check whether
        # future appends are safe.
        spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id']), 'segment': {'$lt': v['up_to']}}

    try:
        # Note that command sequence (a) is slower than (b):
        # (a) curs = collection.find(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])
        #     curs.count()
        #     curs.next()
        # (b) collection.find(spec, {'segment': 1}).count()
        #     collection.find_one(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])
        if check_count:
            total_segments = mongo_count(collection, filter=spec)
            # Quick check: compare the segment count
            if total_segments != v.get('segment_count', 0):
                return True  # corrupted; don't proceed to fetch the first hit from mongo
            # Quick check: segment counts agree and the size is zero
            if total_segments == 0:
                return False

        if check_last_segment:
            # Quick check: the maximum segment's up_to number has to match the version's up_to.
            max_seg = collection.find_one(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])
            max_seg = max_seg['segment'] + 1 if max_seg else 0
            if max_seg != v.get('up_to'):
                return True  # corrupted; the last segment and the version's up_to don't agree
    except OperationFailure as e:
        logging.warning("Corruption checks are skipped (sym={}, version={}): {}".format(sym, v['version'], str(e)))

    return False

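# A minimal sketch (assumption, not confirmed by this file) of how the fast checks
# above might be wrapped into the fast_is_corrupted() helper that analyze_symbol()
# below relies on; the exact flag combination is an assumption.
def fast_is_corrupted_sketch(l, sym, v):
    # Metadata-only checks: segment count and last segment, no appendability check.
    return _fast_check_corruption(l._collection, sym, v,
                                  check_count=True, check_last_segment=True,
                                  check_append_safe=False)
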
def test_mongo_count_new_pymongo(monkeypatch):
    monkeypatch.setattr(arctic._util, '_use_new_count_api', None)
    with patch('pymongo.version', '3.11.0'):
        coll2 = MagicMock()
        mongo_count(coll2, filter="_id:1")
        mongo_count(coll2, filter={})
        mongo_count(coll2)
        assert coll2.estimated_document_count.call_count == 2
        assert coll2.count_documents.call_count == 1
        assert coll2.count.call_count == 0

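# A minimal sketch (assumption) of the dispatch the test above exercises: on a
# modern pymongo, an empty or absent filter can use the cheap
# estimated_document_count(), while a non-empty filter needs count_documents().
# The real mongo_count() additionally falls back to count() on old pymongo
# versions, which this sketch omits.
def mongo_count_sketch(collection, filter=None):
    if filter:
        return collection.count_documents(filter)
    return collection.estimated_document_count()
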
def test_prune_previous_doesnt_kill_other_objects(library):
    blob = {'foo': dt(2015, 1, 1), 'object': Arctic}

    yesterday = dt.utcnow() - timedelta(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('BLOB', blob, prune_previous_version=False)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 1

    _id = bson.ObjectId.from_datetime(dt.utcnow() - timedelta(hours=10))
    with patch("bson.ObjectId", return_value=_id):
        library.write('BLOB', blob, prune_previous_version=False)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 2

    # This write should prune the oldest version in the chunk collection
    library.write('BLOB', {})
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 2

    library._delete_version('BLOB', 2)
    assert mongo_count(library._collection) == 0
    assert mongo_count(library._collection.versions) == 1

def analyze_symbol(l, sym, from_ver, to_ver, do_reads=False):
    """
    This is a utility function to produce text output with details about the versions of a given symbol.
    It is useful for debugging corruption issues and for marking corrupted versions.

    Parameters
    ----------
    l : `arctic.store.version_store.VersionStore`
        The VersionStore instance against which the analysis will be run.
    sym : `str`
        The symbol to analyze
    from_ver : `int` or `None`
        The lower bound for the version number we wish to analyze. If None, start from the earliest version.
    to_ver : `int` or `None`
        The upper bound for the version number we wish to analyze. If None, stop at the latest version.
    do_reads : `bool`
        If this flag is set to True, the corruption check will actually try to read the symbol (slower).
    """
    logging.info('Analyzing symbol {}. Versions range is [v{}, v{}]'.format(sym, from_ver, to_ver))

    prev_rows = 0
    prev_n = 0
    prev_v = None

    logging.info('\nVersions for {}:'.format(sym))
    for v in l._versions.find({'symbol': sym, 'version': {'$gte': from_ver, '$lte': to_ver}},
                              sort=[('version', pymongo.ASCENDING)]):
        n = v.get('version')

        is_deleted = v.get('metadata').get('deleted', False) if v.get('metadata') else False
        if is_deleted:
            matching = 0
        else:
            spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id']),
                    'segment': {'$lt': v.get('up_to', 0)}}
            matching = mongo_count(l._collection, filter=spec)

        base_id = v.get('base_version_id')
        snaps = ['/'.join((str(x), str(x.generation_time))) for x in v.get('parent')] if v.get('parent') else None
        added_rows = v.get('up_to', 0) - prev_rows
        meta_match_with_prev = v.get('metadata') == prev_v.get('metadata') if prev_v else False
        delta_snap_creation = (min([x.generation_time for x in v.get('parent')]) -
                               v['_id'].generation_time).total_seconds() / 60.0 if v.get('parent') else 0.0
        prev_v_diff = 0 if not prev_v else v['version'] - prev_v['version']
        corrupted = not is_deleted and (is_corrupted(l, sym, v) if do_reads else fast_is_corrupted(l, sym, v))

        logging.info(
            "v{: <6} "
            "{: <6} "
            "{: <5} "
            "({: <20}): "
            "expected={: <6} "
            "found={: <6} "
            "last_row={: <10} "
            "new_rows={: <10} "
            "append count={: <10} "
            "append_size={: <10} "
            "type={: <14} {: <14} "
            "base={: <24}/{: <28} "
            "snap={: <30}[{:.1f} mins delayed] "
            "{: <20} "
            "{: <20}".format(
                n, prev_v_diff,
                'DEL' if is_deleted else 'ALIVE',
                str(v['_id'].generation_time),
                v.get('segment_count', 0),
                matching,
                v.get('up_to', 0),
                added_rows,
                v.get('append_count'),
                v.get('append_size'),
                v.get('type'),
                'meta-same' if meta_match_with_prev else 'meta-changed',
                str(base_id),
                str(base_id.generation_time) if base_id else '',
                str(snaps),
                delta_snap_creation,
                'PREV_MISSING' if prev_n < n - 1 else '',
                'CORRUPTED VERSION' if corrupted else '')
        )

        prev_rows = v.get('up_to', 0)
        prev_n = n
        prev_v = v

    logging.info('\nSegments for {}:'.format(sym))
    for seg in l._collection.find({'symbol': sym}, sort=[('_id', pymongo.ASCENDING)]):
        logging.info("{: <32} {: <7} {: <10} {: <30}".format(
            hashlib.sha1(seg['sha']).hexdigest(),
            seg.get('segment'),
            'compressed' if seg.get('compressed', False) else 'raw',
            str([str(p) for p in seg.get('parent', [])])
        ))

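# Example usage (hypothetical 'store' VersionStore instance and symbol name):
# dump version/segment diagnostics for versions 1-100 using only the fast
# metadata checks, without reading the data back.
# analyze_symbol(store, 'EURUSD', from_ver=1, to_ver=100, do_reads=False)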