Example #1
def test_cleanup_orphaned_snapshots_nop(mongo_host, library, data, dry_run):
    """
    Check that chunks referenced by a live snapshot are never cleaned up, with or without dry-run
    """
    yesterday = dt.utcnow() - dtd(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    library.write('symbol', data, prune_previous_version=False)
    with patch("bson.ObjectId", return_value=_id):
        library.snapshot('snap_name')

    # No cleanup on dry-run
    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) > 0
        assert mongo_count(library._collection.versions)
        assert repr(library.read('symbol').data) == repr(data)
        # Nothing done
        assert len(library._collection.versions.find_one({})['parent'])
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) > 0
        assert mongo_count(library._collection.versions)
        # Data still available (write with prune_previous_version will do the cleanup)
        assert repr(library.read('symbol').data) == repr(data)
        # Nothing done
        assert len(library._collection.versions.find_one({})['parent'])
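The pattern shared by these cleanup tests is the backdated ObjectId: MongoDB ObjectIds embed their creation time, and the cleanup script decides what counts as "old" from that timestamp. A minimal standalone sketch of the trick (only the bson package is assumed):

import bson
from datetime import datetime as dt, timedelta as dtd

# ObjectId.from_datetime() builds an id whose embedded timestamp (accurate to
# the second) is the given moment; patching bson.ObjectId to return it makes
# freshly written documents look a day old to the cleanup script.
yesterday = dt.utcnow() - dtd(days=1, seconds=1)
backdated = bson.ObjectId.from_datetime(yesterday)
assert abs((backdated.generation_time.replace(tzinfo=None) - yesterday).total_seconds()) < 1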
Example #2
def test_save_and_resave_reuses_chunks(library, fw_pointers_cfg):
    with FwPointersCtx(fw_pointers_cfg):
        with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
            ndarr = np.random.rand(1024)
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)
            orig_chunks = mongo_count(library._collection)
            assert orig_chunks == 9

            # Concatenate more values
            ndarr = np.concatenate([ndarr, np.random.rand(10)])
            # And change the original values - we're not a simple append
            ndarr[0] = ndarr[1] = ndarr[2] = 0
            library.write('MYARR', ndarr)
            saved_arr = library.read('MYARR').data
            assert np.all(ndarr == saved_arr)

            # Should contain the original chunks, but not double the number
            # of chunks
            new_chunks = mongo_count(library._collection)
            assert new_chunks == 11

            if fw_pointers_cfg in (FwPointersCfg.DISABLED, FwPointersCfg.HYBRID):
                # We hit the update (rather than upsert) code path
                assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7

            if fw_pointers_cfg in (FwPointersCfg.HYBRID, FwPointersCfg.ENABLED):
                assert len(library._versions.find_one({'symbol': 'MYARR', 'version': 2})[FW_POINTERS_REFS_KEY]) == 9
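One way to account for the expected chunk counts, assuming segments split on the patched byte size and 8-byte float64 values (the split rule is inferred from the assertions above, not from arctic's internals):

import math

CHUNK_BYTES = 1000                               # patched _CHUNK_SIZE
assert math.ceil(1024 * 8 / CHUNK_BYTES) == 9    # v1: 1024 float64s -> 9 segments
assert math.ceil(1034 * 8 / CHUNK_BYTES) == 9    # v2: 1034 float64s -> still 9
# v2 rewrites the first segment (values 0-2 changed) and the last one (the tail
# grew), so the collection holds 9 + 2 = 11 documents, and the middle
# 9 - 2 = 7 segments are shared by both versions ({'parent': {'$size': 2}}).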
Example #4
def test_dont_cleanup_recent_orphaned_snapshots(mongo_host, library, data,
                                                dry_run, fw_pointers_config):
    """
    Check that recent orphaned snapshots are not cleaned up, regardless of dry-run
    """
    with FwPointersCtx(fw_pointers_config):
        today = dt.utcnow() - dtd(hours=12, seconds=1)
        _id = bson.ObjectId.from_datetime(today)
        library.write('symbol', data, prune_previous_version=False)
        with patch("bson.ObjectId", return_value=_id):
            library.snapshot('snap_name')

        # Remove the snapshot document; the snapshot references are now orphaned
        assert library._collection.snapshots.delete_many({})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host)
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            assert repr(library.read('symbol').data) == repr(data)
            # Nothing done
            assert len(library._collection.versions.find_one({})['parent'])
        else:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host, '-f')
            assert mongo_count(library._collection) > 0
            assert mongo_count(library._collection.versions)
            # Data still available (write with prune_previous_version will do the cleanup)
            assert repr(library.read('symbol').data) == repr(data)
            # Snapshot reference not cleaned up - it is too recent
            assert len(library._collection.versions.find_one({})['parent'])
Example #5
def test_cleanup_orphaned_chunks(mongo_host, library, data, dry_run,
                                 fw_pointers_config):
    """
    Check that we do / don't cleanup chunks based on the dry-run
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Number of chunks
        chunk_count = mongo_count(library._collection)
        # Remove the version document; should cleanup
        library._collection.versions.delete_one({'_id': _id})

        # No cleanup on dry-run
        if dry_run:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host)
            assert mongo_count(library._collection) == chunk_count
        else:
            run_as_main(main, '--library', 'user.library', '--host',
                        mongo_host, '-f')
            assert mongo_count(library._collection) == 0
Example #6
def test_cleanup_orphaned_versions_integration(library):
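    # NB: 'symbol' and 'ts1' are module-level fixtures of the original test file
    # (a symbol name and a sample DataFrame, assumed); they are not defined in this excerpt.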
    _id = ObjectId.from_datetime(dt(2013, 1, 1))
    with patch('bson.ObjectId', return_value=_id):
        with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
            mt.write(symbol, ts1)
    assert mongo_count(library._versions, filter={'parent': {'$size': 1}}) == 1
    library._cleanup_orphaned_versions(False)
    assert mongo_count(library._versions, filter={'parent': {'$size': 1}}) == 1
Example #7
def test_bson_leak_objects_delete(library):
    blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
    library.write('BLOB', blob)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 1
    library.delete('BLOB')
    assert mongo_count(library._collection) == 0
    assert mongo_count(library._collection.versions) == 0
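The blob above holds a raw class object (Arctic itself), which no tabular serializer can store, so the write presumably falls through to VersionStore's pickle-based fallback and occupies a single chunk; delete() then removes both the chunk and the version document. A tiny standalone check of the premise:

import pickle
from datetime import datetime as dt

blob = {'foo': dt(2015, 1, 1), 'object': dict}   # a class object, standing in for Arctic
assert pickle.loads(pickle.dumps(blob)) == blob  # picklable, so a pickle-backed store can hold it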
Example #8
    def check_written(self, collection, symbol, version):
        # Currently only called from methods which guarantee 'base_version_id' is not populated.
        # Make it nonetheless safe for the general case.
        parent_id = version_base_or_id(version)

        # Check all the chunks are in place
        seen_chunks = mongo_count(collection,
                                  filter={
                                      'symbol': symbol,
                                      'parent': parent_id
                                  })

        if seen_chunks != version['segment_count']:
            segments = [
                x['segment'] for x in collection.find(
                    {
                        'symbol': symbol,
                        'parent': parent_id
                    },
                    projection={'segment': 1},
                )
            ]
            raise pymongo.errors.OperationFailure(
                "Failed to write all the Chunks. Saw %s expecting %s"
                "Parent: %s \n segments: %s" %
                (seen_chunks, version['segment_count'], parent_id, segments))
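check_written resolves the parent id via version_base_or_id. A minimal sketch of that helper, assuming it mirrors the lookup spelled out inline elsewhere in this listing (v.get('base_version_id', v['_id'])):

def version_base_or_id(version):
    # Appended versions carry a 'base_version_id' pointing at their base
    # version document; standalone versions are their own parent.
    return version.get('base_version_id', version['_id'])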
Example #9
def test_cleanup_orphaned_chunk_doesnt_break_versions(mongo_host, library,
                                                      data,
                                                      fw_pointers_config):
    """
    Check that chunks pointed to by more than one version aren't inadvertently cleared
    """
    with FwPointersCtx(fw_pointers_config):
        yesterday = dt.utcnow() - dtd(days=1, seconds=1)
        _id = bson.ObjectId.from_datetime(yesterday)
        with patch("bson.ObjectId", return_value=_id):
            library.write('symbol', data, prune_previous_version=False)

        # Re-Write the data again
        # Write a whole new version rather than going down the append path...
        #     - we want two self-standing versions, the removal of one shouldn't break the other...
        with patch('arctic.store._ndarray_store._APPEND_COUNT', 0):
            library.write('symbol', data, prune_previous_version=False)
        library._delete_version('symbol', 1)
        library._collection.versions.delete_one({'_id': _id})
        assert repr(library.read('symbol').data) == repr(data)

        run_as_main(main, '--library', 'user.library', '--host', mongo_host,
                    '-f')
        assert repr(library.read('symbol').data) == repr(data)
        library.delete('symbol')
        assert mongo_count(library._collection.versions) == 0
Example #10
def test_date_range_end_not_in_range(tickstore_lib):
    DUMMY_DATA = [
        {
            'a': 1.,
            'b': 2.,
            'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
        },
        {
            'b': 3.,
            'c': 4.,
            'index': dt(2013, 1, 2, 10, 1, tzinfo=mktz('Europe/London'))
        },
    ]

    tickstore_lib._chunk_size = 1
    tickstore_lib.write('SYM', DUMMY_DATA)
    with patch.object(tickstore_lib._collection,
                      'find',
                      side_effect=tickstore_lib._collection.find) as f:
        df = tickstore_lib.read('SYM',
                                date_range=DateRange(20130101,
                                                     dt(2013, 1, 2, 9, 0)),
                                columns=None)
        assert_array_equal(df['b'].values, np.array([2.]))
        assert mongo_count(tickstore_lib._collection,
                           filter=f.call_args_list[-1][0][0]) == 1
Example #11
def test_cleanup_orphaned_chunks_ignores_recent(mongo_host, library, data, dry_run):
    """
    We don't clean up any chunks written within the last day. That's just asking for trouble
    """
    yesterday = dt.utcnow() - dtd(hours=12)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('symbol', data, prune_previous_version=False)
    chunk_count = mongo_count(library._collection)
    library._collection.versions.delete_one({'_id': _id})

    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) == chunk_count
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) == chunk_count
Example #12
def test_date_range(tickstore_lib):
    tickstore_lib.write('SYM', DUMMY_DATA)
    df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
    assert_array_equal(df['a'].values, np.array([1, np.nan, np.nan]))
    assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
    assert_array_equal(df['c'].values, np.array([np.nan, 4., 6.]))

    tickstore_lib.delete('SYM')

    # Chunk every 3 symbols and lets have some fun
    tickstore_lib._chunk_size = 3
    tickstore_lib.write('SYM', DUMMY_DATA)

    with patch('pymongo.collection.Collection.find', side_effect=tickstore_lib._collection.find) as f:
        df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([5.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1

        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2

        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 2

        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([7., 9.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1

        # Test the different open-closed behaviours
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_CLOSED), columns=None)
        assert_array_equal(df['b'].values, np.array([7., 9.]))
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_OPEN), columns=None)
        assert_array_equal(df['b'].values, np.array([7.]))
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_CLOSED), columns=None)
        assert_array_equal(df['b'].values, np.array([9.]))
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_OPEN), columns=None)
        assert_array_equal(df['b'].values, np.array([]))
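DUMMY_DATA is a module-level fixture this excerpt does not show. A plausible reconstruction, inferred from the assertions above (five ticks on consecutive days; the 'c' values on the last two rows are guesses, as no assertion pins them down):

DUMMY_DATA = [
    {'a': 1., 'b': 2., 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))},
    {'b': 3., 'c': 4., 'index': dt(2013, 1, 2, tzinfo=mktz('Europe/London'))},
    {'b': 5., 'c': 6., 'index': dt(2013, 1, 3, tzinfo=mktz('Europe/London'))},
    {'b': 7., 'c': 8., 'index': dt(2013, 1, 4, tzinfo=mktz('Europe/London'))},
    {'b': 9., 'c': 10., 'index': dt(2013, 1, 5, tzinfo=mktz('Europe/London'))},
]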
Example #14
def test_cleanup_noop(mongo_host, library, data, dry_run):
    """
    Check that chunks whose version document is still present are not cleaned up, with or without dry-run
    """
    yesterday = dt.utcnow() - dtd(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('symbol', data, prune_previous_version=False)

    # Number of chunks
    chunk_count = mongo_count(library._collection)

    # No cleanup on dry-run
    if dry_run:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host)
        assert mongo_count(library._collection) == chunk_count
        assert repr(library.read('symbol').data) == repr(data)
    else:
        run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
        assert mongo_count(library._collection) == chunk_count
        assert repr(library.read('symbol').data) == repr(data)
Example #15
def test_save_and_resave_reuses_chunks(library):
    with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
        ndarr = np.random.rand(1024)
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)
        orig_chunks = mongo_count(library._collection)
        assert orig_chunks == 9

        # Concatenate more values
        ndarr = np.concatenate([ndarr, np.random.rand(10)])
        # And change the original values - we're not a simple append
        ndarr[0] = ndarr[1] = ndarr[2] = 0
        library.write('MYARR', ndarr)
        saved_arr = library.read('MYARR').data
        assert np.all(ndarr == saved_arr)

        # Should contain the original chunks, but not double the number
        # of chunks
        new_chunks = mongo_count(library._collection)
        assert new_chunks == 11

        # We hit the update (rather than upsert) code path
        assert mongo_count(library._collection, filter={'parent': {'$size': 2}}) == 7
Example #16
def _fast_check_corruption(collection, sym, v, check_count, check_last_segment, check_append_safe):
    if v is None:
        logging.warning("Symbol {} with version {} not found, so can't be corrupted.".format(sym, v))
        return False
    
    if not check_count and not check_last_segment:
        raise ValueError("_fast_check_corruption must be called with either of "
                         "check_count and check_last_segment set to True")

    # If the version marked the symbol as deleted, writes/appends are forced to start from a new base: not corrupted.
    if isinstance(v.get('metadata'), dict) and v['metadata'].get('deleted'):
        return False
     
    if check_append_safe:
        # Check whether appending to the symbol version can potentially corrupt the data (history branch).
        # Inspect all segments, don't limit to v['up_to']. No newer append segments after v should exist.
        spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id'])}
    else:
        # Only verify segment count for current symbol version, don't check corruptability of future appends.
        spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id']), 'segment': {'$lt': v['up_to']}}

    try:
        # Note that command sequence (a) is slower than (b)
        # (a) curs = collection.find(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])
        #     curs.count()
        #     curs.next()
        # (b) collection.find(spec, {'segment': 1}).count()
        #     collection.find_one(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])

        if check_count:
            total_segments = mongo_count(collection, filter=spec)
            # Quick check: compare segment count
            if total_segments != v.get('segment_count', 0):
                return True  # corrupted, don't proceed with fetching from mongo the first hit
            # Quick check: Segment counts agree and size is zero
            if total_segments == 0:
                return False
        
        if check_last_segment:
            # Quick check: derive up_to from the maximum segment number; it must match the version's up_to.
            max_seg = collection.find_one(spec, {'segment': 1}, sort=[('segment', pymongo.DESCENDING)])
            max_seg = max_seg['segment'] + 1 if max_seg else 0
            if max_seg != v.get('up_to'):
                return True  # corrupted, last segment and version's up_to don't agree
    except OperationFailure as e:
        logging.warning("Corruption checks are skipped (sym={}, version={}): {}".format(sym, v['version'], str(e)))

    return False
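A hedged sketch of how the fast_is_corrupted wrapper used in Examples #21/#22 might delegate to this function; the flag mapping is an assumption, and only the (instance, sym, version) call shape is visible in this listing:

def fast_is_corrupted(instance, sym, version_doc):
    # Run only the cheap count and last-segment checks; skip the stricter
    # append-safety scan, which also inspects segments beyond the version's up_to.
    return _fast_check_corruption(instance._collection, sym, version_doc,
                                  check_count=True, check_last_segment=True,
                                  check_append_safe=False)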
Example #18
def test_mongo_count_new_pymongo(monkeypatch):
    monkeypatch.setattr(arctic._util, '_use_new_count_api', None)
    with patch('pymongo.version', '3.11.0'):
        coll2 = MagicMock()
        mongo_count(coll2, filter="_id:1")
        mongo_count(coll2, filter={})
        mongo_count(coll2)
        assert coll2.estimated_document_count.call_count == 2
        assert coll2.count_documents.call_count == 1
        assert coll2.count.call_count == 0
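The test pins down the dispatch contract for modern pymongo: a missing or empty filter goes to estimated_document_count(), a non-empty filter to count_documents(), and the deprecated Collection.count() is never called. A minimal sketch of that dispatch (not arctic's actual source):

def mongo_count_sketch(collection, filter=None):
    if filter:                                    # non-empty filter: exact, filtered count
        return collection.count_documents(filter)
    return collection.estimated_document_count()  # fast count from collection metadata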
Example #19
def test_date_range_end_not_in_range(tickstore_lib):
    DUMMY_DATA = [
                  {'a': 1.,
                   'b': 2.,
                   'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
                   },
                  {'b': 3.,
                   'c': 4.,
                   'index': dt(2013, 1, 2, 10, 1, tzinfo=mktz('Europe/London'))
                   },
                  ]

    tickstore_lib._chunk_size = 1
    tickstore_lib.write('SYM', DUMMY_DATA)
    with patch.object(tickstore_lib._collection, 'find', side_effect=tickstore_lib._collection.find) as f:
        df = tickstore_lib.read('SYM', date_range=DateRange(20130101, dt(2013, 1, 2, 9, 0)), columns=None)
        assert_array_equal(df['b'].values, np.array([2.]))
        assert mongo_count(tickstore_lib._collection, filter=f.call_args_list[-1][0][0]) == 1
Example #20
def test_prune_previous_doesnt_kill_other_objects(library):
    blob = {'foo': dt(2015, 1, 1), 'object': Arctic}

    yesterday = dt.utcnow() - timedelta(days=1, seconds=1)
    _id = bson.ObjectId.from_datetime(yesterday)
    with patch("bson.ObjectId", return_value=_id):
        library.write('BLOB', blob, prune_previous_version=False)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 1

    _id = bson.ObjectId.from_datetime(dt.utcnow() - timedelta(hours=10))
    with patch("bson.ObjectId", return_value=_id):
        library.write('BLOB', blob, prune_previous_version=False)
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 2

    # This write should prune the oldest version in the chunk collection
    library.write('BLOB', {})
    assert mongo_count(library._collection) == 1
    assert mongo_count(library._collection.versions) == 2

    library._delete_version('BLOB', 2)
    assert mongo_count(library._collection) == 0
    assert mongo_count(library._collection.versions) == 1
Example #21
def analyze_symbol(instance, sym, from_ver, to_ver, do_reads=False):
    """
    This is a utility function to produce text output with details about the versions of a given symbol.
    It is useful for debugging corruption issues and to mark corrupted versions.

    Parameters
    ----------
    instance : `arctic.store.version_store.VersionStore`
        The VersionStore instance against which the analysis will be run.
    sym : `str`
        The symbol to analyze
    from_ver : `int` or `None`
        The lower bound for the version number we wish to analyze. If None then start from the earliest version.
    to_ver : `int` or `None`
        The upper bound for the version number we wish to analyze. If None then stop at the latest version.
    do_reads : `bool`
        If this flag is set to true, then the corruption check will actually try to read the symbol (slower).
    """
    logging.info('Analyzing symbol {}. Versions range is [v{}, v{}]'.format(
        sym, from_ver, to_ver))
    prev_rows = 0
    prev_n = 0
    prev_v = None

    logging.info('\nVersions for {}:'.format(sym))
    for v in instance._versions.find(
        {
            'symbol': sym,
            'version': {
                '$gte': from_ver,
                '$lte': to_ver
            }
        },
            sort=[('version', pymongo.ASCENDING)]):
        n = v.get('version')

        is_deleted = v.get('metadata').get(
            'deleted', False) if v.get('metadata') else False

        if is_deleted:
            matching = 0
        else:
            spec = {
                'symbol': sym,
                'parent': v.get('base_version_id', v['_id']),
                'segment': {
                    '$lt': v.get('up_to', 0)
                }
            }
            matching = mongo_count(instance._collection,
                                   filter=spec) if not is_deleted else 0

        base_id = v.get('base_version_id')
        snaps = [
            '/'.join((str(x), str(x.generation_time))) for x in v.get('parent')
        ] if v.get('parent') else None

        added_rows = v.get('up_to', 0) - prev_rows

        meta_match_with_prev = v.get('metadata') == prev_v.get(
            'metadata') if prev_v else False

        delta_snap_creation = (
            min([x.generation_time
                 for x in v.get('parent')]) - v['_id'].generation_time
        ).total_seconds() / 60.0 if v.get('parent') else 0.0

        prev_v_diff = 0 if not prev_v else v['version'] - prev_v['version']

        corrupted = not is_deleted and (is_corrupted(instance, sym, v)
                                        if do_reads else fast_is_corrupted(
                                            instance, sym, v))

        logging.info(
            "v{: <6} "
            "{: <6} "
            "{: <5} "
            "({: <20}):   "
            "expected={: <6} "
            "found={: <6} "
            "last_row={: <10} "
            "new_rows={: <10} "
            "append count={: <10} "
            "append_size={: <10} "
            "type={: <14} {: <14} "
            "base={: <24}/{: <28} "
            "snap={: <30}[{:.1f} mins delayed] "
            "{: <20} "
            "{: <20}".format(
                n, prev_v_diff, 'DEL' if is_deleted else 'ALIVE',
                str(v['_id'].generation_time), v.get('segment_count',
                                                     0), matching,
                v.get('up_to', 0), added_rows, v.get('append_count'),
                v.get('append_size'), v.get('type'),
                'meta-same' if meta_match_with_prev else 'meta-changed',
                str(base_id),
                str(base_id.generation_time) if base_id else '', str(snaps),
                delta_snap_creation, 'PREV_MISSING' if prev_n < n - 1 else '',
                'CORRUPTED VERSION' if corrupted else ''))
        prev_rows = v.get('up_to', 0)
        prev_n = n
        prev_v = v

    logging.info('\nSegments for {}:'.format(sym))
    for seg in instance._collection.find({'symbol': sym},
                                         sort=[('_id', pymongo.ASCENDING)]):
        logging.info("{: <32}  {: <7}  {: <10} {: <30}".format(
            hashlib.sha1(seg['sha']).hexdigest(), seg.get('segment'),
            'compressed' if seg.get('compressed', False) else 'raw',
            str([str(p) for p in seg.get('parent', [])])))
Example #22
def analyze_symbol(l, sym, from_ver, to_ver, do_reads=False):
    """
    This is a utility function to produce text output with details about the versions of a given symbol.
    It is useful for debugging corruption issues and to mark corrupted versions.

    Parameters
    ----------
    l : `arctic.store.version_store.VersionStore`
        The VersionStore instance against which the analysis will be run.
    sym : `str`
        The symbol to analyze
    from_ver : `int` or `None`
        The lower bound for the version number we wish to analyze. If None then start from the earliest version.
    to_ver : `int` or `None`
        The upper bound for the version number we wish to analyze. If None then stop at the latest version.
    do_reads : `bool`
        If this flag is set to true, then the corruption check will actually try to read the symbol (slower).
    """
    logging.info('Analyzing symbol {}. Versions range is [v{}, v{}]'.format(sym, from_ver, to_ver))
    prev_rows = 0
    prev_n = 0
    prev_v = None

    logging.info('\nVersions for {}:'.format(sym))
    for v in l._versions.find({'symbol': sym, 'version': {'$gte': from_ver, '$lte': to_ver}},
                              sort=[('version', pymongo.ASCENDING)]):
        n = v.get('version')

        is_deleted = v.get('metadata').get('deleted', False) if v.get('metadata') else False

        if is_deleted:
            matching = 0
        else:
            spec = {'symbol': sym, 'parent': v.get('base_version_id', v['_id']), 'segment': {'$lt': v.get('up_to', 0)}}
            matching = mongo_count(l._collection, filter=spec) if not is_deleted else 0

        base_id = v.get('base_version_id')
        snaps = ['/'.join((str(x), str(x.generation_time))) for x in v.get('parent')] if v.get('parent') else None

        added_rows = v.get('up_to', 0) - prev_rows

        meta_match_with_prev = v.get('metadata') == prev_v.get('metadata') if prev_v else False

        delta_snap_creation = (min([x.generation_time for x in v.get('parent')]) - v['_id'].generation_time).total_seconds() / 60.0 if v.get('parent') else 0.0

        prev_v_diff = 0 if not prev_v else v['version'] - prev_v['version']

        corrupted = not is_deleted and (is_corrupted(l, sym, v) if do_reads else fast_is_corrupted(l, sym, v))

        logging.info(
            "v{: <6} "
            "{: <6} "
            "{: <5} "
            "({: <20}):   "
            "expected={: <6} "
            "found={: <6} "
            "last_row={: <10} "
            "new_rows={: <10} "
            "append count={: <10} "
            "append_size={: <10} "
            "type={: <14} {: <14} "
            "base={: <24}/{: <28} "
            "snap={: <30}[{:.1f} mins delayed] "
            "{: <20} "
            "{: <20}".format(
                n,
                prev_v_diff,
                'DEL' if is_deleted else 'ALIVE',
                str(v['_id'].generation_time),
                v.get('segment_count', 0),
                matching,
                v.get('up_to', 0),
                added_rows,
                v.get('append_count'),
                v.get('append_size'),
                v.get('type'),
                'meta-same' if meta_match_with_prev else 'meta-changed',
                str(base_id),
                str(base_id.generation_time) if base_id else '',
                str(snaps),
                delta_snap_creation,
                'PREV_MISSING' if prev_n < n - 1 else '',
                'CORRUPTED VERSION' if corrupted else '')
        )
        prev_rows = v.get('up_to', 0)
        prev_n = n
        prev_v = v

    logging.info('\nSegments for {}:'.format(sym))
    for seg in l._collection.find({'symbol': sym}, sort=[('_id', pymongo.ASCENDING)]):
        logging.info("{: <32}  {: <7}  {: <10} {: <30}".format(
            hashlib.sha1(seg['sha']).hexdigest(),
            seg.get('segment'),
            'compressed' if seg.get('compressed', False) else 'raw',
            str([str(p) for p in seg.get('parent', [])])
        ))
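A hypothetical invocation, assuming a VersionStore obtained from a configured Arctic instance (host and library names are illustrative):

from arctic import Arctic

store = Arctic('localhost')['user.library']           # hypothetical host / library
analyze_symbol(store, 'symbol', from_ver=0, to_ver=10, do_reads=False)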