def test_multishard_count_with_motion(self):
    # Both shards start at rest on the first cluster.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest1/test_sharding")

    docs = [
        {'x': 1, 'y': 1},
        {'x': 1, 'y': 2},
        {'x': 2, 'y': 1},
        {'x': 2, 'y': 2},
    ]
    for doc in docs:
        self.db1.dummy.insert(doc)

    self.assertEquals(4, operations.multishard_find('dummy', {}).count())

    # Mimic the shard now being in the second location and there being
    # documents left here
    api.start_migration('dummy', 2, "dest2/test_sharding")
    api.set_shard_to_migration_status(
        'dummy', 2, api.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)
    self.db2.dummy.insert(docs[2])
    self.db2.dummy.insert(docs[3])

    # The leftover copies must not be double-counted.
    self.assertEquals(4, operations.multishard_find('dummy', {}).count())
def run(self):
    """Execute a full shard migration in four phases: copy, oplog sync,
    pause-at-destination, then deletion of the source data.

    Any exception is stashed in ``self.exception`` for the controlling
    thread to inspect, then re-raised.
    """
    try:
        blue('* Starting migration')
        api.start_migration(self.collection_name, self.shard_key,
                            self.new_location)

        # Copy phase
        blue('* Doing copy')
        # Capture the oplog position *before* copying so that writes which
        # land during the copy are replayed by the sync phase.
        oplog_pos = _get_oplog_pos(self.collection_name, self.shard_key)
        _do_copy(self.collection_name, self.shard_key, self.insert_throttle)

        # Sync phase
        blue('* Initial oplog sync')
        start_sync_time = time.time()
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.MIGRATING_SYNC)
        oplog_pos = _sync_from_oplog(self.collection_name, self.shard_key,
                                     oplog_pos)

        # Ensure that the sync has taken at least as long as our caching time
        # to ensure that all writes will get paused at approximately the same
        # time.
        while time.time() < start_sync_time + api.get_caching_duration():
            time.sleep(0.05)
            oplog_pos = _sync_from_oplog(self.collection_name,
                                         self.shard_key, oplog_pos)

        # Now all the caching of metadata should be stopped for this shard.
        # We can flip to being paused at destination and wait ~100ms for any
        # pending updates/inserts to be performed. If these are taking longer
        # than 100ms then you are in a bad place and should rethink sharding.
        blue('* Pausing at destination')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)
        time.sleep(0.1)

        # Final sync catches writes performed during the pause.
        blue('* Syncing oplog once more')
        _sync_from_oplog(self.collection_name, self.shard_key, oplog_pos)

        # Delete phase
        blue('* Doing deletion')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_DELETE)
        _delete_source_data(self.collection_name, self.shard_key,
                            delete_throttle=self.delete_throttle)
        api.set_shard_at_rest(self.collection_name, self.shard_key,
                              self.new_location, force=True)
        blue('* Done')
    except:
        # Record the failure for the coordinating thread, then re-raise.
        self.exception = sys.exc_info()
        raise
def _attempt_migration(self, num_records): api.set_shard_at_rest('dummy', 1, "dest1/test_sharding") api.set_shard_at_rest('dummy', 2, "dest1/test_sharding") account_1 = self._prepare_account_data(self.db1, 1, xrange(0, num_records)) account_2 = self._prepare_account_data(self.db1, 2, xrange(0, num_records)) shard_manager = sharder._begin_migration('dummy', 1, "dest2/test_sharding") self._modify_data(account_1, account_2, num_records, num_records) while not shard_manager.is_finished(): time.sleep(0.01) self._verify_end_state(account_1, account_2, self.unwrapped_dummy_1, self.unwrapped_dummy_2) # Check that the data for the other account has remained intact and in # the same place account_2_actual = list(self.unwrapped_dummy_1.find({'account_id': 2})) account_2_actual = list( sorted(account_2_actual, key=lambda r: r['some_key'])) self.assertEquals(account_2, account_2_actual) # Now migrate back to the source print 'Now migrate backwards...' shard_manager = sharder._begin_migration('dummy', 1, "dest1/test_sharding") self._modify_data(account_1, account_2, num_records * 2, num_records) while not shard_manager.is_finished(): time.sleep(0.01) self._verify_end_state(account_1, account_2, self.unwrapped_dummy_2, self.unwrapped_dummy_1)
def test_update(self):
    # The same document exists in two places (a mid-migration state). An
    # update must only be applied at the location the metadata points at.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    doc1 = {'x': 1, 'y': 1}
    self.db1.dummy.insert(doc1)
    api.start_migration('dummy', 1, 'dest2/test_sharding')
    api.set_shard_to_migration_status('dummy', 1,
                                      api.ShardStatus.MIGRATING_COPY)
    self.db2.dummy.insert(doc1)

    outcome = operations.multishard_update('dummy', {}, {'$inc': {'y': 1}})
    self.assertEquals(1, outcome['n'])

    # The live copy saw the increment...
    updated, = operations.multishard_find('dummy', {'x': 1})
    self.assertEquals(2, updated['y'])

    # ...while the copy on the other shard did not. Spoof the metadata to
    # point at shard2 and re-read.
    api.set_shard_at_rest('dummy', 1, "dest2/test_sharding", force=True)
    untouched, = operations.multishard_find('dummy', {'x': 1})
    self.assertEquals(1, untouched['y'])
def _attempt_migration(self, num_records): api.set_shard_at_rest('dummy', 1, "dest1/test_sharding") api.set_shard_at_rest('dummy', 2, "dest1/test_sharding") account_1 = self._prepare_account_data( self.db1, 1, xrange(0, num_records)) account_2 = self._prepare_account_data( self.db1, 2, xrange(0, num_records)) shard_manager = sharder._begin_migration( 'dummy', 1, "dest2/test_sharding") self._modify_data(account_1, account_2, num_records, num_records) while not shard_manager.is_finished(): time.sleep(0.01) self._verify_end_state( account_1, account_2, self.unwrapped_dummy_1, self.unwrapped_dummy_2) # Check that the data for the other account has remained intact and in # the same place account_2_actual = list(self.unwrapped_dummy_1.find({'account_id': 2})) account_2_actual = list(sorted( account_2_actual, key=lambda r: r['some_key'])) self.assertEquals(account_2, account_2_actual) # Now migrate back to the source print 'Now migrate backwards...' shard_manager = sharder._begin_migration( 'dummy', 1, "dest1/test_sharding") self._modify_data(account_1, account_2, num_records * 2, num_records) while not shard_manager.is_finished(): time.sleep(0.01) self._verify_end_state( account_1, account_2, self.unwrapped_dummy_2, self.unwrapped_dummy_1)
def test_alive(self):
    # A cursor over a single resting shard should report itself alive.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    self.db1.dummy.insert({'x': 1, 'y': 1})
    cursor = operations.multishard_find('dummy', {})
    self.assertTrue(cursor.alive)
def test_aggregate(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")

    # Ten documents per shard key, one key per cluster.
    for i in range(10):
        self.db1.dummy.insert({'x': 1, 'y': i})
        self.db2.dummy.insert({'x': 2, 'y': i})

    pipeline = [
        {'$match': {'x': 2}},
        {'$group': {'_id': 'total', 's': {'$sum': '$y'}}},
    ]
    result = operations.multishard_aggregate('dummy', pipeline)['result']
    # 0 + 1 + ... + 9 == 45, only from the x == 2 shard.
    self.assertEquals([{'_id': 'total', 's': 45}], result)
def run(self):
    """Execute a full shard migration: copy, oplog sync, pause at the
    destination, then delete the source data.

    Progress is reported via ``self.manager`` phases; any exception is
    stashed in ``self.exception`` for the controlling thread and
    re-raised. Thread-local connections are always closed on exit.
    """
    try:
        # Copy phase
        self.manager.set_phase('copy')
        api.start_migration(self.collection_name, self.shard_key,
                            self.new_location)
        # Capture the oplog position before copying so writes made during
        # the copy are replayed by the sync phase.
        oplog_pos = _get_oplog_pos(self.collection_name, self.shard_key)
        _do_copy(self.collection_name, self.shard_key, self.manager)

        # Sync phase
        self.manager.set_phase('sync')
        start_sync_time = time.time()
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.MIGRATING_SYNC)
        oplog_pos = _sync_from_oplog(self.collection_name, self.shard_key,
                                     oplog_pos)

        # Ensure that the sync has taken at least as long as our caching
        # time to ensure that all writes will get paused at approximately
        # the same time.
        while time.time() < start_sync_time + api.get_caching_duration():
            time.sleep(0.05)
            oplog_pos = _sync_from_oplog(self.collection_name,
                                         self.shard_key, oplog_pos)

        # Now all the caching of metadata should be stopped for this shard.
        # We can flip to being paused at destination and wait ~100ms for any
        # pending updates/inserts to be performed. If these are taking
        # longer than 100ms then you are in a bad place and should rethink
        # sharding.
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)
        time.sleep(0.1)

        # Sync the oplog one final time to catch any writes that were
        # performed during the pause
        _sync_from_oplog(self.collection_name, self.shard_key, oplog_pos)

        # Delete phase
        self.manager.set_phase('delete')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_DELETE)
        _delete_source_data(self.collection_name, self.shard_key,
                            self.manager)
        api.set_shard_at_rest(self.collection_name, self.shard_key,
                              self.new_location, force=True)
        self.manager.set_phase('complete')
    except:
        # Record the failure for the coordinating thread, then re-raise.
        self.exception = sys.exc_info()
        raise
    finally:
        # Always release this worker thread's pooled connections.
        close_thread_connections(threading.current_thread())
def test_query(self):
    api.create_realm("dummy-realm", "some_field", "dummy_collection")
    api.set_shard_at_rest("dummy-realm", 1, "dest1/some_db")

    expected = {"shard_key": 1, "location": "dest1/some_db",
                "realm": "dummy-realm"}

    def _trim_results(docs):
        # Keep only the fields we assert on so comparison is stable.
        return [{"shard_key": d["shard_key"],
                 "location": d["location"],
                 "realm": d["realm"]} for d in docs]

    store = metadata.ShardMetadataStore({"name": "dummy-realm"})
    self.assertEquals(
        [expected], _trim_results(store._query_shards_collection()))
    self.assertEquals(
        [expected], _trim_results(store._query_shards_collection(1)))
    self.assertEquals(
        [], _trim_results(store._query_shards_collection(2)))

    # A realm with no placed shards yields nothing.
    store = metadata.ShardMetadataStore({"name": "some-other-realm"})
    self.assertEquals([], _trim_results(store._query_shards_collection()))
    self.assertEquals([], _trim_results(store._query_shards_collection(1)))
def test_update(self):
    # Put the same document in multiple locations (a mid-migration status),
    # run an update, and check only the correct location was modified.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    doc = {'x': 1, 'y': 1}
    self.db1.dummy.insert(doc)
    api.start_migration('dummy', 1, 'dest2/test_sharding')
    api.set_shard_to_migration_status(
        'dummy', 1, api.ShardStatus.MIGRATING_COPY)
    self.db2.dummy.insert(doc)

    result = operations.multishard_update('dummy', {}, {'$inc': {'y': 1}})
    self.assertEquals(1, result['n'])

    # Querying the shard's current location shows the increment.
    current, = operations.multishard_find('dummy', {'x': 1})
    self.assertEquals(2, current['y'])

    # Spoof the metadata so the system thinks the data lives on shard2;
    # that copy must still hold the original counter.
    api.set_shard_at_rest('dummy', 1, "dest2/test_sharding", force=True)
    stale, = operations.multishard_find('dummy', {'x': 1})
    self.assertEquals(1, stale['y'])
def test_query(self):
    api.create_realm('dummy-realm', 'some_field', 'dummy_collection',
                     'cluster-1/%s' % test_settings.CONN1['db_name'])
    api.set_shard_at_rest('dummy-realm', 1, 'dest1/some_db')

    expected_metadata = {
        'shard_key': 1,
        'location': 'dest1/some_db',
        'realm': 'dummy-realm',
    }

    def _trim_results(docs):
        # Project down to the asserted fields only.
        fields = ('shard_key', 'location', 'realm')
        return [dict((f, doc[f]) for f in fields) for doc in docs]

    store = metadata.ShardMetadataStore({'name': 'dummy-realm'})
    self.assertEquals([expected_metadata],
                      _trim_results(store._query_shards_collection()))
    self.assertEquals([expected_metadata],
                      _trim_results(store._query_shards_collection(1)))
    self.assertEquals([],
                      _trim_results(store._query_shards_collection(2)))

    # An unknown realm matches no shard documents at all.
    store = metadata.ShardMetadataStore({'name': 'some-other-realm'})
    self.assertEquals([], _trim_results(store._query_shards_collection()))
    self.assertEquals([], _trim_results(store._query_shards_collection(1)))
def test_raise_if_changing_shard_location_once_set(self):
    realm_default = 'cluster-2/%s' % test_settings.CONN2['db_name']
    api.create_realm('dummy-realm', 'some_field', 'dummy_collection',
                     realm_default)
    api.set_shard_at_rest('dummy-realm', 1, 'dest2/some_db')
    # Re-placing an already-placed shard (without force) must fail.
    with self.assertRaises(Exception):
        api.set_shard_at_rest('dummy-realm', 1, 'dest1/some_db')
def test_sync_uses_correct_connection(self):
    """Regression test for a rollout bug: the metadata connection was
    assumed to be the same connection the source data comes from, which
    is *not* always true. Migrating from new to old exposes it.
    """
    api.set_shard_at_rest('dummy', 1, "dest2/test_sharding")
    api.start_migration('dummy', 1, "dest1/test_sharding")

    # Mimic the post-copy state: the document lives in both locations.
    document = {'x': 1, 'y': 1}
    document['_id'] = self.db1.dummy.insert(document)
    self.db2.dummy.insert(document)

    # Capture the oplog position, apply an update, then replay from the
    # captured position.
    oplog_start = sharder._get_oplog_pos('dummy', 1)
    self.db2.dummy.update({'x': 1}, {'$inc': {'y': 1}})
    api.set_shard_to_migration_status('dummy', 1,
                                      api.ShardStatus.MIGRATING_SYNC)
    sharder._sync_from_oplog('dummy', 1, oplog_start)

    # The update must have been replayed onto the first database.
    synced, = self.db1.dummy.find({})
    self.assertEquals(2, synced['y'])
def test_sync_uses_correct_connection(self):
    """This tests for a bug found during a rollout: the metadata
    connection is not necessarily the connection the source data comes
    from. A migration from the new cluster back to the old one will
    expose the bug.
    """
    api.set_shard_at_rest('dummy', 1, "dest2/test_sharding")
    api.start_migration('dummy', 1, "dest1/test_sharding")

    # State after a document has been copied between locations.
    copied = {'x': 1, 'y': 1}
    copied['_id'] = self.db1.dummy.insert(copied)
    self.db2.dummy.insert(copied)

    # Record the starting oplog position, perform a write, then sync
    # from that recorded position.
    start_pos = sharder._get_oplog_pos('dummy', 1)
    self.db2.dummy.update({'x': 1}, {'$inc': {'y': 1}})
    api.set_shard_to_migration_status(
        'dummy', 1, api.ShardStatus.MIGRATING_SYNC)
    sharder._sync_from_oplog('dummy', 1, start_pos)

    # The first database should now reflect the replayed update.
    replayed, = self.db1.dummy.find({})
    self.assertEquals(2, replayed['y'])
def test_set_shard_at_rest_bad_location(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection')
    # Placing a shard on an unconfigured cluster must be rejected.
    with self.assertRaises(Exception) as caught:
        set_shard_at_rest('some_realm', 1, 'bad-cluster/db')
    self.assertEquals('Cluster bad-cluster has not been configured',
                      caught.exception.message)
def test_set_shard_at_rest_bad_location(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection')

    # A location naming an unknown cluster should raise immediately.
    with self.assertRaises(Exception) as catcher:
        set_shard_at_rest('some_realm', 1, 'bad-cluster/db')

    message = catcher.exception.message
    self.assertEquals(message,
                      'Cluster bad-cluster has not been configured')
def run(self):
    """Drive a complete shard migration: copy, oplog sync,
    pause-at-destination, then deletion of the source data.

    Progress is printed via ``blue``; any exception is stashed in
    ``self.exception`` for the controlling thread, then re-raised.
    """
    try:
        blue('* Starting migration')
        api.start_migration(
            self.collection_name, self.shard_key, self.new_location)

        # Copy phase
        blue('* Doing copy')
        # The oplog position is taken before the copy so that writes made
        # while copying are replayed by the sync phase.
        oplog_pos = _get_oplog_pos(self.collection_name, self.shard_key)
        _do_copy(self.collection_name, self.shard_key, self.insert_throttle)

        # Sync phase
        blue('* Initial oplog sync')
        start_sync_time = time.time()
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.MIGRATING_SYNC)
        oplog_pos = _sync_from_oplog(
            self.collection_name, self.shard_key, oplog_pos)

        # Ensure that the sync has taken at least as long as our caching time
        # to ensure that all writes will get paused at approximately the same
        # time.
        while time.time() < start_sync_time + api.get_caching_duration():
            time.sleep(0.05)
            oplog_pos = _sync_from_oplog(
                self.collection_name, self.shard_key, oplog_pos)

        # Now all the caching of metadata should be stopped for this shard.
        # We can flip to being paused at destination and wait ~100ms for any
        # pending updates/inserts to be performed. If these are taking longer
        # than 100ms then you are in a bad place and should rethink sharding.
        blue('* Pausing at destination')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)
        time.sleep(0.1)

        # One last sync catches any writes performed during the pause.
        blue('* Syncing oplog once more')
        _sync_from_oplog(
            self.collection_name, self.shard_key, oplog_pos)

        # Delete phase
        blue('* Doing deletion')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_DELETE)
        _delete_source_data(
            self.collection_name, self.shard_key,
            delete_throttle=self.delete_throttle)
        api.set_shard_at_rest(
            self.collection_name, self.shard_key, self.new_location,
            force=True)
        blue('* Done')
    except:
        # Record the failure for the coordinating thread, then re-raise.
        self.exception = sys.exc_info()
        raise
def test_where_is(self):
    ensure_realm_exists(
        'some_realm', 'some_field', 'some_collection', 'dest1/db')
    set_shard_at_rest('some_realm', 1, 'dest2/db')

    # A placed shard resolves to its explicit location...
    self.assertEquals('dest2/db', where_is('some_collection', 1))
    # ...while an unplaced one falls back to the realm default.
    self.assertEquals('dest1/db', where_is('some_collection', 2))
def test_cannot_move_to_same_location(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection')
    set_shard_at_rest('some_realm', 1, 'dest1/db')

    # Migrating a shard to where it already lives is rejected.
    with self.assertRaises(Exception) as caught:
        start_migration('some_realm', 1, 'dest1/db')
    self.assertEquals('Shard is already at dest1/db',
                      caught.exception.message)
def test_shard_location_does_not_change_even_when_forced(self):
    # NOTE(review): despite the test name, the assertion below checks
    # that force=True DOES re-place the shard at the new location —
    # confirm the intended semantics with the test author.
    realm_default = 'cluster-2/%s' % test_settings.CONN2['db_name']
    api.create_realm('dummy-realm', 'some_field', 'dummy_collection',
                     realm_default)
    api.set_shard_at_rest('dummy-realm', 1, 'dest2/some_db')
    api.set_shard_at_rest('dummy-realm', 1, 'dest1/some_db', force=True)

    entries = metadata.ShardMetadataStore('dummy-realm') \
        ._query_shards_collection()
    self.assertEquals(entries[0]['location'], 'dest1/some_db')
def test_where_is(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection',
                        'dest1/db')
    set_shard_at_rest('some_realm', 1, 'dest2/db')

    # Shard 1 was explicitly placed.
    self.assertEquals('dest2/db', where_is('some_collection', 1))

    # Shard 2 was never placed and so uses the realm's default.
    self.assertEquals('dest1/db', where_is('some_collection', 2))
def run(self):
    """Execute a full shard migration: copy, oplog sync, pause at the
    destination, then delete the source data.

    Progress is reported via ``self.manager`` phases; any exception is
    stashed in ``self.exception`` for the controlling thread and
    re-raised. Thread-local connections are always closed on exit.
    """
    try:
        # Copy phase
        self.manager.set_phase('copy')
        api.start_migration(
            self.collection_name, self.shard_key, self.new_location)
        # Capture the oplog position before copying so writes made during
        # the copy are replayed by the sync phase.
        oplog_pos = _get_oplog_pos(self.collection_name, self.shard_key)
        _do_copy(self.collection_name, self.shard_key, self.manager)

        # Sync phase
        self.manager.set_phase('sync')
        start_sync_time = time.time()
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.MIGRATING_SYNC)
        oplog_pos = _sync_from_oplog(
            self.collection_name, self.shard_key, oplog_pos)

        # Ensure that the sync has taken at least as long as our caching time
        # to ensure that all writes will get paused at approximately the same
        # time.
        while time.time() < start_sync_time + api.get_caching_duration():
            time.sleep(0.05)
            oplog_pos = _sync_from_oplog(
                self.collection_name, self.shard_key, oplog_pos)

        # Now all the caching of metadata should be stopped for this shard.
        # We can flip to being paused at destination and wait ~100ms for any
        # pending updates/inserts to be performed. If these are taking longer
        # than 100ms then you are in a bad place and should rethink sharding.
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)
        time.sleep(0.1)

        # Sync the oplog one final time to catch any writes that were
        # performed during the pause
        _sync_from_oplog(
            self.collection_name, self.shard_key, oplog_pos)

        # Delete phase
        self.manager.set_phase('delete')
        api.set_shard_to_migration_status(
            self.collection_name, self.shard_key,
            metadata.ShardStatus.POST_MIGRATION_DELETE)
        _delete_source_data(
            self.collection_name, self.shard_key, self.manager)
        api.set_shard_at_rest(
            self.collection_name, self.shard_key, self.new_location,
            force=True)
        self.manager.set_phase('complete')
    except:
        self.exception = sys.exc_info()
        raise
    finally:
        # BUG FIX: connections were previously only closed on the
        # exception path, leaking this thread's pooled connections after
        # every successful migration. Close them unconditionally.
        close_thread_connections(threading.current_thread())
def test_fetch_all_shards_from_metadata(self):
    api.create_realm('dummy-realm', 'some_field', 'dummy_collection',
                     'cluster-1/%s' % test_settings.CONN1['db_name'])
    api.set_shard_at_rest('dummy-realm', 1, 'dest1/some_db')

    # The raw shards collection should contain the placement just made.
    entry = metadata.ShardMetadataStore('dummy-realm') \
        ._query_shards_collection()[0]
    self.assertEquals(entry['shard_key'], 1)
    self.assertEquals(entry['location'], 'dest1/some_db')
    self.assertEquals(entry['realm'], 'dummy-realm')
def test_cannot_move_to_same_location(self):
    ensure_realm_exists(
        'some_realm', 'some_field', 'some_collection', 'dest1/db')
    set_shard_at_rest('some_realm', 1, 'dest1/db')

    # Starting a migration to the shard's current location must raise.
    with self.assertRaises(Exception) as catcher:
        start_migration('some_realm', 1, 'dest1/db')

    self.assertEquals(catcher.exception.message,
                      'Shard is already at dest1/db')
def test_multishard_find(self):
    api.set_shard_at_rest('dummy', 1, 'dest1/test_sharding')
    api.set_shard_at_rest('dummy', 2, 'dest2/test_sharding')
    first = {'x': 1, 'y': 1}
    second = {'x': 2, 'y': 1}
    self.db1.dummy.insert(first)
    self.db2.dummy.insert(second)

    # A find spanning both clusters returns documents from each.
    cursor = operations.multishard_find('dummy', {'y': 1})
    self.assertEquals([first, second],
                      sorted(cursor, key=lambda d: d['x']))
def test_where_is(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection')
    set_shard_at_rest('some_realm', 1, 'dest2/db')

    # Specific location
    self.assertEquals('dest2/db', where_is('some_collection', 1))

    # Lack of a location. BUG FIX: the call was previously wrapped in a
    # dead assertEquals — where_is raises while its argument is being
    # evaluated, so that assertion never ran. Call it directly instead.
    with self.assertRaises(Exception) as catcher:
        where_is('some_collection', 2)
    self.assertEquals(
        catcher.exception.message,
        'Shard key 2 not placed for some_realm')
def test_where_is(self):
    ensure_realm_exists('some_realm', 'some_field', 'some_collection')
    set_shard_at_rest('some_realm', 1, 'dest2/db')

    # Specific location
    self.assertEquals('dest2/db', where_is('some_collection', 1))

    # Lack of a location. BUG FIX: previously the raising call sat inside
    # a dead assertEquals whose comparison could never execute (where_is
    # raises during argument evaluation); invoke it bare instead.
    with self.assertRaises(Exception) as catcher:
        where_is('some_collection', 2)
    self.assertEquals(catcher.exception.message,
                      'Shard key 2 not placed for some_realm')
def test_basic_copy(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    source_doc = {'x': 1, 'y': 1}
    source_doc['_id'] = self.db1.dummy.insert(source_doc)
    api.start_migration('dummy', 1, "dest2/test_sharding")

    sharder._do_copy('dummy', 1)

    # An identical copy should now exist on the second database.
    copied, = self.db2.dummy.find({})
    self.assertEquals(source_doc, copied)
def test_basic_copy(self):
    api.set_shard_at_rest("dummy", 1, "dest1/test_sharding")
    original = {"x": 1, "y": 1}
    original["_id"] = self.db1.dummy.insert(original)
    api.start_migration("dummy", 1, "dest2/test_sharding")

    sharder._do_copy("dummy", 1)

    # The second database should now hold an exact copy.
    mirrored, = self.db2.dummy.find({})
    self.assertEquals(original, mirrored)
def test_multi_remove(self):
    # A remove without the shard key must fan out to every cluster.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    self.db1.dummy.insert({'x': 1, 'y': 1})
    self.db2.dummy.insert({'x': 2, 'y': 1})

    operations.multishard_remove('dummy', {'y': 1})

    for db in (self.db1, self.db2):
        self.assertEquals(0, db.dummy.find({}).count())
def test_basic_copy(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    original = {'x': 1, 'y': 1}
    original['_id'] = self.db1.dummy.insert(original)
    api.start_migration('dummy', 1, "dest2/test_sharding")

    # Stub manager with the insert throttle disabled.
    manager = Mock(insert_throttle=None)
    sharder._do_copy('dummy', 1, manager)

    # The document should have been copied to the second database.
    copied, = self.db2.dummy.find({})
    self.assertEquals(original, copied)
def test_insert(self):
    # Inserts must be routed to the cluster that owns each shard key.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    first = {'x': 1, 'y': 1}
    second = {'x': 2, 'y': 1}
    operations.multishard_insert('dummy', first)
    operations.multishard_insert('dummy', second)

    self.assertEquals([first], list(self.db1.dummy.find({'y': 1})))
    self.assertEquals([second], list(self.db2.dummy.find({'y': 1})))
def test_indexed_read(self):
    api.set_shard_at_rest('dummy', 1, 'dest1/test_sharding')
    api.set_shard_at_rest('dummy', 2, 'dest2/test_sharding')
    low = {'x': 1, 'y': 1}
    high = {'x': 2, 'y': 1}
    self.db1.dummy.insert(low)
    self.db2.dummy.insert(high)

    # Ascending sort on x surfaces the first document...
    ascending = operations.multishard_find(
        'dummy', {'y': 1}, sort=[('x', 1), ('y', 1)])
    self.assertEquals(low, ascending[0])

    # ...and descending surfaces the second.
    descending = operations.multishard_find(
        'dummy', {'y': 1}, sort=[('x', -1), ('y', 1)])
    self.assertEquals(high, descending[0])
def test_index_inversion(self):
    record_count = 200
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    account = self._prepare_account_data(
        self.db1, 1, xrange(0, record_count))

    # Migrate while the data is being modified underneath.
    migration = sharder._begin_migration('dummy', 1, "dest2/test_sharding")
    self._modify_data(account)
    while not migration.is_finished():
        time.sleep(0.01)

    self._verify_end_state(
        account, self.unwrapped_dummy_1, self.unwrapped_dummy_2)
def test_indexed_read(self):
    api.set_shard_at_rest('dummy', 1, 'dest1/test_sharding')
    api.set_shard_at_rest('dummy', 2, 'dest2/test_sharding')
    doc1 = {'x': 1, 'y': 1}
    doc2 = {'x': 2, 'y': 1}
    self.db1.dummy.insert(doc1)
    self.db2.dummy.insert(doc2)

    # The head of a sorted multishard cursor must respect sort direction.
    for direction, expected in ((1, doc1), (-1, doc2)):
        cursor = operations.multishard_find(
            'dummy', {'y': 1}, sort=[('x', direction), ('y', 1)])
        self.assertEquals(expected, cursor[0])
def test_multishard_find_with_limit_as_method(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    for doc in ({'x': 1, 'y': 1}, {'x': 1, 'y': 2}):
        self.db1.dummy.insert(doc)
    for doc in ({'x': 2, 'y': 1}, {'x': 2, 'y': 2}):
        self.db2.dummy.insert(doc)

    # limit() applied as a cursor method caps the combined result set.
    limited = operations.multishard_find('dummy', {}).limit(3)
    self.assertEquals(3, len(list(limited)))
def test_multishard_rewind(self):
    api.set_shard_at_rest('dummy', 1, 'dest1/test_sharding')
    api.set_shard_at_rest('dummy', 2, 'dest2/test_sharding')
    self.db1.dummy.insert({'x': 1, 'y': 1})
    self.db2.dummy.insert({'x': 2, 'y': 1})

    cursor = operations.multishard_find('dummy', {'y': 1}, sort=[('x', 1)])
    first = cursor.next()
    self.assertEquals((1, 1), (first['x'], first['y']))

    # After a rewind the cursor must restart from the first document.
    cursor.rewind()
    first = cursor.next()
    self.assertEquals((1, 1), (first['x'], first['y']))
def test_index_inversion(self):
    total = 200
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    account_1 = self._prepare_account_data(self.db1, 1, xrange(0, total))

    shard_manager = sharder._begin_migration(
        'dummy', 1, "dest2/test_sharding")
    self._modify_data(account_1)

    # Spin until the background migration completes.
    while not shard_manager.is_finished():
        time.sleep(0.01)

    self._verify_end_state(account_1,
                           self.unwrapped_dummy_1,
                           self.unwrapped_dummy_2)
def test_remove(self):
    # A remove including the shard key only touches the owning shard.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    doc_a = {'x': 1, 'y': 1}
    doc_b = {'x': 2, 'y': 1}
    for db in (self.db1, self.db2):
        db.dummy.insert(doc_a)
        db.dummy.insert(doc_b)

    operations.multishard_remove('dummy', {'x': 1, 'y': 1})

    # Removed from the owning cluster, untouched on the other.
    self.assertEquals(0, self.db1.dummy.find({'x': 1}).count())
    self.assertEquals(1, self.db2.dummy.find({'x': 1}).count())
def test_multishard_find_with_sort_and_limit(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    on_first = [{'x': 1, 'y': 1}, {'x': 1, 'y': 2}]
    on_second = [{'x': 2, 'y': 1}, {'x': 2, 'y': 2}]
    for doc in on_first:
        self.db1.dummy.insert(doc)
    for doc in on_second:
        self.db2.dummy.insert(doc)

    # Sorting and limiting must behave as if this were one collection.
    found = operations.multishard_find(
        'dummy', {}, sort=[('x', 1), ('y', 1)], limit=3)
    self.assertEquals(on_first + on_second[:1], list(found))
def test_multishard_find_with_shardkey_present(self):
    # Data is (unrealistically) present in both locations even though the
    # shard is at rest. With the shard key in the query the find is
    # targeted at the owning shard only, so the stray copy is never seen.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    stray = {'x': 2, 'y': 1, 'bad': True}
    good = {'x': 2, 'y': 1, 'bad': False}
    self.db1.dummy.insert({'x': 1, 'y': 1})
    self.db1.dummy.insert(stray)
    self.db2.dummy.insert(good)

    found = operations.multishard_find('dummy', {'x': 2, 'y': 1})
    self.assertEquals([good], list(found))
def test_aggregate(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    for value in range(10):
        self.db1.dummy.insert({'x': 1, 'y': value})
        self.db2.dummy.insert({'x': 2, 'y': value})

    match_stage = {'$match': {'x': 2}}
    sum_stage = {'$group': {'_id': 'total', 's': {'$sum': '$y'}}}
    result = operations.multishard_aggregate(
        'dummy', [match_stage, sum_stage])['result']

    # 0 + 1 + ... + 9 == 45, taken only from the x == 2 shard.
    self.assertEquals([{'_id': 'total', 's': 45}], result)
def test_multishard_find_with_sort_and_limit(self):
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    doc1, doc2 = {'x': 1, 'y': 1}, {'x': 1, 'y': 2}
    doc3, doc4 = {'x': 2, 'y': 1}, {'x': 2, 'y': 2}
    self.db1.dummy.insert(doc1)
    self.db1.dummy.insert(doc2)
    self.db2.dummy.insert(doc3)
    self.db2.dummy.insert(doc4)

    # Combined sort + limit across clusters: first three in sort order.
    cursor = operations.multishard_find(
        'dummy', {}, sort=[('x', 1), ('y', 1)], limit=3)
    self.assertEquals([doc1, doc2, doc3], list(cursor))
def test_multi_update(self):
    # An update without the shard key must hit every cluster at once.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest2/test_sharding")
    self.db1.dummy.insert({'x': 1, 'y': 1})
    self.db2.dummy.insert({'x': 2, 'y': 1})

    outcome = operations.multishard_update('dummy', {}, {'$inc': {'y': 1}})
    self.assertEquals(2, outcome['n'])

    # Both documents, one per cluster, saw the increment.
    for key in (1, 2):
        found, = operations.multishard_find('dummy', {'x': key})
        self.assertEquals(2, found['y'])
def test_hint(self):
    # The easiest way to test that a hint is applied is to supply a bad
    # hint and check that the server rejects it.
    api.set_shard_at_rest('dummy', 1, 'dest1/test_sharding')
    api.set_shard_at_rest('dummy', 2, 'dest2/test_sharding')
    doc1 = {'x': 1, 'y': 1}
    doc2 = {'x': 2, 'y': 1}
    self.db1.dummy.insert(doc1)
    self.db2.dummy.insert(doc2)

    # This should explode due to a lack of index. BUG FIX: the original
    # try/except silently passed when no exception was raised, so a
    # dropped hint went undetected; assertRaises makes that a failure.
    with self.assertRaises(OperationFailure) as catcher:
        c = operations.multishard_find('dummy', {'x': 1})
        c = c.hint([('apples', 1)])
        list(c)
    self.assertTrue("bad hint" in str(catcher.exception))
def test_delete_after_migration(self):
    api.set_shard_at_rest("dummy", 1, "dest1/test_sharding")
    api.start_migration("dummy", 1, "dest2/test_sharding")

    # Mimic the post-copy state: the document exists in both locations.
    doc = {"x": 1, "y": 1}
    doc["_id"] = self.db1.dummy.insert(doc)
    self.db2.dummy.insert(doc)

    api.set_shard_to_migration_status(
        "dummy", 1, api.ShardStatus.POST_MIGRATION_DELETE)
    sharder._delete_source_data("dummy", 1)

    # Source emptied; destination keeps the document intact.
    self.assertEquals(0, self.db1.dummy.find({}).count())
    remaining, = self.db2.dummy.find({})
    self.assertEquals(doc, remaining)
def test_get_location_ordering(self):
    # Regression test for a caching bug around default locations.
    api.create_realm('dummy-realm', 'some_field', 'dummy_collection',
                     'cluster-1/some_db')
    api.set_shard_at_rest('dummy-realm', 1, 'dest2/some_db')

    realm = metadata._get_realm_for_collection('dummy_collection')

    # Shard 2 was never placed, so it resolves to the realm default.
    meta = metadata._get_metadata_for_shard(realm, 2)
    self.assertEquals(meta, {
        'status': metadata.ShardStatus.AT_REST,
        'realm': 'dummy-realm',
        'location': 'cluster-1/some_db',
    })

    locations = metadata._get_all_locations_for_realm(realm)
    self.assertEquals([], locations['cluster-1/some_db'].contains)
    self.assertEquals([], locations['cluster-1/some_db'].excludes)
    self.assertEquals([1], locations['dest2/some_db'].contains)
    self.assertEquals([], locations['dest2/some_db'].excludes)
def test_get_location_ordering(self):
    # Exposes a bug that was found in caching and default locations
    api.create_realm(
        'dummy-realm', 'some_field', 'dummy_collection',
        'cluster-1/some_db')
    api.set_shard_at_rest('dummy-realm', 1, 'dest2/some_db')

    realm = metadata._get_realm_for_collection('dummy_collection')
    shard_meta = metadata._get_metadata_for_shard(realm, 2)

    # An unplaced shard key reports the realm default at rest.
    expected = {
        'status': metadata.ShardStatus.AT_REST,
        'realm': 'dummy-realm',
        'location': 'cluster-1/some_db',
    }
    self.assertEquals(shard_meta, expected)

    by_location = metadata._get_all_locations_for_realm(realm)
    default_loc = by_location['cluster-1/some_db']
    placed_loc = by_location['dest2/some_db']
    self.assertEquals([], default_loc.contains)
    self.assertEquals([], default_loc.excludes)
    self.assertEquals([1], placed_loc.contains)
    self.assertEquals([], placed_loc.excludes)
def test_multishard_find_during_post_migration(self):
    # Shard 2 is paused at the destination mid-migration. The same _id
    # exists in both locations with different payloads; the find must
    # return exactly one copy of that document, not both.
    api.set_shard_at_rest('dummy', 1, "dest1/test_sharding")
    api.set_shard_at_rest('dummy', 2, "dest1/test_sharding")
    api.start_migration('dummy', 2, "dest2/test_sharding")
    api.set_shard_to_migration_status(
        'dummy', 2, api.ShardStatus.POST_MIGRATION_PAUSED_AT_DESTINATION)

    shared_id = bson.ObjectId()
    plain = {'x': 1, 'y': 1}
    fresh = {'_id': shared_id, 'x': 2, 'y': 1, 'is_fresh': True}
    stale = {'_id': shared_id, 'x': 2, 'y': 1, 'is_fresh': False}
    self.db1.dummy.insert(plain)
    self.db1.dummy.insert(fresh)
    self.db2.dummy.insert(stale)

    cursor = operations.multishard_find('dummy', {'y': 1})
    self.assertEquals([plain, stale],
                      sorted(cursor, key=lambda d: d['x']))