def _get_metadata_for_shard(realm_name, shard_key):
    """Return the unique shard metadata document for (realm, shard key).

    Single-element tuple unpacking of the cursor enforces that exactly one
    document matches (raises ValueError otherwise).
    """
    query = {'realm': realm_name, 'shard_key': shard_key}
    (doc,) = api._get_shards_coll().find(query)
    return doc
def _delete_source_data(collection_name, shard_key, manager):
    """Remove all documents for *shard_key* from the shard's source location.

    The shard must be in the POST_MIGRATION_DELETE state, otherwise an
    Exception is raised.  Documents are removed one at a time, honouring
    ``manager.delete_throttle`` and reporting progress via
    ``manager.inc_deleted()``.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_metadata, = api._get_shards_coll().find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source_coll = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)

    # Fetch only the _ids; no_cursor_timeout keeps the server-side cursor
    # alive for the duration of a potentially long delete.
    id_cursor = source_coll.find(
        {realm['shard_field']: shard_key}, {'_id': 1}, no_cursor_timeout=True)
    try:
        for record in id_cursor:
            source_coll.remove({'_id': record['_id']})
            # Read the throttle off the manager on every pass: another
            # thread may change it while we run, and a fresh read each
            # iteration keeps that thread-safe.
            throttle = manager.delete_throttle
            if throttle:
                time.sleep(throttle)
            manager.inc_deleted()
    finally:
        id_cursor.close()
def _do_copy(collection_name, shard_key):
    """Copy every document for *shard_key* from its current location to its
    new location (phase 1 of a shard migration).

    The shard must be in the MIGRATING_COPY state, otherwise an Exception
    is raised.  Inserts are unacknowledged (``safe=False``) for speed; a
    single ``getLastError`` check afterwards surfaces any write error.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')
    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']
    current_collection = _get_collection_from_location_string(
        current_location, collection_name)
    new_collection = _get_collection_from_location_string(
        new_location, collection_name)
    query = {shard_field: shard_key}
    for record in current_collection.find(query):
        new_collection.insert(record, safe=False)
    # BUG FIX: getLastError returns ok=1 whenever the command itself
    # executed, even if the preceding writes failed; the write error (if
    # any) is reported in the 'err' field, so that is what must be checked.
    result = new_collection.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _delete_source_data(collection_name, shard_key, delete_throttle=None):
    """Delete all source documents for *shard_key*, one at a time.

    The shard must be in the POST_MIGRATION_DELETE state, otherwise an
    Exception is raised.  Sleeps *delete_throttle* seconds between removes
    when given, and logs progress every 10000 deletions.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_metadata, = api._get_shards_coll().find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source_coll = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)

    # _id-only projection; no_cursor_timeout so the cursor survives a slow,
    # throttled delete pass.
    id_cursor = source_coll.find(
        {realm['shard_field']: shard_key}, {'_id': 1}, no_cursor_timeout=True)
    removed = 0
    try:
        for record in id_cursor:
            source_coll.remove({'_id': record['_id']})
            if delete_throttle:
                time.sleep(delete_throttle)
            if removed % 10000 == 0:
                _detail_log('%d records deleted' % removed)
            removed += 1
    finally:
        id_cursor.close()
def _do_copy(collection_name, shard_key, manager):
    """Copy all documents for *shard_key* to the shard's new location using
    batched, ordered upserts (phase 1 of a migration).

    Raises Exception if the shard is not in the MIGRATING_COPY state, and
    re-raises BulkWriteError (after logging its details) on write failure.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    # Exactly one metadata document must match; tuple unpacking enforces it.
    shard_metadata, = api._get_shards_coll().find({
        'realm': realm['name'],
        'shard_key': shard_key
    })
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')
    current_collection = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)
    new_collection = _get_collection_from_location_string(
        shard_metadata['new_location'], collection_name)
    # Presumably the mongos shard key of the target cluster, used to build
    # the upsert filters; falls back to _id — TODO confirm against
    # sniff_mongos_shard_key / batch_of_upsert_ops.
    target_key = sniff_mongos_shard_key(new_collection) or ['_id']
    # no_cursor_timeout keeps the source cursor alive for a long copy.
    cursor = current_collection.find({realm['shard_field']: shard_key},
                                     no_cursor_timeout=True)
    try:
        # manager.insert_throttle and manager.insert_batch_size can change
        # in other thread so we reference them on each cycle
        for batch in batched_cursor_iterator(
                cursor, lambda: manager.insert_batch_size):
            try:
                # ordered=True so a failure stops at the offending op.
                result = new_collection.bulk_write(batch_of_upsert_ops(
                    batch, target_key), ordered=True)
            except BulkWriteError as e:
                pretty_log(e.details)
                raise
            # NOTE(review): tum_ti_tum looks like a sleep/throttle helper —
            # confirm its semantics where it is defined.
            tum_ti_tum(manager.insert_throttle)
            manager.inc_inserted(by=result.bulk_api_result['nUpserted'])
    finally:
        cursor.close()
def _delete_source_data(collection_name, shard_key, manager):
    """Delete all documents for *shard_key* from the shard's old (source)
    location in batches via ``delete_many``.

    Raises Exception if the shard is not in the POST_MIGRATION_DELETE state.
    Batch size and throttle come from *manager* and are re-read on every
    cycle so another thread may tune them while the delete runs.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    # Exactly one metadata document must match; tuple unpacking enforces it.
    shard_metadata, = shards_coll.find({
        'realm': realm['name'],
        'shard_key': shard_key
    })
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')
    current_location = shard_metadata['location']
    current_collection = _get_collection_from_location_string(
        current_location, collection_name)
    # _id-only projection; no_cursor_timeout keeps the server-side cursor
    # alive for the (potentially long, throttled) delete.
    cursor = current_collection.find({shard_field: shard_key},
                                     {'_id': 1},
                                     no_cursor_timeout=True)
    try:
        # manager.delete_throttle and manager.delete_batch_size can change
        # in another thread so we reference them on each cycle.
        for batch in batched_cursor_iterator(
                cursor, lambda: manager.delete_batch_size):
            _ids = [record['_id'] for record in batch]
            result = current_collection.delete_many({'_id': {'$in': _ids}})
            # NOTE(review): tum_ti_tum looks like a sleep/throttle helper —
            # confirm its semantics where it is defined.
            tum_ti_tum(manager.delete_throttle)
            manager.inc_deleted(by=result.raw_result['n'])
    finally:
        cursor.close()
def _do_copy(collection_name, shard_key):
    """Copy every document for *shard_key* from its current location to its
    new location (phase 1 of a shard migration).

    The shard must be in the MIGRATING_COPY state, otherwise an Exception
    is raised.  Inserts are unacknowledged (``safe=False``) for speed; a
    single ``getLastError`` check afterwards surfaces any write error.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find({
        'realm': realm['name'],
        'shard_key': shard_key
    })
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')
    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']
    current_collection = _get_collection_from_location_string(
        current_location, collection_name)
    new_collection = _get_collection_from_location_string(
        new_location, collection_name)
    query = {shard_field: shard_key}
    for record in current_collection.find(query):
        new_collection.insert(record, safe=False)
    # BUG FIX: getLastError returns ok=1 whenever the command itself
    # executed, even if the preceding writes failed; the write error (if
    # any) is reported in the 'err' field, so that is what must be checked.
    result = new_collection.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _delete_source_data(collection_name, shard_key, delete_throttle=None):
    """Remove the shard's source documents one by one.

    Requires the shard to be in the POST_MIGRATION_DELETE state.  An
    optional *delete_throttle* (seconds) is slept between removes, and a
    progress line is logged every 10000 deletions.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    query = {'realm': realm['name'], 'shard_key': shard_key}
    shard_metadata, = shards_coll.find(query)
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    collection = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)

    # Only _ids are needed; no_cursor_timeout protects the long-running
    # cursor from server-side expiry.
    cursor = collection.find({shard_field: shard_key}, {'_id': 1},
                             no_cursor_timeout=True)
    try:
        count = 0
        for doc in cursor:
            collection.remove({'_id': doc['_id']})
            if delete_throttle:
                time.sleep(delete_throttle)
            if count % 10000 == 0:
                _detail_log('%d records deleted' % count)
            count += 1
    finally:
        cursor.close()
def _delete_source_data(collection_name, shard_key):
    """Delete the shard's source documents in pages of 50 via ``$in``.

    The shard must be in the POST_MIGRATION_DELETE state, otherwise an
    Exception is raised.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_metadata, = api._get_shards_coll().find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source_coll = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)

    # Page through the _ids and remove 50 at a time with $in — far fewer
    # round trips than one remove per document.
    id_cursor = source_coll.find({realm['shard_field']: shard_key},
                                 {'_id': 1})
    for page in grouper(50, id_cursor):
        source_coll.remove({'_id': {'$in': [doc['_id'] for doc in page]}})
def _delete_source_data(collection_name, shard_key):
    """Batch-delete (pages of 50) all source documents for *shard_key*.

    Raises Exception unless the shard is in the POST_MIGRATION_DELETE state.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    collection = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)

    # Fetch only _ids and remove them in chunks of 50 using a single $in
    # query per chunk.
    cursor = collection.find({shard_field: shard_key}, {'_id': 1})
    for chunk in grouper(50, cursor):
        ids = [record['_id'] for record in chunk]
        collection.remove({'_id': {'$in': ids}})
def _do_copy(collection_name, shard_key, manager):
    """Copy all documents for *shard_key* to the shard's new location.

    Requires the shard to be in the MIGRATING_COPY state.  Inserts are
    fire-and-forget (``w=0``); a final ``getLastError`` surfaces any write
    error.  ``manager.insert_throttle`` is honoured between inserts and
    progress is reported via ``manager.inc_inserted()``.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_metadata, = api._get_shards_coll().find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_coll = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)
    target_coll = _get_collection_from_location_string(
        shard_metadata['new_location'], collection_name)

    cursor = source_coll.find({realm['shard_field']: shard_key},
                              no_cursor_timeout=True)
    try:
        for record in cursor:
            target_coll.insert(record, w=0)
            # Re-read the throttle each pass: another thread may change it
            # on the manager while we run, and reading it once per loop
            # keeps that thread-safe.
            throttle = manager.insert_throttle
            if throttle:
                time.sleep(throttle)
            manager.inc_inserted()
    finally:
        cursor.close()

    result = target_coll.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _do_copy(collection_name, shard_key, manager):
    """Phase-1 copy of a shard: stream every matching document from the
    current location into the new one.

    Raises Exception if the shard is not in the MIGRATING_COPY state or if
    the trailing ``getLastError`` reports a write error.  The insert
    throttle is read from *manager* on every iteration so another thread
    can adjust it mid-copy.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    query = {'realm': realm['name'], 'shard_key': shard_key}
    shard_metadata, = shards_coll.find(query)
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    src = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)
    dst = _get_collection_from_location_string(
        shard_metadata['new_location'], collection_name)

    # no_cursor_timeout keeps the source cursor alive for a long copy.
    cursor = src.find({shard_field: shard_key}, no_cursor_timeout=True)
    try:
        for doc in cursor:
            dst.insert(doc, w=0)
            # Fresh read of the throttle per document for thread safety.
            pause = manager.insert_throttle
            if pause:
                time.sleep(pause)
            manager.inc_inserted()
    finally:
        cursor.close()

    # w=0 inserts are unacknowledged, so check for write errors explicitly.
    result = dst.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _do_copy(collection_name, shard_key, insert_throttle=None):
    """Copy every document for *shard_key* to the shard's new location.

    The shard must be in the MIGRATING_COPY state.  Sleeps
    *insert_throttle* seconds between inserts when given, logs progress
    every 50000 records, and raises on any write error reported by
    ``getLastError``.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_metadata, = api._get_shards_coll().find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_coll = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)
    target_coll = _get_collection_from_location_string(
        shard_metadata['new_location'], collection_name)

    cursor = source_coll.find({realm['shard_field']: shard_key},
                              no_cursor_timeout=True)
    copied = 0
    try:
        for record in cursor:
            target_coll.insert(record, w=0)
            if copied % 50000 == 0:
                _detail_log('%d records inserted' % copied)
            if insert_throttle:
                time.sleep(insert_throttle)
            copied += 1
    finally:
        cursor.close()

    # Inserts were unacknowledged (w=0); surface any write error now.
    result = target_coll.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _do_copy(collection_name, shard_key, insert_throttle=None):
    """Stream the shard's documents from their current location into the
    new one (phase 1 of a migration).

    Raises Exception unless the shard is in the MIGRATING_COPY state, or if
    the trailing ``getLastError`` reports a write error.  An optional
    *insert_throttle* (seconds) is slept between inserts; a progress line
    is logged every 50000 records.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    query = {'realm': realm['name'], 'shard_key': shard_key}
    shard_metadata, = shards_coll.find(query)
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    src = _get_collection_from_location_string(
        shard_metadata['location'], collection_name)
    dst = _get_collection_from_location_string(
        shard_metadata['new_location'], collection_name)

    cursor = src.find({shard_field: shard_key}, no_cursor_timeout=True)
    count = 0
    try:
        for doc in cursor:
            dst.insert(doc, w=0)
            if count % 50000 == 0:
                _detail_log('%d records inserted' % count)
            if insert_throttle:
                time.sleep(insert_throttle)
            count += 1
    finally:
        cursor.close()

    # w=0 writes are unacknowledged, so check for errors explicitly.
    result = dst.database.command('getLastError')
    if result['err']:
        raise Exception('Failed to do copy! Mongo error: %s' % result['err'])
def _sync_from_oplog(collection_name, shard_key, oplog_pos):
    """Syncs the oplog to within a reasonable timeframe of "now".

    Replays oplog entries after *oplog_pos* that touch this shard's
    documents, applying inserts/updates/deletes to the shard's new
    location.  Returns the timestamp of the last entry seen so the caller
    can resume from there.
    """
    conn = get_controlling_db().connection
    repl_coll = conn['local']['oplog.rs']
    # Tailable cursor over the replica-set oplog, replayed in natural
    # (insertion) order from the given position.
    cursor = repl_coll.find({'ts': {'$gt': oplog_pos}}, tailable=True)
    cursor = cursor.add_option(_QUERY_OPTIONS['oplog_replay'])
    cursor = cursor.hint([('$natural', 1)])
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    # Exactly one metadata document must match; tuple unpacking enforces it.
    shard_metadata, = shards_coll.find(
        {'realm': realm['name'], 'shard_key': shard_key})
    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']
    current_collection = _get_collection_from_location_string(
        current_location, collection_name)
    new_collection = _get_collection_from_location_string(
        new_location, collection_name)
    shard_query = {shard_field: shard_key}
    current_namespace = "%s.%s" % (
        current_collection.database.name, current_collection.name)
    for r in cursor:
        # Only entries for the source collection's namespace matter.
        if r['ns'] != current_namespace:
            continue
        if r['op'] in ['u', 'i']:
            # Check that this doc is part of our query set
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(
                current_collection.find(object_query).count())
        elif r['op'] == 'd':
            # For deletes the doc is gone from the source, so membership is
            # checked against the (already-copied) target collection.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(
                new_collection.find(object_query).count())
        else:
            print 'Ignoring op', r['op'], r
            continue
        if not match:
            continue
        if r['op'] == 'u':
            blue(' - Updating %s with %s' % (oid, r['o']))
            new_collection.update(
                {'_id': oid}, r['o'], safe=True)
        elif r['op'] == 'i':
            try:
                new_collection.insert(r['o'], safe=True)
            except pymongo.errors.DuplicateKeyError:
                # Document was already copied during phase 1; skip it.
                pass
        elif r['op'] == 'd':
            blue(' - Removing %s' % oid)
            new_collection.remove({'_id': oid}, safe=True)
        # Track the last applied position so the caller can resume.
        oplog_pos = r['ts']
    return oplog_pos
def _get_metadata_for_shard(realm_name, shard_key):
    """Fetch the single shard metadata document for the given realm/key."""
    matches = api._get_shards_coll().find(
        {'realm': realm_name, 'shard_key': shard_key})
    # Unpacking asserts exactly one match (ValueError otherwise).
    shard_metadata, = matches
    return shard_metadata
def _sync_from_oplog(collection_name, shard_key, oplog_pos):
    """Syncs the oplog to within a reasonable timeframe of "now".

    Replays oplog entries newer than *oplog_pos* that belong to this
    shard, mirroring inserts/updates/deletes onto the shard's new
    location.  Returns the timestamp of the last entry processed so the
    caller can call again from that point.
    """
    conn = get_controlling_db().connection
    repl_coll = conn['local']['oplog.rs']
    # Tailable cursor over the replica-set oplog, replayed in natural
    # (insertion) order from the given position.
    cursor = repl_coll.find({'ts': {'$gt': oplog_pos}}, tailable=True)
    cursor = cursor.add_option(_QUERY_OPTIONS['oplog_replay'])
    cursor = cursor.hint([('$natural', 1)])
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shards_coll = api._get_shards_coll()
    # Exactly one metadata document must match; tuple unpacking enforces it.
    shard_metadata, = shards_coll.find({
        'realm': realm['name'], 'shard_key': shard_key
    })
    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']
    current_collection = _get_collection_from_location_string(
        current_location, collection_name)
    new_collection = _get_collection_from_location_string(
        new_location, collection_name)
    shard_query = {shard_field: shard_key}
    current_namespace = "%s.%s" % (current_collection.database.name,
                                   current_collection.name)
    for r in cursor:
        # Skip entries for other collections/databases.
        if r['ns'] != current_namespace:
            continue
        if r['op'] in ['u', 'i']:
            # Check that this doc is part of our query set
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(current_collection.find(object_query).count())
        elif r['op'] == 'd':
            # Deleted docs are gone from the source, so membership is
            # checked against the (already-copied) target collection.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(new_collection.find(object_query).count())
        else:
            print 'Ignoring op', r['op'], r
            continue
        if not match:
            continue
        if r['op'] == 'u':
            blue(' - Updating %s with %s' % (oid, r['o']))
            new_collection.update({'_id': oid}, r['o'], safe=True)
        elif r['op'] == 'i':
            try:
                new_collection.insert(r['o'], safe=True)
            except pymongo.errors.DuplicateKeyError:
                # Already copied during phase 1; safe to ignore.
                pass
        elif r['op'] == 'd':
            blue(' - Removing %s' % oid)
            new_collection.remove({'_id': oid}, safe=True)
        # Remember the last applied position for the caller to resume from.
        oplog_pos = r['ts']
    return oplog_pos