Exemple #1
0
def _get_metadata_for_shard(realm_name, shard_key):
    """Return the one shard metadata document for a realm / shard key pair.

    The query result is unpacked into a single name, so a ``ValueError`` is
    raised unless exactly one document matches.
    """
    shards = api._get_shards_coll()
    lookup = {'realm': realm_name, 'shard_key': shard_key}
    (only_match,) = shards.find(lookup)
    return only_match
Exemple #2
0
def _delete_source_data(collection_name, shard_key, manager):
    """Remove a shard's documents from its current location one by one.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. After each removal
    the manager's ``delete_throttle`` is honoured and ``inc_deleted()`` is
    called.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    shard_lookup = {'realm': realm['name'], 'shard_key': shard_key}
    (shard_info,) = shards.find(shard_lookup)

    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all remove() needs.
    id_cursor = source.find(
        {shard_field: shard_key}, {'_id': 1}, no_cursor_timeout=True)
    try:
        for doc in id_cursor:
            source.remove({'_id': doc['_id']})

            # Read the delete throttle from the manager once per iteration
            # and use that local copy, so a change made while the loop is
            # running (e.g. from another thread) is picked up on the next
            # document without the value shifting between test and sleep.
            pause = manager.delete_throttle
            if pause:
                time.sleep(pause)
            manager.inc_deleted()
    finally:
        # The cursor never times out server-side, so always close it here.
        id_cursor.close()
Exemple #3
0
def _do_copy(collection_name, shard_key):
    """Copy every document of a shard from its current to its new location.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Documents are
    inserted without acknowledgement (``safe=False``) and a single
    ``getLastError`` is issued afterwards to detect a failed write.

    Raises:
        ValueError: if the shard metadata lookup does not match exactly one
            document (from the tuple unpacking).
        Exception: if the shard is in the wrong state, or if
            ``getLastError`` reports an error.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find(
        {'realm': realm['name'], 'shard_key': shard_key})
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']

    current_collection = _get_collection_from_location_string(
        current_location, collection_name)

    new_collection = _get_collection_from_location_string(
        new_location, collection_name)

    query = {shard_field: shard_key}
    for record in current_collection.find(query):
        new_collection.insert(record, safe=False)

    result = new_collection.database.command('getLastError')
    # 'ok' only says that the getLastError command itself ran; a failed
    # write is reported through a non-null 'err' while 'ok' is still 1.
    # Checking 'ok' alone therefore never detects a failed copy.
    error = result.get('err')
    if result.get('ok') != 1 or error:
        raise Exception('Failed to do copy! Mongo error: %s' % error)
Exemple #4
0
def _delete_source_data(collection_name, shard_key, delete_throttle=None):
    """Remove a shard's documents from its current location one by one.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. If
    ``delete_throttle`` is truthy the function sleeps for that long after
    every removal.

    A progress line is logged on the first document and then every 10000
    documents; the number logged is the count of documents removed *before*
    the current one.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    shard_lookup = {'realm': realm['name'], 'shard_key': shard_key}
    (shard_info,) = shards.find(shard_lookup)

    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all remove() needs.
    id_cursor = source.find(
        {shard_field: shard_key}, {'_id': 1}, no_cursor_timeout=True)
    try:
        # The enumerate index equals the number of documents already
        # removed when the current one is reached.
        for already_deleted, doc in enumerate(id_cursor):
            source.remove({'_id': doc['_id']})
            if delete_throttle:
                time.sleep(delete_throttle)
            if already_deleted % 10000 == 0:
                _detail_log('%d records deleted' % already_deleted)
    finally:
        # The cursor never times out server-side, so always close it here.
        id_cursor.close()
Exemple #5
0
def _do_copy(collection_name, shard_key, manager):
    """Copy a shard to its new location using batched bulk upserts.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Source documents are
    read in batches whose size comes from ``manager.insert_batch_size`` and
    written with an ordered ``bulk_write``. After each batch the insert
    throttle is applied and the manager's inserted counter is advanced by
    the ``nUpserted`` figure of the bulk result.

    A ``BulkWriteError`` has its details logged and is then re-raised. The
    shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)
    destination = _get_collection_from_location_string(
        shard_info['new_location'], collection_name)
    # Fall back to matching on _id when no shard key is sniffed.
    target_key = sniff_mongos_shard_key(destination) or ['_id']

    source_cursor = source.find(
        {realm['shard_field']: shard_key}, no_cursor_timeout=True)
    try:
        # The batch size is handed over as a callable and the throttle is
        # read from the manager per batch, so values changed while the copy
        # is running (e.g. by another thread) are honoured.
        batches = batched_cursor_iterator(
            source_cursor, lambda: manager.insert_batch_size)
        for batch in batches:
            try:
                upserts = batch_of_upsert_ops(batch, target_key)
                outcome = destination.bulk_write(upserts, ordered=True)
            except BulkWriteError as e:
                pretty_log(e.details)
                raise
            tum_ti_tum(manager.insert_throttle)
            manager.inc_inserted(by=outcome.bulk_api_result['nUpserted'])
    finally:
        # The cursor never times out server-side, so always close it here.
        source_cursor.close()
Exemple #6
0
def _delete_source_data(collection_name, shard_key, manager):
    """Remove a shard's documents from its current location in batches.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. Document ids are
    read in batches sized by ``manager.delete_batch_size`` and removed with
    one ``delete_many`` per batch. After each batch the delete throttle is
    applied and the manager's deleted counter is advanced by the ``n``
    field of the raw delete result.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all the delete needs.
    id_cursor = source.find({shard_field: shard_key}, {'_id': 1},
                            no_cursor_timeout=True)

    def current_batch_size():
        # Looked up on every call so a change to manager.delete_batch_size
        # made while the loop is running is picked up.
        return manager.delete_batch_size

    try:
        for batch in batched_cursor_iterator(id_cursor, current_batch_size):
            batch_ids = [record['_id'] for record in batch]
            outcome = source.delete_many({'_id': {'$in': batch_ids}})
            # manager.delete_throttle is likewise read afresh per batch.
            tum_ti_tum(manager.delete_throttle)
            manager.inc_deleted(by=outcome.raw_result['n'])
    finally:
        # The cursor never times out server-side, so always close it here.
        id_cursor.close()
Exemple #7
0
def _do_copy(collection_name, shard_key):
    """Copy every document of a shard from its current to its new location.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Documents are
    inserted without acknowledgement (``safe=False``) and a single
    ``getLastError`` is issued afterwards to detect a failed write.

    Raises:
        ValueError: if the shard metadata lookup does not match exactly one
            document (from the tuple unpacking).
        Exception: if the shard is in the wrong state, or if
            ``getLastError`` reports an error.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find({
        'realm': realm['name'],
        'shard_key': shard_key
    })
    if shard_metadata['status'] != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']

    current_collection = _get_collection_from_location_string(
        current_location, collection_name)

    new_collection = _get_collection_from_location_string(
        new_location, collection_name)

    query = {shard_field: shard_key}
    for record in current_collection.find(query):
        new_collection.insert(record, safe=False)

    result = new_collection.database.command('getLastError')
    # 'ok' only says that the getLastError command itself ran; a failed
    # write is reported through a non-null 'err' while 'ok' is still 1.
    # Checking 'ok' alone therefore never detects a failed copy.
    error = result.get('err')
    if result.get('ok') != 1 or error:
        raise Exception('Failed to do copy! Mongo error: %s' % error)
Exemple #8
0
def _delete_source_data(collection_name, shard_key, delete_throttle=None):
    """Remove a shard's documents from its current location one by one.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. If
    ``delete_throttle`` is truthy the function sleeps for that long after
    every removal.

    A progress line is logged on the first document and then every 10000
    documents; the number logged is the count of documents removed *before*
    the current one.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all remove() needs.
    id_cursor = source.find({shard_field: shard_key}, {'_id': 1},
                            no_cursor_timeout=True)
    try:
        # The enumerate index equals the number of documents already
        # removed when the current one is reached.
        for already_deleted, doc in enumerate(id_cursor):
            source.remove({'_id': doc['_id']})
            if delete_throttle:
                time.sleep(delete_throttle)
            if already_deleted % 10000 == 0:
                _detail_log('%d records deleted' % already_deleted)
    finally:
        # The cursor never times out server-side, so always close it here.
        id_cursor.close()
Exemple #9
0
def _delete_source_data(collection_name, shard_key):
    """Remove a shard's documents from its current location in pages.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. Document ids are
    read through ``grouper(50, cursor)`` and each page is removed with a
    single ``$in`` query.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    shard_lookup = {'realm': realm['name'], 'shard_key': shard_key}
    (shard_info,) = shards.find(shard_lookup)

    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all remove() needs.
    id_cursor = source.find({shard_field: shard_key}, {'_id': 1})
    # NOTE(review): grouper presumably yields chunks of up to 50 documents
    # with no padding values - confirm against its definition.
    for page in grouper(50, id_cursor):
        page_ids = [doc['_id'] for doc in page]
        source.remove({'_id': {'$in': page_ids}})
Exemple #10
0
def _delete_source_data(collection_name, shard_key):
    """Remove a shard's documents from its current location in pages.

    The shard must be in the ``POST_MIGRATION_DELETE`` state, otherwise an
    ``Exception`` is raised before anything is deleted. Document ids are
    read through ``grouper(50, cursor)`` and each page is removed with a
    single ``$in`` query.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.POST_MIGRATION_DELETE:
        raise Exception('Shard not in delete state')

    source = _get_collection_from_location_string(
        shard_info['location'], collection_name)

    # Only the _id of each document is fetched; that is all remove() needs.
    id_cursor = source.find({shard_field: shard_key}, {'_id': 1})
    # NOTE(review): grouper presumably yields chunks of up to 50 documents
    # with no padding values - confirm against its definition.
    for page in grouper(50, id_cursor):
        page_ids = [doc['_id'] for doc in page]
        source.remove({'_id': {'$in': page_ids}})
Exemple #11
0
def _do_copy(collection_name, shard_key, manager):
    """Copy a shard's documents to its new location one at a time.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Each document is
    inserted with ``w=0``; after every insert the manager's
    ``insert_throttle`` is honoured and ``inc_inserted()`` is called. Once
    the loop ends a ``getLastError`` is issued and a truthy ``err`` field
    is turned into an ``Exception``.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_location = shard_info['location']
    target_location = shard_info['new_location']

    source = _get_collection_from_location_string(
        source_location, collection_name)
    destination = _get_collection_from_location_string(
        target_location, collection_name)

    source_cursor = source.find(
        {shard_field: shard_key}, no_cursor_timeout=True)
    try:
        for record in source_cursor:
            destination.insert(record, w=0)

            # Read the insert throttle from the manager once per iteration
            # and use that local copy, so a change made while the loop is
            # running (e.g. from another thread) is picked up on the next
            # document without the value shifting between test and sleep.
            pause = manager.insert_throttle
            if pause:
                time.sleep(pause)

            manager.inc_inserted()
    finally:
        # The cursor never times out server-side, so always close it here.
        source_cursor.close()

    last_error = destination.database.command('getLastError')
    if last_error['err']:
        raise Exception(
            'Failed to do copy! Mongo error: %s' % last_error['err'])
Exemple #12
0
def _do_copy(collection_name, shard_key, manager):
    """Copy a shard's documents to its new location one at a time.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Each document is
    inserted with ``w=0``; after every insert the manager's
    ``insert_throttle`` is honoured and ``inc_inserted()`` is called. Once
    the loop ends a ``getLastError`` is issued and a truthy ``err`` field
    is turned into an ``Exception``.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    shard_lookup = {'realm': realm['name'], 'shard_key': shard_key}
    (shard_info,) = shards.find(shard_lookup)

    status = shard_info['status']
    if status != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_location = shard_info['location']
    target_location = shard_info['new_location']

    source = _get_collection_from_location_string(
        source_location, collection_name)
    destination = _get_collection_from_location_string(
        target_location, collection_name)

    source_cursor = source.find(
        {shard_field: shard_key}, no_cursor_timeout=True)
    try:
        for record in source_cursor:
            destination.insert(record, w=0)

            # Read the insert throttle from the manager once per iteration
            # and use that local copy, so a change made while the loop is
            # running (e.g. from another thread) is picked up on the next
            # document without the value shifting between test and sleep.
            pause = manager.insert_throttle
            if pause:
                time.sleep(pause)

            manager.inc_inserted()
    finally:
        # The cursor never times out server-side, so always close it here.
        source_cursor.close()

    last_error = destination.database.command('getLastError')
    if last_error['err']:
        raise Exception(
            'Failed to do copy! Mongo error: %s' % last_error['err'])
Exemple #13
0
def _do_copy(collection_name, shard_key, insert_throttle=None):
    """Copy a shard's documents to its new location one at a time.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Each document is
    inserted with ``w=0``; if ``insert_throttle`` is truthy the function
    sleeps for that long after every insert. Once the loop ends a
    ``getLastError`` is issued and a truthy ``err`` field is turned into an
    ``Exception``.

    A progress line is logged on the first document and then every 50000
    documents; the number logged is the count of documents inserted
    *before* the current one.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    (shard_info,) = shards.find({
        'realm': realm['name'],
        'shard_key': shard_key,
    })
    status = shard_info['status']
    if status != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_location = shard_info['location']
    target_location = shard_info['new_location']

    source = _get_collection_from_location_string(
        source_location, collection_name)
    destination = _get_collection_from_location_string(
        target_location, collection_name)

    source_cursor = source.find(
        {shard_field: shard_key}, no_cursor_timeout=True)
    try:
        # The enumerate index equals the number of documents already
        # inserted when the current one is reached.
        for already_inserted, record in enumerate(source_cursor):
            destination.insert(record, w=0)
            if already_inserted % 50000 == 0:
                _detail_log('%d records inserted' % already_inserted)
            if insert_throttle:
                time.sleep(insert_throttle)
    finally:
        # The cursor never times out server-side, so always close it here.
        source_cursor.close()

    last_error = destination.database.command('getLastError')
    if last_error['err']:
        raise Exception(
            'Failed to do copy! Mongo error: %s' % last_error['err'])
Exemple #14
0
def _do_copy(collection_name, shard_key, insert_throttle=None):
    """Copy a shard's documents to its new location one at a time.

    The shard must be in the ``MIGRATING_COPY`` state, otherwise an
    ``Exception`` is raised before anything is copied. Each document is
    inserted with ``w=0``; if ``insert_throttle`` is truthy the function
    sleeps for that long after every insert. Once the loop ends a
    ``getLastError`` is issued and a truthy ``err`` field is turned into an
    ``Exception``.

    A progress line is logged on the first document and then every 50000
    documents; the number logged is the count of documents inserted
    *before* the current one.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards = api._get_shards_coll()
    shard_lookup = {'realm': realm['name'], 'shard_key': shard_key}
    (shard_info,) = shards.find(shard_lookup)

    status = shard_info['status']
    if status != metadata.ShardStatus.MIGRATING_COPY:
        raise Exception('Shard not in copy state (phase 1)')

    source_location = shard_info['location']
    target_location = shard_info['new_location']

    source = _get_collection_from_location_string(
        source_location, collection_name)
    destination = _get_collection_from_location_string(
        target_location, collection_name)

    source_cursor = source.find(
        {shard_field: shard_key}, no_cursor_timeout=True)
    try:
        # The enumerate index equals the number of documents already
        # inserted when the current one is reached.
        for already_inserted, record in enumerate(source_cursor):
            destination.insert(record, w=0)
            if already_inserted % 50000 == 0:
                _detail_log('%d records inserted' % already_inserted)
            if insert_throttle:
                time.sleep(insert_throttle)
    finally:
        # The cursor never times out server-side, so always close it here.
        source_cursor.close()

    last_error = destination.database.command('getLastError')
    if last_error['err']:
        raise Exception(
            'Failed to do copy! Mongo error: %s' % last_error['err'])
Exemple #15
0
def _sync_from_oplog(collection_name, shard_key, oplog_pos):
    """Syncs the oplog to within a reasonable timeframe of "now".

    Reads entries from ``local.oplog.rs`` whose ``ts`` is greater than
    ``oplog_pos`` and replays the ones that concern this shard onto the
    shard's new location.

    Only entries whose namespace is the shard's current collection are
    considered. Updates and inserts are replayed when the document is
    found in the current collection under the shard query; deletes are
    replayed when the document is found in the new collection under the
    shard query. Any other op type is printed and skipped.

    Returns the ``ts`` of the last entry that was replayed, or the
    ``oplog_pos`` passed in if nothing was replayed. Entries that are
    skipped do not advance the returned position.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    conn = get_controlling_db().connection
    repl_coll = conn['local']['oplog.rs']

    # Tail the oplog from just after the given position, in natural order.
    cursor = repl_coll.find({'ts': {'$gt': oplog_pos}}, tailable=True)
    cursor = cursor.add_option(_QUERY_OPTIONS['oplog_replay'])
    cursor = cursor.hint([('$natural', 1)])

    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find(
        {'realm': realm['name'], 'shard_key': shard_key})

    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']

    current_collection = _get_collection_from_location_string(
        current_location, collection_name)

    new_collection = _get_collection_from_location_string(
        new_location, collection_name)

    shard_query = {shard_field: shard_key}

    # "<database>.<collection>", compared against each entry's 'ns' field.
    current_namespace = "%s.%s" % (
        current_collection.database.name, current_collection.name)

    for r in cursor:
        # Skip entries for any other collection.
        if r['ns'] != current_namespace:
            continue

        if r['op'] in ['u', 'i']:
            # Check that this doc is part of our query set
            # The _id is taken from 'o2' when the entry has one, else 'o'.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(
                current_collection.find(object_query).count())
        elif r['op'] == 'd':
            # For a delete the membership check runs against the new
            # collection rather than the current one.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(
                new_collection.find(object_query).count())

        else:
            print 'Ignoring op', r['op'], r
            continue

        # The document does not belong to this shard; nothing to replay.
        if not match:
            continue

        if r['op'] == 'u':
            blue(' - Updating %s with %s' % (oid, r['o']))
            new_collection.update(
                {'_id': oid}, r['o'], safe=True)

        elif r['op'] == 'i':
            try:
                new_collection.insert(r['o'], safe=True)
            except pymongo.errors.DuplicateKeyError:
                # A duplicate key on insert is deliberately ignored
                # (presumably the document was already copied - confirm).
                pass
        elif r['op'] == 'd':
            blue(' - Removing %s' % oid)
            new_collection.remove({'_id': oid}, safe=True)

        # Only reached for entries that were actually replayed.
        oplog_pos = r['ts']

    return oplog_pos
Exemple #16
0
def _get_metadata_for_shard(realm_name, shard_key):
    """Return the one shard metadata document for a realm / shard key pair.

    The query result is unpacked into a single name, so a ``ValueError`` is
    raised unless exactly one document matches.
    """
    shards = api._get_shards_coll()
    lookup = {'realm': realm_name, 'shard_key': shard_key}
    (only_match,) = shards.find(lookup)
    return only_match
Exemple #17
0
def _sync_from_oplog(collection_name, shard_key, oplog_pos):
    """Syncs the oplog to within a reasonable timeframe of "now".

    Reads entries from ``local.oplog.rs`` whose ``ts`` is greater than
    ``oplog_pos`` and replays the ones that concern this shard onto the
    shard's new location.

    Only entries whose namespace is the shard's current collection are
    considered. Updates and inserts are replayed when the document is
    found in the current collection under the shard query; deletes are
    replayed when the document is found in the new collection under the
    shard query. Any other op type is printed and skipped.

    Returns the ``ts`` of the last entry that was replayed, or the
    ``oplog_pos`` passed in if nothing was replayed. Entries that are
    skipped do not advance the returned position.

    The shard metadata lookup must match exactly one document; the tuple
    unpacking raises ``ValueError`` otherwise.
    """
    conn = get_controlling_db().connection
    repl_coll = conn['local']['oplog.rs']

    # Tail the oplog from just after the given position, in natural order.
    cursor = repl_coll.find({'ts': {'$gt': oplog_pos}}, tailable=True)
    cursor = cursor.add_option(_QUERY_OPTIONS['oplog_replay'])
    cursor = cursor.hint([('$natural', 1)])

    realm = metadata._get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']

    shards_coll = api._get_shards_coll()
    shard_metadata, = shards_coll.find({
        'realm': realm['name'],
        'shard_key': shard_key
    })

    current_location = shard_metadata['location']
    new_location = shard_metadata['new_location']

    current_collection = _get_collection_from_location_string(
        current_location, collection_name)

    new_collection = _get_collection_from_location_string(
        new_location, collection_name)

    shard_query = {shard_field: shard_key}

    # "<database>.<collection>", compared against each entry's 'ns' field.
    current_namespace = "%s.%s" % (current_collection.database.name,
                                   current_collection.name)

    for r in cursor:
        # Skip entries for any other collection.
        if r['ns'] != current_namespace:
            continue

        if r['op'] in ['u', 'i']:
            # Check that this doc is part of our query set
            # The _id is taken from 'o2' when the entry has one, else 'o'.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(current_collection.find(object_query).count())
        elif r['op'] == 'd':
            # For a delete the membership check runs against the new
            # collection rather than the current one.
            oid = r.get('o2', r['o'])['_id']
            object_query = {'_id': oid}
            object_query.update(shard_query)
            match = bool(new_collection.find(object_query).count())

        else:
            print 'Ignoring op', r['op'], r
            continue

        # The document does not belong to this shard; nothing to replay.
        if not match:
            continue

        if r['op'] == 'u':
            blue(' - Updating %s with %s' % (oid, r['o']))
            new_collection.update({'_id': oid}, r['o'], safe=True)

        elif r['op'] == 'i':
            try:
                new_collection.insert(r['o'], safe=True)
            except pymongo.errors.DuplicateKeyError:
                # A duplicate key on insert is deliberately ignored
                # (presumably the document was already copied - confirm).
                pass
        elif r['op'] == 'd':
            blue(' - Removing %s' % oid)
            new_collection.remove({'_id': oid}, safe=True)

        # Only reached for entries that were actually replayed.
        oplog_pos = r['ts']

    return oplog_pos