def apply(self):
        """ Apply oplogs.

        Group buffered oplogs by namespace.  A namespace with more than one
        batch worth of entries is sharded across several vectors by a hash
        of the document '_id', so all ops touching the same document land in
        the same (ordered) vector.  Every non-empty vector is then
        bulk-written concurrently through the pool.
        """
        oplog_vecs = []
        for ns, oplogs in self._map.items():
            dbname, collname = mongo_utils.parse_namespace(ns)
            # Ceiling division: number of batch-sized vectors needed.
            # (The previous "// batch_size + 1" allocated a spurious extra
            # vector whenever len(oplogs) was an exact multiple of the
            # batch size.)
            n = max(1, (len(oplogs) + self._batch_size - 1) // self._batch_size)
            if n == 1:
                vec = OplogVector(dbname, collname)
                for oplog in oplogs:
                    op = self.__convert(oplog)
                    assert op is not None
                    vec._oplogs.append(op)
                oplog_vecs.append(vec)
            else:
                vecs = [OplogVector(dbname, collname) for _ in range(n)]
                for oplog in oplogs:
                    op = self.__convert(oplog)
                    assert op is not None
                    # filter of UpdateOne/ReplaceOne/DeleteOne is {'_id': ObjectID}
                    # @ref https://github.com/mongodb/mongo-python-driver/blob/master/pymongo/operations.py
                    m = self.__hash(op._filter['_id'])
                    vecs[m % n]._oplogs.append(op)
                oplog_vecs.extend(vecs)

        for vec in oplog_vecs:
            if vec._oplogs:
                self._pool.spawn(self._mc.bulk_write, vec._dbname,
                                 vec._collname, vec._oplogs)
        self._pool.join()
Example #2
0
 def valid_oplog(self, oplog):
     """ Decide whether an oplog entry passes the collection filter.

     With no include-collection filter configured, every entry passes.
     Otherwise no-ops ('n') are rejected, commands ('c') pass only when
     their database is among the related databases, and all other ops
     are checked against the namespace filter.
     """
     if not self._include_colls:
         return True
     op, ns = oplog['op'], oplog['ns']
     if op == 'c':
         dbname = parse_namespace(ns)[0]
         return dbname in self._related_dbs
     return op != 'n' and self.valid_ns(ns)
Example #3
0
    def apply(self):
        """ Apply oplogs.

        Convert each buffered oplog into a write op, group the ops into
        one vector per namespace, then bulk-write every non-empty vector
        concurrently through the pool.
        """
        vectors = []
        for ns, oplogs in self._map.items():
            dbname, collname = mongo_utils.parse_namespace(ns)
            vec = OplogVector(dbname, collname)
            for entry in oplogs:
                converted = self.__convert(entry)
                assert converted is not None
                vec._oplogs.append(converted)
            vectors.append(vec)

        for vec in vectors:
            if not vec._oplogs:
                continue
            self._pool.spawn(self._mc.bulk_write, vec._dbname,
                             vec._collname, vec._oplogs)
        self._pool.join()
Example #4
0
    def _replay_oplog(self, start_optime):
        """ Replay oplog.

        Tail the source oplog from *start_optime* and apply each entry to
        the destination, batching ordinary ops through
        self._multi_oplog_replayer when one is configured.  The outer loop
        (re)creates the tailing cursor after connection failures; the inner
        loop reads and applies entries.

        During the post_initial_sync stage duplicate-key errors are
        tolerated; once the end-of-initial-sync optime is reached the
        syncer steps into the oplog_sync stage, where a duplicate key is
        fatal.  Returns only on unrecoverable errors (stale/missing start
        optime, or DuplicateKeyError during oplog_sync).
        """
        self._last_optime = start_optime

        n_total = 0  # entries read from the oplog cursor
        n_skip = 0   # entries rejected by the data filter

        while True:
            try:
                start_optime_valid = False
                need_log = False
                host, port = self._src.client().address
                log.info('try to sync oplog from %s on %s:%d' %
                         (self._last_optime, host, port))
                cursor = self._src.tail_oplog(start_optime)
            except IndexError as e:
                # start optime fell off the end of the oplog: unrecoverable
                log.error(e)
                log.error('%s not found, terminate' % self._last_optime)
                return
            except Exception as e:
                log.error('get oplog cursor failed: %s' % e)
                continue

            # loop: read and apply oplog
            while True:
                try:
                    # optime/progress logging deferred from the previous
                    # iteration so it happens at most once per entry
                    if need_log:
                        self._log_optime(self._last_optime)
                        self._log_progress()
                        need_log = False

                    if not cursor.alive:
                        log.error('cursor is dead')
                        raise pymongo.errors.AutoReconnect

                    oplog = cursor.next()
                    n_total += 1

                    # check start optime once
                    if not start_optime_valid:
                        if oplog['ts'] == self._last_optime:
                            log.info('oplog is ok: %s' % self._last_optime)
                            start_optime_valid = True
                        else:
                            log.error('oplog %s is stale, terminate' %
                                      self._last_optime)
                            return

                    if oplog['op'] == 'n':  # no-op
                        self._last_optime = oplog['ts']
                        need_log = True
                        continue

                    # validate oplog
                    if not self._conf.data_filter.valid_oplog(oplog):
                        n_skip += 1
                        self._last_optime = oplog['ts']
                        need_log = True
                        continue

                    # rewrite the namespace when a db/coll mapping applies
                    dbname, collname = mongo_utils.parse_namespace(oplog['ns'])
                    dst_dbname, dst_collname = self._conf.db_coll_mapping(
                        dbname, collname)
                    if dst_dbname != dbname or dst_collname != collname:
                        oplog['ns'] = '%s.%s' % (dst_dbname, dst_collname)

                    if self._stage == Stage.post_initial_sync:
                        # docs copied during initial sync can collide with
                        # replayed inserts, so duplicate keys are ignored here
                        if self._multi_oplog_replayer:
                            if mongo_utils.is_command(oplog):
                                # commands cannot be batched: flush pending
                                # ops first, then apply the command alone
                                self._multi_oplog_replayer.apply(
                                    ignore_duplicate_key_error=True)
                                self._multi_oplog_replayer.clear()
                                self._dst.apply_oplog(oplog)
                                self._last_optime = oplog['ts']
                                need_log = True
                            else:
                                self._multi_oplog_replayer.push(oplog)
                                if oplog[
                                        'ts'] == self._initial_sync_end_optime or self._multi_oplog_replayer.count(
                                        ) == self._oplog_batchsize:
                                    self._multi_oplog_replayer.apply(
                                        ignore_duplicate_key_error=True)
                                    self._multi_oplog_replayer.clear()
                                    self._last_optime = oplog['ts']
                                    need_log = True
                        else:
                            self._dst.apply_oplog(
                                oplog, ignore_duplicate_key_error=True)
                            self._last_optime = oplog['ts']
                            need_log = True

                        if oplog['ts'] == self._initial_sync_end_optime:
                            log.info('step into stage: oplog_sync')
                            self._stage = Stage.oplog_sync
                    else:
                        if self._multi_oplog_replayer:
                            if mongo_utils.is_command(oplog):
                                self._multi_oplog_replayer.apply()
                                self._multi_oplog_replayer.clear()
                                self._dst.apply_oplog(oplog)
                                self._last_optime = oplog['ts']
                                need_log = True
                            else:
                                self._multi_oplog_replayer.push(oplog)
                                if self._multi_oplog_replayer.count(
                                ) == self._oplog_batchsize:
                                    self._multi_oplog_replayer.apply()
                                    self._multi_oplog_replayer.clear()
                                    self._last_optime = oplog['ts']
                                    need_log = True
                        else:
                            self._dst.apply_oplog(oplog)
                            self._last_optime = oplog['ts']
                            need_log = True
                except StopIteration as e:
                    # cursor exhausted: flush any batched ops before waiting
                    if self._multi_oplog_replayer and self._multi_oplog_replayer.count(
                    ) > 0:
                        self._multi_oplog_replayer.apply()
                        self._multi_oplog_replayer.clear()
                        self._last_optime = self._multi_oplog_replayer.last_optime(
                        )
                        need_log = True
                    # no more oplogs, wait a moment
                    time.sleep(0.1)
                    self._log_optime(self._last_optime)
                    self._log_progress('latest')
                except pymongo.errors.DuplicateKeyError as e:
                    if self._stage == Stage.oplog_sync:
                        log.error(e)
                        log.error('terminate')
                        return
                    else:
                        log.error('ignore duplicate key error: %s' % e)
                        continue
                except pymongo.errors.AutoReconnect as e:
                    log.error(e)
                    self._src.reconnect()
                    break
Example #5
0
    def _sync_oplog(self, oplog_start):
        """ Replay oplog.

        Tail 'local.oplog.rs' on the source starting at *oplog_start* and
        translate each insert/update/delete/command entry into an
        Elasticsearch bulk action appended to self._action_buf; the buffer
        is flushed via self._dst.bulk_write whenever it fills up or the
        cursor catches up with the oplog tail.

        Returns only when *oplog_start* is no longer present in the oplog
        (the oplog has rolled over).
        """
        self._last_bulk_optime = oplog_start

        n_total = 0  # entries read from the oplog cursor
        n_skip = 0   # entries rejected by the data filter

        while True:
            # try to get cursor until success
            try:
                host, port = self._src.client().address
                log.info('try to sync oplog from %s on %s:%d' %
                         (self._last_bulk_optime, host, port))
                # set codec options to guarantee the order of keys in command
                coll = self._src.client()['local'].get_collection(
                    'oplog.rs',
                    codec_options=bson.codec_options.CodecOptions(
                        document_class=bson.son.SON))
                cursor = coll.find(
                    {'ts': {
                        '$gte': oplog_start
                    }},
                    cursor_type=pymongo.cursor.CursorType.TAILABLE_AWAIT,
                    no_cursor_timeout=True)

                # New in version 3.2
                # src_version = mongo_utils.get_version(self._src.client())
                # if mongo_utils.version_higher_or_equal(src_version, '3.2.0'):
                #     cursor.max_await_time_ms(1000)

                valid_start_optime = False  # need to validate

                while True:
                    try:
                        if not cursor.alive:
                            log.error('cursor is dead')
                            raise pymongo.errors.AutoReconnect

                        oplog = cursor.next()
                        n_total += 1

                        # the first entry must match the requested start
                        # optime, otherwise the oplog has rolled over
                        if not valid_start_optime:
                            if oplog['ts'] == oplog_start:
                                log.info('oplog is ok: %s' % oplog_start)
                                valid_start_optime = True
                            else:
                                log.error('oplog %s is stale, terminate' %
                                          oplog_start)
                                return

                        # validate oplog
                        if not self._conf.data_filter.valid_oplog(oplog):
                            n_skip += 1
                            self._last_optime = oplog['ts']
                            continue

                        op = oplog['op']
                        ns = oplog['ns']

                        if op == 'i':  # insert
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(
                                dbname, collname)
                            fields = self._conf.fieldmap.get(
                                gen_namespace(dbname, collname))

                            doc = oplog['o']
                            # don't shadow the builtin 'id'
                            doc_id = str(doc['_id'])
                            del doc['_id']
                            if fields:
                                doc = gen_doc_with_fields(doc, fields)
                            if doc:
                                self._action_buf.append({
                                    '_op_type': 'index',
                                    '_index': idxname,
                                    '_type': typename,
                                    '_id': doc_id,
                                    '_source': doc
                                })

                        elif op == 'u':  # update
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(
                                dbname, collname)
                            fields = self._conf.fieldmap.get(
                                gen_namespace(dbname, collname))

                            doc_id = str(oplog['o2']['_id'])

                            if '$set' in oplog['o']:
                                doc = {}
                                # was iteritems(): Python-2-only; items() is
                                # consistent with the rest of the codebase
                                for k, v in oplog['o']['$set'].items():
                                    if not fields or k in fields:
                                        sub_doc = doc_flat_to_nested(
                                            k.split('.'), v)
                                        merge_doc(doc, sub_doc)
                                if doc:
                                    self._action_buf.append({
                                        '_op_type': 'update',
                                        '_index': idxname,
                                        '_type': typename,
                                        '_id': doc_id,
                                        '_retry_on_conflict': 3,
                                        'doc': doc,
                                        'doc_as_upsert': True
                                    })

                            if '$unset' in oplog['o']:
                                script_statements = []
                                # was iterkeys(): Python-2-only; keys() works
                                # on both Python 2 and 3
                                for keypath in oplog['o']['$unset'].keys():
                                    if not fields or keypath in fields:
                                        pos = keypath.rfind('.')
                                        if pos >= 0:
                                            script_statements.append(
                                                'ctx._source.%s.remove("%s")' %
                                                (keypath[:pos],
                                                 keypath[pos + 1:]))
                                        else:
                                            script_statements.append(
                                                'ctx._source.remove("%s")' %
                                                keypath)
                                if script_statements:
                                    doc = {
                                        'script': '; '.join(script_statements)
                                    }
                                    self._action_buf.append({
                                        '_op_type': 'update',
                                        '_index': idxname,
                                        '_type': typename,
                                        '_id': doc_id,
                                        '_retry_on_conflict': 3,
                                        'script': doc['script']
                                    })

                            if '$set' not in oplog[
                                    'o'] and '$unset' not in oplog['o']:
                                log.warn('unexpect oplog: %s', oplog['o'])

                        elif op == 'd':  # delete
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(
                                dbname, collname)
                            doc_id = str(oplog['o']['_id'])
                            self._action_buf.append({
                                '_op_type': 'delete',
                                '_index': idxname,
                                '_type': typename,
                                '_id': doc_id
                            })

                        elif op == 'c':  # command
                            dbname, _ = parse_namespace(ns)
                            idxname = self._conf.db_mapping(dbname)
                            if 'drop' in oplog['o']:
                                # TODO
                                # how to delete type?
                                pass
                                log.warn(
                                    'you should implement document type deletion.'
                                )
                            if 'dropDatabase' in oplog['o']:
                                # delete index
                                self._dst.client().indices.delete(
                                    index=idxname)

                        elif op == 'n':  # no-op
                            pass
                        else:
                            log.error('invalid optype: %s' % oplog)

                        # flush
                        if self._action_buf_full():
                            self._dst.bulk_write(self._action_buf)
                            self._action_buf = []
                            self._last_bulk_optime = oplog['ts']

                        self._last_optime = oplog['ts']
                        self._log_optime(oplog['ts'])
                        self._log_progress()
                    except StopIteration as e:
                        # caught up with the oplog tail: flush and wait
                        if len(self._action_buf) > 0:
                            self._dst.bulk_write(self._action_buf)
                            self._action_buf = []
                            self._last_bulk_optime = self._last_optime
                        self._log_optime(self._last_optime)
                        self._log_progress('latest')
                        time.sleep(0.1)
                    except pymongo.errors.AutoReconnect as e:
                        log.error(e)
                        self._src.reconnect()
                        break
                    except elasticsearch.helpers.BulkIndexError as e:
                        # drop the failed batch and keep going (best effort)
                        log.error(e)
                        self._action_buf = []
            except IndexError as e:
                log.error(e)
                log.error('%s not found, terminate' % oplog_start)
                return
Example #6
0
    def _replay_oplog(self, start_optime):
        """ Replay oplog.

        Tail the source oplog from *start_optime* and replay every entry
        on the destination, batching ordinary ops through
        self._multi_oplog_replayer (flushed every 1000 ops) when one is
        configured.  Returns when the start optime is no longer present in
        the source oplog.
        """
        self._last_optime = start_optime

        n_total = 0  # entries read from the oplog cursor
        n_skip = 0   # entries rejected by the data filter

        while True:
            # try to get cursor until success
            try:
                start_optime_valid = False
                need_log = False
                host, port = self._src.client().address
                log.info('try to sync oplog from %s on %s:%d' %
                         (self._last_optime, host, port))
                cursor = self._src.tail_oplog(start_optime)

                while True:
                    try:
                        # optime/progress logging deferred from the previous
                        # iteration so it happens at most once per entry
                        if need_log:
                            self._log_optime(self._last_optime)
                            self._log_progress()
                            need_log = False

                        if not cursor.alive:
                            log.error('cursor is dead')
                            raise pymongo.errors.AutoReconnect

                        oplog = cursor.next()
                        n_total += 1

                        # check start optime once
                        if not start_optime_valid:
                            if oplog['ts'] == self._last_optime:
                                log.info('oplog is ok: %s' % self._last_optime)
                                start_optime_valid = True
                            else:
                                log.error('oplog %s is stale, terminate' %
                                          self._last_optime)
                                return

                        if oplog['op'] == 'n':  # no-op
                            self._last_optime = oplog['ts']
                            need_log = True
                            continue

                        # validate oplog only for mongodb
                        if not self._conf.data_filter.valid_oplog(oplog):
                            n_skip += 1
                            self._last_optime = oplog['ts']
                            need_log = True
                            continue

                        # rewrite the namespace when a db/coll mapping applies
                        dbname, collname = mongo_utils.parse_namespace(
                            oplog['ns'])
                        dst_dbname, dst_collname = self._conf.db_coll_mapping(
                            dbname, collname)
                        if dst_dbname != dbname or dst_collname != collname:
                            oplog['ns'] = '%s.%s' % (dst_dbname, dst_collname)

                        if self._multi_oplog_replayer:
                            if mongo_utils.is_command(oplog):
                                # commands cannot be batched: flush pending
                                # ops first, then replay the command alone
                                self._multi_oplog_replayer.apply()
                                self._multi_oplog_replayer.clear()
                                self._dst.replay_oplog(oplog)
                                self._last_optime = oplog['ts']
                                need_log = True
                            else:
                                self._multi_oplog_replayer.push(oplog)
                                if self._multi_oplog_replayer.count() >= 1000:
                                    self._multi_oplog_replayer.apply()
                                    self._multi_oplog_replayer.clear()
                                    self._last_optime = oplog['ts']
                                    need_log = True
                        else:
                            self._dst.replay_oplog(oplog)
                            self._last_optime = oplog['ts']
                            need_log = True
                    except StopIteration as e:
                        # BUGFIX: guard against a missing replayer before
                        # dereferencing it -- self._multi_oplog_replayer may
                        # be None when batching is disabled (the non-batched
                        # path above never touches it)
                        if self._multi_oplog_replayer and \
                                self._multi_oplog_replayer.count() > 0:
                            self._multi_oplog_replayer.apply()
                            self._multi_oplog_replayer.clear()
                            self._last_optime = \
                                self._multi_oplog_replayer.last_optime()
                            need_log = True
                        # no more oplogs, wait a moment
                        time.sleep(0.1)
                        self._log_optime(self._last_optime)
                        self._log_progress('latest')
                    except pymongo.errors.AutoReconnect as e:
                        log.error(e)
                        self._src.reconnect()
                        break
            except IndexError as e:
                log.error(e)
                log.error('%s not found, terminate' % self._last_optime)
                return
Example #7
0
    def apply_oplog(self, oplog):
        """ Replay a single oplog entry on the destination.

        Handles 'i' (insert/upsert), 'u' (update), 'd' (delete),
        'c' (command) and 'n' (no-op) entries.  Retries forever on
        AutoReconnect; re-raises DuplicateKeyError so the caller (syncer)
        can decide what to do; a WriteError caused by updating immutable
        shard-key fields on a mongos destination is worked around with a
        delete-then-insert of the merged document.
        """
        dbname, collname = mongo_utils.parse_namespace(oplog['ns'])
        while True:
            try:
                op = oplog['op']  # 'n' or 'i' or 'u' or 'c' or 'd'

                if op == 'i':  # insert
                    if '_id' in oplog['o']:
                        self._mc[dbname][collname].replace_one({'_id': oplog['o']['_id']}, oplog['o'], upsert=True)
                    else:
                        # create index
                        # insert into db.system.indexes
                        self._mc[dbname][collname].insert(oplog['o'], check_keys=False)
                elif op == 'u':  # update
                    self._mc[dbname][collname].update(oplog['o2'], oplog['o'])
                elif op == 'd':  # delete
                    self._mc[dbname][collname].delete_one(oplog['o'])
                elif op == 'c':  # command
                    # FIX ISSUE #4 and #5
                    # if use option '--colls' to sync target collections,
                    # commands running on other collections in the same database may replay failed

                    if "renameCollection" in oplog['o']:
                        # rename collection
                        src = oplog['o']['renameCollection']
                        to = oplog['o']['to']
                        drop_target = oplog['o'].get('dropTarget', False)
                        try:
                            self._mc["admin"].command("renameCollection", src, to=to, dropTarget=drop_target)
                        except pymongo.errors.OperationFailure as e:
                            log.error('renameCollection %s: %s' % (e, oplog))
                        return

                    try:
                        self._mc[dbname].command(oplog['o'])
                    except pymongo.errors.OperationFailure as e:
                        log.error('%s: %s' % (e, oplog))
                elif op == 'n':  # no-op
                    pass
                else:
                    # BUGFIX: message was misspelled ('invaid')
                    log.error('invalid op: %s' % oplog)
                return
            except pymongo.errors.AutoReconnect as e:
                self.reconnect()
            except pymongo.errors.DuplicateKeyError as e:
                raise e  # handle error in syncer
            except pymongo.errors.WriteError as e:
                log.error('%s' % e)

                # For case:
                #   Update the values of shard key fields when syncing from replica set to sharded cluster.
                #
                # Once you shard a collection, the shard key and the shard key values are immutable.
                # Reference: https://docs.mongodb.com/manual/core/sharding-shard-key/
                if self._mc.is_mongos and oplog['op'] == 'u' and 'the (immutable) field' in str(e):
                    old_doc = self._mc[dbname][collname].find_one(oplog['o2'])
                    if not old_doc:
                        # BUGFIX: the messages below had no %s placeholder, so
                        # the extra argument was never rendered by the logger
                        log.error('replay update failed: document not found: %s', oplog['o2'])
                        sys.exit(1)
                    if '$set' in oplog['o']:
                        # BUGFIX: dict.update() returns None; the original
                        # assigned its result to new_doc and later tried to
                        # insert None
                        old_doc.update(oplog['o']['$set'])
                        new_doc = old_doc
                    else:
                        new_doc = oplog['o']

                    # TODO: here need a transaction to delete old and insert new

                    # delete old document
                    res = self._mc[dbname][collname].delete_one(oplog['o2'])
                    if res.deleted_count != 1:
                        log.error('replay update failed: delete old document failed: %s', oplog['o2'])
                        sys.exit(1)
                    # insert new document
                    # NOTE(review): the insert goes through self._dst_mc while
                    # the delete above used self._mc -- looks inconsistent;
                    # confirm which client is intended before unifying
                    res = self._dst_mc[dbname][collname].insert_one(new_doc)
                    if not res.inserted_id:
                        log.error('replay update failed: insert new document failed: %s', new_doc)
                        sys.exit(1)
Example #8
0
    def _replay_oplog(self, oplog_start):
        """ Replay oplog.
        """
        self._last_bulk_optime = oplog_start

        n_total = 0
        n_skip = 0

        while True:
            # try to get cursor until success
            try:
                host, port = self._src.client().address
                log.info('try to sync oplog from %s on %s:%d' % (self._last_bulk_optime, host, port))
                # set codec options to guarantee the order of keys in command
                coll = self._src.client()['local'].get_collection('oplog.rs',
                                                                  codec_options=bson.codec_options.CodecOptions(document_class=bson.son.SON))
                cursor = coll.find({'ts': {'$gte': oplog_start}},
                                   cursor_type=pymongo.cursor.CursorType.TAILABLE_AWAIT,
                                   no_cursor_timeout=True)

                # New in version 3.2
                # src_version = mongo_utils.get_version(self._src.client())
                # if mongo_utils.version_higher_or_equal(src_version, '3.2.0'):
                #     cursor.max_await_time_ms(1000)

                valid_start_optime = False  # need to validate

                while True:
                    try:
                        if not cursor.alive:
                            log.error('cursor is dead')
                            raise pymongo.errors.AutoReconnect

                        oplog = cursor.next()
                        n_total += 1

                        if not valid_start_optime:
                            if oplog['ts'] == oplog_start:
                                log.info('oplog is ok: %s' % oplog_start)
                                valid_start_optime = True
                            else:
                                log.error('oplog %s is stale, terminate' % oplog_start)
                                return

                        # validate oplog
                        if not self._conf.data_filter.valid_oplog(oplog):
                            n_skip += 1
                            self._last_optime = oplog['ts']
                            continue

                        op = oplog['op']
                        ns = oplog['ns']

                        if op == 'i':  # insert
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(dbname, collname)
                            fields = self._conf.fieldmap.get(gen_namespace(dbname, collname))

                            doc = oplog['o']
                            id = str(doc['_id'])
                            del doc['_id']
                            if fields:
                                doc = gen_doc_with_fields(doc, fields)
                            if doc:
                                self._action_buf.append({'_op_type': 'index', '_index': idxname, '_type': typename, '_id': id, '_source': doc})

                        elif op == 'u':  # update
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(dbname, collname)
                            fields = self._conf.fieldmap.get(gen_namespace(dbname, collname))

                            id = str(oplog['o2']['_id'])

                            if '$set' in oplog['o']:
                                doc = {}
                                for k, v in oplog['o']['$set'].iteritems():
                                    if not fields or k in fields:
                                        sub_doc = doc_flat_to_nested(k.split('.'), v)
                                        merge_doc(doc, sub_doc)
                                if doc:
                                    self._action_buf.append({'_op_type': 'update',
                                                             '_index': idxname,
                                                             '_type': typename,
                                                             '_id': id,
                                                             '_retry_on_conflict': 3,
                                                             'doc': doc,
                                                             'doc_as_upsert': True})

                            if '$unset' in oplog['o']:
                                script_statements = []
                                for keypath in oplog['o']['$unset'].iterkeys():
                                    if not fields or keypath in fields:
                                        pos = keypath.rfind('.')
                                        if pos >= 0:
                                            script_statements.append('ctx._source.%s.remove("%s")' % (keypath[:pos], keypath[pos+1:]))
                                        else:
                                            script_statements.append('ctx._source.remove("%s")' % keypath)
                                if script_statements:
                                    doc = {'script': '; '.join(script_statements)}
                                    self._action_buf.append({'_op_type': 'update',
                                                             '_index': idxname,
                                                             '_type': typename,
                                                             '_id': id,
                                                             '_retry_on_conflict': 3,
                                                             'script': doc['script']})

                            if '$set' not in oplog['o'] and '$unset' not in oplog['o']:
                                log.warn('unexpect oplog: %s', oplog['o'])

                        elif op == 'd':  # delete
                            dbname, collname = parse_namespace(ns)
                            idxname, typename = self._conf.db_coll_mapping(dbname, collname)
                            id = str(oplog['o']['_id'])
                            self._action_buf.append({'_op_type': 'delete', '_index': idxname, '_type': typename, '_id': id})

                        elif op == 'c':  # command
                            dbname, _ = parse_namespace(ns)
                            idxname = self._conf.db_mapping(dbname)
                            if 'drop' in oplog['o']:
                                # TODO
                                # how to delete type?
                                pass
                                log.warn('you should implement document type deletion.')
                            if 'dropDatabase' in oplog['o']:
                                # delete index
                                self._dst.client().indices.delete(index=idxname)

                        elif op == 'n':  # no-op
                            pass
                        else:
                            log.error('invalid optype: %s' % oplog)

                        # flush
                        if self._action_buf_full():
                            self._dst.bulk_write(self._action_buf)
                            self._action_buf = []
                            self._last_bulk_optime = oplog['ts']

                        self._last_optime = oplog['ts']
                        self._log_optime(oplog['ts'])
                        self._log_progress()
                    except StopIteration as e:
                        # flush
                        if len(self._action_buf) > 0:
                            self._dst.bulk_write(self._action_buf)
                            self._action_buf = []
                            self._last_bulk_optime = self._last_optime
                        self._log_optime(self._last_optime)
                        self._log_progress('latest')
                        time.sleep(0.1)
                    except pymongo.errors.AutoReconnect as e:
                        log.error(e)
                        self._src.reconnect()
                        break
                    except elasticsearch.helpers.BulkIndexError as e:
                        log.error(e)
                        self._action_buf = []
            except IndexError as e:
                log.error(e)
                log.error('%s not found, terminate' % oplog_start)
                return
예제 #9
0
    def apply_oplog(self, oplog, ignore_duplicate_key_error=False, print_log=False):
        """ Apply a single oplog entry to the destination MongoDB.

        Retries forever on AutoReconnect. On DuplicateKeyError, skips the
        entry if ignore_duplicate_key_error else terminates the process.
        On a shard-key immutability WriteError, falls back to a
        delete-then-insert replay of the update.

        :param oplog: oplog document with 'ns', 'op', 'o' (and 'o2' for updates)
        :param ignore_duplicate_key_error: skip instead of exiting on duplicate key
        :param print_log: unused; kept for interface compatibility
        """
        dbname, collname = mongo_utils.parse_namespace(oplog['ns'])
        while True:
            try:
                op = oplog['op']  # 'n' or 'i' or 'u' or 'c' or 'd'
                if op == 'i':  # insert
                    if '_id' in oplog['o']:
                        # upsert so replaying the same insert is idempotent
                        self._mc[dbname][collname].replace_one({'_id': oplog['o']['_id']}, oplog['o'], upsert=True)
                    else:
                        # create index
                        # insert into db.system.indexes
                        self._mc[dbname][collname].insert(oplog['o'], check_keys=False)
                elif op == 'u':  # update
                    self._mc[dbname][collname].update(oplog['o2'], oplog['o'])
                elif op == 'd':  # delete
                    self._mc[dbname][collname].delete_one(oplog['o'])
                elif op == 'c':  # command
                    # FIX ISSUE #4 and #5
                    # if use '--colls' option to sync target collections,
                    # running command that belongs to exclusive brother collections in the same database may failed.
                    # Just skip it.
                    try:
                        self._mc[dbname].command(oplog['o'])
                    except pymongo.errors.OperationFailure as e:
                        log.info('%s: %s' % (e, oplog))
                elif op == 'n':  # no-op
                    pass
                else:
                    log.error('invalid op: %s' % oplog)
                return
            except pymongo.errors.AutoReconnect as e:
                self.reconnect()
                continue
            except pymongo.errors.DuplicateKeyError as e:
                if ignore_duplicate_key_error:
                    log.info('ignore duplicate key error: %s :%s' % (e, oplog))
                    break
                else:
                    log.error('%s: %s' % (e, oplog))
                    sys.exit(1)
            except pymongo.errors.WriteError as e:
                log.error('%s' % e)

                # For case:
                #   Update the values of shard key fields when syncing from replica set to sharded cluster.
                #
                # Once you shard a collection, the shard key and the shard key values are immutable.
                # Reference: https://docs.mongodb.com/manual/core/sharding-shard-key/
                if self._mc.is_mongos and oplog['op'] == 'u' and 'the (immutable) field' in str(e):
                    old_doc = self._mc[dbname][collname].find_one(oplog['o2'])
                    if not old_doc:
                        # FIX: log.error was passed a bare positional arg with no
                        # '%s' placeholder, so the document was never logged
                        log.error('replay update failed: document not found: %s' % oplog['o2'])
                        sys.exit(1)
                    if '$set' in oplog['o']:
                        # FIX: dict.update() returns None; mutate old_doc in
                        # place and use it as the replacement document
                        old_doc.update(oplog['o']['$set'])
                        new_doc = old_doc
                    else:
                        new_doc = oplog['o']

                    # TODO: here need a transaction to delete old and insert new

                    # delete old document
                    res = self._mc[dbname][collname].delete_one(oplog['o2'])
                    if res.deleted_count != 1:
                        log.error('replay update failed: delete old document failed: %s' % oplog['o2'])
                        sys.exit(1)
                    # insert new document
                    res = self._mc[dbname][collname].insert_one(new_doc)
                    if not res.inserted_id:
                        log.error('replay update failed: insert new document failed: %s' % new_doc)
                        sys.exit(1)
                    # FIX: the oplog has been fully replayed; without this return
                    # the while-loop would re-run the failing update forever
                    return
예제 #10
0
 def valid_ns(self, ns):
     """ Return True if the namespace refers to a collection to sync.
     """
     db_name, coll_name = parse_namespace(ns)
     return self.valid_coll(db_name, coll_name)
예제 #11
0
    def apply_oplog(self, oplog):
        """ Replay a single oplog entry on the destination MongoDB.

        Retries forever on AutoReconnect. DuplicateKeyError is re-raised
        for the syncer to handle. On a shard-key immutability WriteError,
        falls back to a delete-then-insert replay of the update.

        :param oplog: oplog document with 'ns', 'op', 'o' (and 'o2' for updates)
        :raises pymongo.errors.DuplicateKeyError: propagated to the caller
        """
        dbname, collname = mongo_utils.parse_namespace(oplog['ns'])
        while True:
            try:
                op = oplog['op']  # 'n' or 'i' or 'u' or 'c' or 'd'

                if op == 'i':  # insert
                    if '_id' in oplog['o']:
                        # upsert so replaying the same insert is idempotent
                        self._mc[dbname][collname].replace_one({'_id': oplog['o']['_id']}, oplog['o'], upsert=True)
                    else:
                        # create index
                        # insert into db.system.indexes
                        self._mc[dbname][collname].insert(oplog['o'], check_keys=False)
                elif op == 'u':  # update
                    self._mc[dbname][collname].update(oplog['o2'], oplog['o'])
                elif op == 'd':  # delete
                    self._mc[dbname][collname].delete_one(oplog['o'])
                elif op == 'c':  # command
                    # FIX ISSUE #4 and #5
                    # if use option '--colls' to sync target collections,
                    # commands running on other collections in the same database may replay failed
                    try:
                        self._mc[dbname].command(oplog['o'])
                    except pymongo.errors.OperationFailure as e:
                        log.error('%s: %s' % (e, oplog))
                elif op == 'n':  # no-op
                    pass
                else:
                    # FIX: typo 'invaid' -> 'invalid'
                    log.error('invalid op: %s' % oplog)
                return
            except pymongo.errors.AutoReconnect as e:
                self.reconnect()
            except pymongo.errors.DuplicateKeyError as e:
                raise e  # handle error in syncer
            except pymongo.errors.WriteError as e:
                log.error('%s' % e)

                # For case:
                #   Update the values of shard key fields when syncing from replica set to sharded cluster.
                #
                # Once you shard a collection, the shard key and the shard key values are immutable.
                # Reference: https://docs.mongodb.com/manual/core/sharding-shard-key/
                if self._mc.is_mongos and oplog['op'] == 'u' and 'the (immutable) field' in str(e):
                    old_doc = self._mc[dbname][collname].find_one(oplog['o2'])
                    if not old_doc:
                        # FIX: log.error was passed a bare positional arg with no
                        # '%s' placeholder, so the document was never logged
                        log.error('replay update failed: document not found: %s' % oplog['o2'])
                        sys.exit(1)
                    if '$set' in oplog['o']:
                        # FIX: dict.update() returns None; mutate old_doc in
                        # place and use it as the replacement document
                        old_doc.update(oplog['o']['$set'])
                        new_doc = old_doc
                    else:
                        new_doc = oplog['o']

                    # TODO: here need a transaction to delete old and insert new

                    # delete old document
                    res = self._mc[dbname][collname].delete_one(oplog['o2'])
                    if res.deleted_count != 1:
                        log.error('replay update failed: delete old document failed: %s' % oplog['o2'])
                        sys.exit(1)
                    # insert new document
                    # FIX: was self._dst_mc, inconsistent with every other access
                    # in this method (find_one/delete_one above use self._mc)
                    res = self._mc[dbname][collname].insert_one(new_doc)
                    if not res.inserted_id:
                        log.error('replay update failed: insert new document failed: %s' % new_doc)
                        sys.exit(1)
                    # FIX: the oplog has been fully replayed; without this return
                    # the while-loop would re-run the failing update forever
                    return