Ejemplo n.º 1
0
 def save_doc(self, batch: pump.Batch, dockey: bytes, docvalue: bytes):
     """Append *docvalue* to *batch* as a single TAP mutation message.

     Documents whose parsed JSON already carries an '_id' field are
     skipped, and values that fail to parse as JSON are logged and
     dropped; in both cases nothing is appended to the batch.
     """
     # common flags:       0x02000000 (JSON)
     # legacy flags:       0x00000006 (JSON)
     json_flags = 0x02000006
     try:
         parsed = json.loads(docvalue)
         if '_id' not in parsed:
             mutation = (couchbaseConstants.CMD_TAP_MUTATION, 0x0000ffff,
                         dockey, json_flags, 0, 0, b'', docvalue,
                         0, 0, 0, 0)
             batch.append(mutation, len(docvalue))
     except ValueError as error:
         logging.error(f'Fail to read json file with error: {error!s}')
Ejemplo n.º 2
0
 def save_doc(self, batch: pump.Batch, dockey: bytes, docvalue: bytes):
     """Append *docvalue* to *batch* as a TAP mutation message.

     Values that fail to parse as JSON are logged and dropped. Documents
     that carry an '_id' field are re-packed into a couchdb-style
     ``{"doc": {"json": ..., "meta": {"id": ...}}}`` structure.
     """
     cmd = couchbaseConstants.CMD_TAP_MUTATION
     vbucket_id = 0x0000ffff
     # common flags:       0x02000000 (JSON)
     # legacy flags:       0x00000006 (JSON)
     cas, exp, flg = 0, 0, 0x02000006
     try:
         doc = json.loads(docvalue)
         if '_id' not in doc:
             msg = (cmd, vbucket_id, dockey, flg, exp, cas, b'', docvalue, 0, 0, 0, 0)
             batch.append(msg, len(docvalue))
         else:
             # NOTE(review): `id` shadows the builtin of the same name.
             id = doc['_id'].encode('UTF-8')
             del doc['_id']
             # NOTE(review): `docdata` is built but never appended to the
             # batch or returned — documents with an '_id' are effectively
             # dropped here. Presumably a batch.append(...) is missing;
             # confirm intended behavior before relying on this path.
             docdata = {"doc": {
                 "json": doc,
                 "meta": {"id": id}
             }}
     except ValueError as error:
         logging.error(f'Fail to read json file with error: {error!s}')
Ejemplo n.º 3
0
    def scatter_gather(self, mconns: Dict[str, cb_bin_client.MemcachedClient], batch: pump.Batch) -> \
            Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch], Optional[bool]]:
        """Send the batch's messages to their per-vbucket connections
        (scatter), then collect the responses (gather).

        Returns ``(error, retry_batch, can_retry_without_refresh)``:
        *retry_batch* is the input batch when any vbucket requested a
        retry, and the final flag tells the caller the retry may proceed
        without refreshing the sink map first.
        """
        sink_map_buckets = self.sink_map['buckets']
        if len(sink_map_buckets) != 1:
            return "error: CBSink.run() expected 1 bucket in sink_map", None, None

        vbuckets_num = len(
            sink_map_buckets[0]['vBucketServerMap']['vBucketMap'])
        vbuckets = batch.group_by_vbucket_id(vbuckets_num, self.rehash)
        # Message indexes each connection reported as skipped during the
        # send phase, keyed by vbucket id; the recv phase ignores them.
        vbucket_skip_list: Dict[int, List[int]] = {}

        # Scatter or send phase.
        for vbucket_id, msgs in vbuckets.items():
            rv, conn = self.find_conn(mconns, vbucket_id, msgs)
            if rv != 0:
                return rv, None, None
            if conn is not None:
                rv, skipped = self.send_msgs(conn,
                                             msgs,
                                             self.operation(),
                                             vbucket_id=vbucket_id)
                if rv != 0:
                    return rv, None, None
                if len(skipped) > 0:
                    vbucket_skip_list[vbucket_id] = skipped

        # Yield to let other threads do stuff while server's processing.
        time.sleep(0.01)

        retry_batch = None
        need_refresh = False

        # Gather or recv phase.
        for vbucket_id, msgs in vbuckets.items():
            rv, conn = self.find_conn(mconns, vbucket_id, msgs)
            if rv != 0:
                return rv, None, None
            # BUG FIX: retry/refresh were previously read even when conn
            # was None, raising NameError on the first iteration or
            # reusing stale values from an earlier vbucket. Reset them for
            # every vbucket.
            retry, refresh = False, False
            if conn is not None:
                rv, retry, refresh = self.recv_msgs(conn,
                                                    msgs,
                                                    vbucket_skip_list.get(
                                                        vbucket_id, []),
                                                    vbucket_id=vbucket_id)
                if rv != 0:
                    return rv, None, None
            if retry:
                retry_batch = batch
            if refresh:
                need_refresh = True

        if need_refresh:
            self.refresh_sink_map()

        return 0, retry_batch, retry_batch is not None and not need_refresh
Ejemplo n.º 4
0
    def scatter_gather(self, mconns: Dict[str, cb_bin_client.MemcachedClient], batch: pump.Batch) -> \
            Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch], Optional[bool]]:
        """Send the batch's messages to their per-vbucket connections
        (scatter), then collect the responses (gather).

        Returns ``(error, retry_batch, can_retry_without_refresh)``:
        *retry_batch* is the input batch when any vbucket requested a
        retry, and the final flag tells the caller the retry may proceed
        without refreshing the sink map first.
        """
        sink_map_buckets = self.sink_map['buckets']
        if len(sink_map_buckets) != 1:
            return "error: CBSink.run() expected 1 bucket in sink_map", None, None

        vbuckets_num = len(sink_map_buckets[0]['vBucketServerMap']['vBucketMap'])
        vbuckets = batch.group_by_vbucket_id(vbuckets_num, self.rehash)

        # Scatter or send phase.
        for vbucket_id, msgs in vbuckets.items():
            rv, conn = self.find_conn(mconns, vbucket_id, msgs)
            if rv != 0:
                return rv, None, None
            if conn is not None:
                rv = self.send_msgs(conn, msgs, self.operation(),
                                    vbucket_id=vbucket_id)
                if rv != 0:
                    return rv, None, None

        # Yield to let other threads do stuff while server's processing.
        time.sleep(0.01)

        retry_batch = None
        need_refresh = False

        # Gather or recv phase.
        for vbucket_id, msgs in vbuckets.items():
            rv, conn = self.find_conn(mconns, vbucket_id, msgs)
            if rv != 0:
                return rv, None, None
            # BUG FIX: retry/refresh were previously read even when conn
            # was None, raising NameError on the first iteration or
            # reusing stale values from an earlier vbucket. Reset them for
            # every vbucket.
            retry, refresh = False, False
            if conn is not None:
                rv, retry, refresh = self.recv_msgs(conn, msgs, vbucket_id=vbucket_id)
                if rv != 0:
                    return rv, None, None
            if retry:
                retry_batch = batch
            if refresh:
                need_refresh = True

        if need_refresh:
            self.refresh_sink_map()

        return 0, retry_batch, retry_batch is not None and not need_refresh
Ejemplo n.º 5
0
    def provide_batch(self):
        """Produce the next Batch of TAP mutation messages from this
        source's kv_* sqlite tables.

        Returns ``(0, batch)`` on success, ``(0, None)`` when the source
        is exhausted, or ``(error_message, None)`` on failure. Cursor
        position is carried across calls in ``self.cursor_todo``.
        """
        if self.cursor_done:
            return 0, None

        batch = Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        try:
            if self.cursor_todo is None:
                rv, db, attached_dbs, table_dbs, vbucket_states = self.connect_db(
                )
                if rv != 0:
                    return rv, None

                # Determine which db the state table is in; unpacking a
                # non-singleton sequence raises ValueError.
                try:
                    (state_db, ) = table_dbs[u'vbucket_states']
                except ValueError:
                    db.close()
                    return "error: no unique vbucket_states table", None

                kv_names = []
                # PY3 FIX: dict.iteritems() no longer exists; use items().
                for kv_name, db_name in table_dbs.items():
                    if (self.opts.id is None
                            and not kv_name.startswith('kv_')):
                        continue
                    if (self.opts.id is not None
                            and kv_name != "kv_%s" % (self.opts.id)):
                        continue
                    kv_names.append(kv_name)

                db_kv_names = []
                # Visit kv tables in ascending vbucket-id order.
                for kv_name in sorted(kv_names,
                                      key=lambda x: int(x.split('_')[-1])):
                    for db_name in sorted(table_dbs[kv_name]):
                        db_kv_names.append((db_name, kv_name))

                self.cursor_todo = (db, db_kv_names, None, vbucket_states)

            db, db_kv_names, cursor, vbucket_states = self.cursor_todo
            if not db:
                self.cursor_done = True
                self.cursor_todo = None
                return 0, None

            while (not self.cursor_done and batch.size() < batch_max_size
                   and batch.bytes < batch_max_bytes):
                if not cursor:
                    if not db_kv_names:
                        self.cursor_done = True
                        self.cursor_todo = None
                        db.close()
                        break

                    db_name, kv_name = db_kv_names.pop()
                    vbucket_id = int(kv_name.split('_')[-1])
                    # PY3 FIX: dict.has_key() was removed; use `in`.
                    if vbucket_id not in vbucket_states[source_vbucket_state]:
                        break

                    logging.debug("  MBFSource db/kv table: %s/%s" %
                                  (db_name, kv_name))
                    cursor = db.cursor()
                    cursor.execute(self.s % (db_name, kv_name))
                    self.cursor_todo = (db, db_kv_names, cursor,
                                        vbucket_states)

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[0]
                    key = row[1]
                    flg = row[2]
                    exp = row[3]
                    cas = row[4]
                    val = row[5]
                    version = int(row[6])

                    if self.skip(key, vbucket_id):
                        continue

                    # Only emit rows whose version matches the recorded
                    # vbucket state version.
                    if version != vbucket_states[source_vbucket_state][
                            vbucket_id]:
                        continue

                    meta = ''
                    # Flags are stored in network byte order; convert to
                    # host order (c_uint32 forces an unsigned 32-bit view).
                    flg = socket.ntohl(ctypes.c_uint32(flg).value)
                    batch.append((couchbaseConstants.CMD_TAP_MUTATION,
                                  vbucket_id, key, flg, exp, cas, meta, val),
                                 len(val))
                else:
                    cursor.close()
                    self.cursor_todo = (db, db_kv_names, None, vbucket_states)
                    break  # Close the batch; next pass hits new db_name/kv_name.

            # BUG FIX: previously fell off the end of the try block and
            # implicitly returned None on success; callers unpack an
            # (rv, batch) pair.
            return 0, batch

        except Exception as e:  # PY3 FIX: `except X, e` is a syntax error.
            self.cursor_done = True
            self.cursor_todo = None
            return "error: MBFSource exception: " + str(e), None
Ejemplo n.º 6
0
    def provide_batch(self):
        """Produce the next Batch of TAP mutation messages from this
        source's kv_* sqlite tables.

        Returns ``(0, batch)`` on success, ``(0, None)`` when the source
        is exhausted, or ``(error_message, None)`` on failure. Cursor
        position is carried across calls in ``self.cursor_todo``.
        """
        if self.cursor_done:
            return 0, None

        batch = Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        try:
            if self.cursor_todo is None:
                rv, db, attached_dbs, table_dbs = self.connect_db()
                if rv != 0:
                    return rv, None

                # Determine which db the state table is in; unpacking a
                # non-singleton sequence raises ValueError.
                try:
                    (state_db,) = table_dbs[u'vbucket_states']
                except ValueError:
                    db.close()
                    return "error: no unique vbucket_states table", None

                sql = self.s % (state_db, source_vbucket_state)
                kv_names = []
                # PY3 FIX: dict.iteritems() no longer exists; use items().
                for kv_name, db_name in table_dbs.items():
                    if (self.opts.id is None and
                        not kv_name.startswith('kv_')):
                        continue
                    if (self.opts.id is not None and
                        kv_name != "kv_%s" % (self.opts.id) ):
                        continue
                    kv_names.append(kv_name)

                db_kv_names = []
                # Visit kv tables in ascending vbucket-id order.
                for kv_name in sorted(kv_names,
                                      key=lambda x: int(x.split('_')[-1])):
                    for db_name in sorted(table_dbs[kv_name]):
                        db_kv_names.append((db_name, kv_name))

                self.cursor_todo = (db, sql, db_kv_names, None)

            db, sql, db_kv_names, cursor = self.cursor_todo
            if not db:
                self.cursor_done = True
                self.cursor_todo = None
                return 0, None

            while (not self.cursor_done and
                   batch.size() < batch_max_size and
                   batch.bytes < batch_max_bytes):
                if not cursor:
                    if not db_kv_names:
                        self.cursor_done = True
                        self.cursor_todo = None
                        db.close()
                        break

                    db_name, kv_name = db_kv_names.pop()
                    logging.debug("  MBFSource db/kv table: %s/%s" %
                                  (db_name, kv_name))

                    cursor = db.cursor()
                    cursor.execute(sql % (db_name, kv_name))

                    self.cursor_todo = (db, sql, db_kv_names, cursor)

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[0]
                    key = row[1]
                    flg = row[2]
                    exp = row[3]
                    cas = row[4]
                    val = row[5]

                    if self.skip(key, vbucket_id):
                        continue

                    meta = ''

                    batch.append((memcacheConstants.CMD_TAP_MUTATION,
                                  vbucket_id, key, flg, exp, cas, meta, val), len(val))
                else:
                    cursor.close()
                    self.cursor_todo = (db, sql, db_kv_names, None)
                    break # Close the batch; next pass hits new db_name/kv_name.

            # BUG FIX: previously fell off the end of the try block and
            # implicitly returned None on success; callers unpack an
            # (rv, batch) pair.
            return 0, batch

        except Exception as e:  # PY3 FIX: `except X, e` is a syntax error.
            self.cursor_done = True
            self.cursor_todo = None
            return "error: MBFSource exception: " + str(e), None
Ejemplo n.º 7
0
    def provide_batch(self):
        """Produce the next Batch of messages read from this node's
        backup files (``data-*.cbb``).

        Returns ``(0, batch)`` on success or when the source is
        exhausted, or ``(error_message, None)`` on failure. The open
        cursor/db pair persists across calls in ``self.cursor_db``.
        """
        if self.done:
            return 0, None

        batch = Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        s = "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg"

        if self.files is None: # None != [], as self.files will shrink to [].
            g = glob.glob(BFD.db_dir(self.spec,
                                     self.bucket_name(),
                                     self.node_name()) + "/data-*.cbb")
            self.files = sorted(g)
        try:
            while (not self.done and
                   batch.size() < batch_max_size and
                   batch.bytes < batch_max_bytes):
                if self.cursor_db is None:
                    if not self.files:
                        self.done = True
                        return 0, batch

                    rv, db = connect_db(self.files[0], self.opts, CBB_VERSION)
                    if rv != 0:
                        return rv, None
                    self.files = self.files[1:]

                    cursor = db.cursor()
                    cursor.execute(s)

                    self.cursor_db = (cursor, db)

                cursor, db = self.cursor_db

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[1]
                    key = row[2]
                    val = row[7]

                    if self.skip(key, vbucket_id):
                        continue

                    msg = (row[0], row[1], row[2], row[3], row[4],
                           int(row[5]), # CAS as 64-bit integer not string.
                           row[6], row[7])
                    batch.append(msg, len(val))
                else:
                    # Current file exhausted; close it and move to the next
                    # one on the following loop iteration.
                    if self.cursor_db:
                        self.cursor_db[0].close()
                        self.cursor_db[1].close()
                    self.cursor_db = None

            return 0, batch

        except Exception as e:  # PY3 FIX: `except X, e` is a syntax error.
            self.done = True
            if self.cursor_db:
                self.cursor_db[0].close()
                self.cursor_db[1].close()
            self.cursor_db = None

            return "error: exception reading backup file: " + str(e), None
Ejemplo n.º 8
0
    def provide_batch(self):
        """Produce the next Batch of messages read from this node's
        backup files (``data-*.cbb``).

        Returns ``(0, batch)`` on success or when the source is
        exhausted, or ``(error_message, None)`` on failure. The open
        cursor/db pair persists across calls in ``self.cursor_db``.
        """
        if self.done:
            return 0, None

        batch = Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        s = "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg"

        if self.files is None:  # None != [], as self.files will shrink to [].
            g = glob.glob(
                BFD.db_dir(self.spec, self.bucket_name(), self.node_name()) +
                "/data-*.cbb")
            self.files = sorted(g)
        try:
            while (not self.done and batch.size() < batch_max_size
                   and batch.bytes < batch_max_bytes):
                if self.cursor_db is None:
                    if not self.files:
                        self.done = True
                        return 0, batch

                    rv, db = connect_db(self.files[0], self.opts, CBB_VERSION)
                    if rv != 0:
                        return rv, None
                    self.files = self.files[1:]

                    cursor = db.cursor()
                    cursor.execute(s)

                    self.cursor_db = (cursor, db)

                cursor, db = self.cursor_db

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[1]
                    key = row[2]
                    val = row[7]

                    if self.skip(key, vbucket_id):
                        continue

                    msg = (
                        row[0],
                        row[1],
                        row[2],
                        row[3],
                        row[4],
                        int(row[5]),  # CAS as 64-bit integer not string.
                        row[6],
                        row[7])
                    batch.append(msg, len(val))
                else:
                    # Current file exhausted; close it and move to the next
                    # one on the following loop iteration.
                    if self.cursor_db:
                        self.cursor_db[0].close()
                        self.cursor_db[1].close()
                    self.cursor_db = None

            return 0, batch

        except Exception as e:  # PY3 FIX: `except X, e` is a syntax error.
            self.done = True
            if self.cursor_db:
                self.cursor_db[0].close()
                self.cursor_db[1].close()
            self.cursor_db = None

            return "error: exception reading backup file: " + str(e), None