def save_doc(self, batch: pump.Batch, dockey: bytes, docvalue: bytes):
    cmd = couchbaseConstants.CMD_TAP_MUTATION
    vbucket_id = 0x0000ffff
    # common flags: 0x02000000 (JSON)
    # legacy flags: 0x00000006 (JSON)
    cas, exp, flg = 0, 0, 0x02000006
    try:
        doc = json.loads(docvalue)
        if '_id' not in doc:
            msg = (cmd, vbucket_id, dockey, flg, exp, cas, b'', docvalue, 0, 0, 0, 0)
            batch.append(msg, len(docvalue))
    except ValueError as error:
        logging.error(f'Failed to read JSON file with error: {error!s}')
def save_doc(self, batch: pump.Batch, dockey: bytes, docvalue: bytes):
    cmd = couchbaseConstants.CMD_TAP_MUTATION
    vbucket_id = 0x0000ffff
    # common flags: 0x02000000 (JSON)
    # legacy flags: 0x00000006 (JSON)
    cas, exp, flg = 0, 0, 0x02000006
    try:
        doc = json.loads(docvalue)
        if '_id' not in doc:
            msg = (cmd, vbucket_id, dockey, flg, exp, cas, b'', docvalue, 0, 0, 0, 0)
            batch.append(msg, len(docvalue))
        else:
            id = doc['_id'].encode('UTF-8')
            del doc['_id']
            docdata = {"doc": {
                "json": doc,
                "meta": {"id": id}
            }}
            # NOTE: docdata is built here but never appended to the batch,
            # so documents carrying an '_id' field are silently dropped.
    except ValueError as error:
        logging.error(f'Failed to read JSON file with error: {error!s}')
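# A minimal, self-contained sketch (StubBatch and the key/value below are
# hypothetical, not part of the real pump module) showing how the flag value
# used above is composed and how the no-'_id' path feeds the batch. The field
# names after val in the tuple layout are assumptions.
import json

COMMON_JSON_FLAG = 0x02000000  # common flags: JSON
LEGACY_JSON_FLAG = 0x00000006  # legacy flags: JSON
assert COMMON_JSON_FLAG | LEGACY_JSON_FLAG == 0x02000006

class StubBatch:
    """Hypothetical stand-in for pump.Batch, for illustration only."""
    def __init__(self):
        self.msgs = []
        self.bytes = 0

    def append(self, msg, num_bytes):
        self.msgs.append(msg)
        self.bytes += num_bytes

batch = StubBatch()
docvalue = json.dumps({"name": "pale ale"}).encode('utf-8')
# Tuple layout used by the newer save_doc above (names after val assumed):
# (cmd, vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res)
msg = (0x41,  # CMD_TAP_MUTATION opcode in the memcached TAP protocol
       0x0000ffff, b'beer-1', COMMON_JSON_FLAG | LEGACY_JSON_FLAG,
       0, 0, b'', docvalue, 0, 0, 0, 0)
batch.append(msg, len(docvalue))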
def scatter_gather(self, mconns: Dict[str, cb_bin_client.MemcachedClient], batch: pump.Batch) -> \
        Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch], Optional[bool]]:
    sink_map_buckets = self.sink_map['buckets']
    if len(sink_map_buckets) != 1:
        return "error: CBSink.run() expected 1 bucket in sink_map", None, None

    vbuckets_num = len(sink_map_buckets[0]['vBucketServerMap']['vBucketMap'])
    vbuckets = batch.group_by_vbucket_id(vbuckets_num, self.rehash)

    vbucket_skip_list: Dict[int, List[int]] = {}

    # Scatter or send phase.
    for vbucket_id, msgs in vbuckets.items():
        rv, conn = self.find_conn(mconns, vbucket_id, msgs)
        if rv != 0:
            return rv, None, None
        if conn is not None:
            rv, skipped = self.send_msgs(conn, msgs, self.operation(),
                                         vbucket_id=vbucket_id)
            if rv != 0:
                return rv, None, None
            if len(skipped) > 0:
                vbucket_skip_list[vbucket_id] = skipped

    # Yield to let other threads do stuff while server's processing.
    time.sleep(0.01)

    retry_batch = None
    need_refresh = False

    # Gather or recv phase.
    for vbucket_id, msgs in vbuckets.items():
        rv, conn = self.find_conn(mconns, vbucket_id, msgs)
        if rv != 0:
            return rv, None, None
        if conn is not None:
            rv, retry, refresh = self.recv_msgs(conn, msgs,
                                                vbucket_skip_list.get(vbucket_id, []),
                                                vbucket_id=vbucket_id)
            if rv != 0:
                return rv, None, None
            if retry:
                retry_batch = batch
            if refresh:
                need_refresh = True

    if need_refresh:
        self.refresh_sink_map()

    return 0, retry_batch, retry_batch is not None and not need_refresh
def scatter_gather(self, mconns: Dict[str, cb_bin_client.MemcachedClient], batch: pump.Batch) -> \
        Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch], Optional[bool]]:
    sink_map_buckets = self.sink_map['buckets']
    if len(sink_map_buckets) != 1:
        return "error: CBSink.run() expected 1 bucket in sink_map", None, None

    vbuckets_num = len(sink_map_buckets[0]['vBucketServerMap']['vBucketMap'])
    vbuckets = batch.group_by_vbucket_id(vbuckets_num, self.rehash)

    # Scatter or send phase.
    for vbucket_id, msgs in vbuckets.items():
        rv, conn = self.find_conn(mconns, vbucket_id, msgs)
        if rv != 0:
            return rv, None, None
        if conn is not None:
            rv = self.send_msgs(conn, msgs, self.operation(),
                                vbucket_id=vbucket_id)
            if rv != 0:
                return rv, None, None

    # Yield to let other threads do stuff while server's processing.
    time.sleep(0.01)

    retry_batch = None
    need_refresh = False

    # Gather or recv phase.
    for vbucket_id, msgs in vbuckets.items():
        rv, conn = self.find_conn(mconns, vbucket_id, msgs)
        if rv != 0:
            return rv, None, None
        if conn is not None:
            rv, retry, refresh = self.recv_msgs(conn, msgs, vbucket_id=vbucket_id)
            if rv != 0:
                return rv, None, None
            if retry:
                retry_batch = batch
            if refresh:
                need_refresh = True

    if need_refresh:
        self.refresh_sink_map()

    return 0, retry_batch, retry_batch is not None and not need_refresh
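# Sketch of the grouping that batch.group_by_vbucket_id(...) performs ahead of
# the scatter phase, assuming the standard Couchbase CRC32-based vBucket hash;
# whether pump.Batch uses exactly this expression is an assumption, and
# msg[2] holding the key follows the tuple layout used elsewhere in this file.
import zlib
from collections import defaultdict

def group_by_vbucket_id(msgs, vbuckets_num):
    vbuckets = defaultdict(list)
    for msg in msgs:
        key = msg[2]
        vbucket_id = ((zlib.crc32(key) >> 16) & 0x7fff) % vbuckets_num
        vbuckets[vbucket_id].append(msg)
    return vbuckets

sample = [(0x41, 0, b'beer-1', 0, 0, 0, b'', b'{}'),
          (0x41, 0, b'beer-2', 0, 0, 0, b'', b'{}')]
for vb, grouped in group_by_vbucket_id(sample, 1024).items():
    print(vb, [m[2] for m in grouped])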
def provide_batch(self):
    if self.cursor_done:
        return 0, None

    batch = Batch(self)

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    source_vbucket_state = \
        getattr(self.opts, 'source_vbucket_state', 'active')

    try:
        if self.cursor_todo is None:
            rv, db, attached_dbs, table_dbs, vbucket_states = self.connect_db()
            if rv != 0:
                return rv, None

            # Determine which db the state table is in.
            try:
                (state_db,) = table_dbs[u'vbucket_states']
            except ValueError:
                db.close()
                return "error: no unique vbucket_states table", None

            kv_names = []
            for kv_name, db_name in table_dbs.iteritems():
                if (self.opts.id is None and
                    not kv_name.startswith('kv_')):
                    continue
                if (self.opts.id is not None and
                    kv_name != "kv_%s" % (self.opts.id)):
                    continue
                kv_names.append(kv_name)

            db_kv_names = []
            for kv_name in sorted(kv_names,
                                  key=lambda x: int(x.split('_')[-1])):
                for db_name in sorted(table_dbs[kv_name]):
                    db_kv_names.append((db_name, kv_name))

            self.cursor_todo = (db, db_kv_names, None, vbucket_states)

        db, db_kv_names, cursor, vbucket_states = self.cursor_todo
        if not db:
            self.cursor_done = True
            self.cursor_todo = None
            return 0, None

        while (not self.cursor_done and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            if not cursor:
                if not db_kv_names:
                    self.cursor_done = True
                    self.cursor_todo = None
                    db.close()
                    break

                db_name, kv_name = db_kv_names.pop()
                vbucket_id = int(kv_name.split('_')[-1])
                if not vbucket_states[source_vbucket_state].has_key(vbucket_id):
                    break

                logging.debug(" MBFSource db/kv table: %s/%s" %
                              (db_name, kv_name))

                cursor = db.cursor()
                cursor.execute(self.s % (db_name, kv_name))

                self.cursor_todo = (db, db_kv_names, cursor, vbucket_states)

            row = cursor.fetchone()
            if row:
                vbucket_id = row[0]
                key = row[1]
                flg = row[2]
                exp = row[3]
                cas = row[4]
                val = row[5]
                version = int(row[6])

                if self.skip(key, vbucket_id):
                    continue
                if version != vbucket_states[source_vbucket_state][vbucket_id]:
                    continue

                meta = ''
                flg = socket.ntohl(ctypes.c_uint32(flg).value)
                batch.append((couchbaseConstants.CMD_TAP_MUTATION,
                              vbucket_id, key, flg, exp, cas, meta, val),
                             len(val))
            else:
                cursor.close()
                self.cursor_todo = (db, db_kv_names, None, vbucket_states)
                break  # Close the batch; next pass hits new db_name/kv_name.

    except Exception, e:
        self.cursor_done = True
        self.cursor_todo = None
        return "error: MBFSource exception: " + str(e), None

    return 0, batch
def provide_batch(self):
    if self.cursor_done:
        return 0, None

    batch = Batch(self)

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    source_vbucket_state = \
        getattr(self.opts, 'source_vbucket_state', 'active')

    try:
        if self.cursor_todo is None:
            rv, db, attached_dbs, table_dbs = self.connect_db()
            if rv != 0:
                return rv, None

            # Determine which db the state table is in.
            try:
                (state_db,) = table_dbs[u'vbucket_states']
            except ValueError:
                db.close()
                return "error: no unique vbucket_states table", None

            sql = self.s % (state_db, source_vbucket_state)

            kv_names = []
            for kv_name, db_name in table_dbs.iteritems():
                if (self.opts.id is None and
                    not kv_name.startswith('kv_')):
                    continue
                if (self.opts.id is not None and
                    kv_name != "kv_%s" % (self.opts.id)):
                    continue
                kv_names.append(kv_name)

            db_kv_names = []
            for kv_name in sorted(kv_names,
                                  key=lambda x: int(x.split('_')[-1])):
                for db_name in sorted(table_dbs[kv_name]):
                    db_kv_names.append((db_name, kv_name))

            self.cursor_todo = (db, sql, db_kv_names, None)

        db, sql, db_kv_names, cursor = self.cursor_todo
        if not db:
            self.cursor_done = True
            self.cursor_todo = None
            return 0, None

        while (not self.cursor_done and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            if not cursor:
                if not db_kv_names:
                    self.cursor_done = True
                    self.cursor_todo = None
                    db.close()
                    break

                db_name, kv_name = db_kv_names.pop()

                logging.debug(" MBFSource db/kv table: %s/%s" %
                              (db_name, kv_name))

                cursor = db.cursor()
                cursor.execute(sql % (db_name, kv_name))

                self.cursor_todo = (db, sql, db_kv_names, cursor)

            row = cursor.fetchone()
            if row:
                vbucket_id = row[0]
                key = row[1]
                flg = row[2]
                exp = row[3]
                cas = row[4]
                val = row[5]

                if self.skip(key, vbucket_id):
                    continue

                meta = ''
                batch.append((memcacheConstants.CMD_TAP_MUTATION,
                              vbucket_id, key, flg, exp, cas, meta, val),
                             len(val))
            else:
                cursor.close()
                self.cursor_todo = (db, sql, db_kv_names, None)
                break  # Close the batch; next pass hits new db_name/kv_name.

    except Exception, e:
        self.cursor_done = True
        self.cursor_todo = None
        return "error: MBFSource exception: " + str(e), None

    return 0, batch
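# The cursor_todo/cursor_done pair above implements a resumable scan: the open
# cursor and the remaining table list are parked on self between calls, so each
# provide_batch() resumes exactly where the previous batch stopped. A minimal
# sketch of the same pattern against an in-memory sqlite3 database (table and
# column names here are invented for illustration):
import sqlite3

class ResumableScan(object):
    def __init__(self, db, tables):
        self.db = db
        self.tables = tables
        self.cursor = None
        self.done = False

    def next_batch(self, max_rows):
        rows = []
        while not self.done and len(rows) < max_rows:
            if self.cursor is None:
                if not self.tables:
                    self.done = True
                    break
                table = self.tables.pop()
                self.cursor = self.db.execute("SELECT k, v FROM %s" % table)
            row = self.cursor.fetchone()
            if row:
                rows.append(row)
            else:
                self.cursor = None  # Table drained; next pass opens the next one.
        return rows

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE kv_0 (k TEXT, v TEXT)")
db.execute("INSERT INTO kv_0 VALUES ('a', '1'), ('b', '2')")
scan = ResumableScan(db, ["kv_0"])
print(scan.next_batch(1))  # [('a', '1')]
print(scan.next_batch(5))  # [('b', '2')]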
def provide_batch(self):
    if self.done:
        return 0, None

    batch = Batch(self)

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    s = "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg"

    if self.files is None:  # None != [], as self.files will shrink to [].
        g = glob.glob(BFD.db_dir(self.spec,
                                 self.bucket_name(),
                                 self.node_name()) + "/data-*.cbb")
        self.files = sorted(g)
    try:
        while (not self.done and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            if self.cursor_db is None:
                if not self.files:
                    self.done = True
                    return 0, batch

                rv, db = connect_db(self.files[0], self.opts, CBB_VERSION)
                if rv != 0:
                    return rv, None
                self.files = self.files[1:]

                cursor = db.cursor()
                cursor.execute(s)

                self.cursor_db = (cursor, db)

            cursor, db = self.cursor_db

            row = cursor.fetchone()
            if row:
                vbucket_id = row[1]
                key = row[2]
                val = row[7]

                if self.skip(key, vbucket_id):
                    continue

                msg = (row[0], row[1], row[2], row[3], row[4],
                       int(row[5]),  # CAS as 64-bit integer not string.
                       row[6], row[7])
                batch.append(msg, len(val))
            else:
                if self.cursor_db:
                    self.cursor_db[0].close()
                    self.cursor_db[1].close()
                self.cursor_db = None

        return 0, batch

    except Exception, e:
        self.done = True
        if self.cursor_db:
            self.cursor_db[0].close()
            self.cursor_db[1].close()
        self.cursor_db = None

        return "error: exception reading backup file: " + str(e), None
def provide_batch(self):
    if self.done:
        return 0, None

    batch = Batch(self)

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    s = "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg"

    if self.files is None:  # None != [], as self.files will shrink to [].
        g = glob.glob(BFD.db_dir(self.spec,
                                 self.bucket_name(),
                                 self.node_name()) + "/data-*.cbb")
        self.files = sorted(g)
    try:
        while (not self.done and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            if self.cursor_db is None:
                if not self.files:
                    self.done = True
                    return 0, batch

                rv, db = connect_db(self.files[0], self.opts, CBB_VERSION)
                if rv != 0:
                    return rv, None
                self.files = self.files[1:]

                cursor = db.cursor()
                cursor.execute(s)

                self.cursor_db = (cursor, db)

            cursor, db = self.cursor_db

            row = cursor.fetchone()
            if row:
                vbucket_id = row[1]
                key = row[2]
                val = row[7]

                if self.skip(key, vbucket_id):
                    continue

                msg = (row[0], row[1], row[2], row[3], row[4],
                       int(row[5]),  # CAS as 64-bit integer not string.
                       row[6], row[7])
                batch.append(msg, len(val))
            else:
                if self.cursor_db:
                    self.cursor_db[0].close()
                    self.cursor_db[1].close()
                self.cursor_db = None

        return 0, batch

    except Exception, e:
        self.done = True
        if self.cursor_db:
            self.cursor_db[0].close()
            self.cursor_db[1].close()
        self.cursor_db = None

        return "error: exception reading backup file: " + str(e), None
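# The SELECT above doubles as documentation of the cbb_msg schema inside a
# .cbb backup file, which is a plain SQLite database. A minimal standalone
# reader built on that schema (the file name below is a placeholder; the CBB
# version checks that connect_db() performs are deliberately omitted):
import sqlite3

def read_cbb_msgs(path):
    db = sqlite3.connect(path)
    try:
        cursor = db.execute(
            "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg")
        for cmd, vbucket_id, key, flg, exp, cas, meta, val in cursor:
            # Mirror provide_batch(): coerce CAS to a 64-bit integer, since
            # backups may store it as a string.
            yield (cmd, vbucket_id, key, flg, exp, int(cas), meta, val)
    finally:
        db.close()

# Example usage (hypothetical file name):
# for msg in read_cbb_msgs("data-0000.cbb"):
#     print(msg[1], msg[2])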