def run(self):
    destination_vbucket_state = getattr(self.opts, 'destination_vbucket_state', 'active')

    vbucket_states = self.source_node.get('vbucket_states', {})

    while not self.ctl['stop']:
        batch, future = self.pull_next_batch()  # type: pump.Batch, pump.SinkBatchFuture
        if not batch:
            return self.future_done(future, 0)

        vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
        for vbucket_id, msgs in vbuckets.items():
            checkpoint_id = 0
            max_deleted_seqno = 0

            rv, store, store_path = self.open_store(vbucket_id)
            if rv != 0:
                return self.future_done(future, rv)

            bulk_keys = []
            bulk_vals = []

            for i, msg in enumerate(msgs):
                cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                if self.skip(key, vbucket_id):
                    continue

                # TODO: Every key currently gets the default collection ID;
                # once collections are fully supported this should use the
                # correct collection.
                key = encodeCollectionId(0) + key
                d = couchstore.DocumentInfo(key.decode())
                flex_meta = 1
                d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg, flex_meta, dtype)
                if len(meta) != 0:
                    if len(meta) > 8:
                        meta = meta[0:8]
                    if len(meta) < 8:
                        meta = (b'\x00\x00\x00\x00\x00\x00\x00\x00' + meta)[-8:]
                    d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                else:
                    d.revSequence = 1

                if seqno:
                    d.sequence = int(seqno)
                if cmd == couchbaseConstants.CMD_TAP_MUTATION or cmd == couchbaseConstants.CMD_DCP_MUTATION:
                    try:
                        v = val
                        if dtype & 0x01:
                            d.contentType = couchstore.DocumentInfo.IS_JSON
                        # Why do this when we have a flag for it?
                        # if re.match('^\\s*{', v) and json.loads(v) is not None:
                        #     d.contentType = couchstore.DocumentInfo.IS_JSON
                    except ValueError:
                        pass  # NON_JSON is already the default contentType.
                elif cmd == couchbaseConstants.CMD_TAP_DELETE or cmd == couchbaseConstants.CMD_DCP_DELETE:
                    v = None
                else:
                    self.future_done(future, f'error: SFDSink bad cmd: {cmd!s}')
                    store.close()
                    return

                bulk_keys.append(d)
                bulk_vals.append(v)

            try:
                if bulk_keys and bulk_vals:
                    vm = vbucket_states.get(destination_vbucket_state, None)
                    if vm:
                        vi = vm.get(vbucket_id, None)
                        if vi:
                            c = int(vi.get("checkpoint_id", checkpoint_id))
                            checkpoint_id = max(checkpoint_id, c)
                            m = int(vi.get("max_deleted_seqno", max_deleted_seqno))
                            max_deleted_seqno = max(max_deleted_seqno, m)

                    rv = self.save_vbucket_state(store, vbucket_id,
                                                 destination_vbucket_state,
                                                 checkpoint_id,
                                                 max_deleted_seqno)
                    if rv != 0:
                        self.future_done(future, rv)
                        store.close()
                        return

                    store.saveMultiple(bulk_keys, bulk_vals,
                                       options=couchstore.CouchStore.COMPRESS)

                store.commit()
                store.close()
            except Exception as e:
                self.future_done(future,
                                 f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                                 f'store_path: {store_path}; exception: {e}')
                return

        self.future_done(future, 0)  # No return to keep looping.
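# A minimal, self-contained sketch of the rev-meta handling in run() above.
# SFD_REV_META and SFD_REV_SEQ are defined elsewhere in this module; the
# ">QIIBB" / ">Q" layouts below are assumptions for illustration only.
import struct

def _sketch_rev_meta_roundtrip() -> None:
    SFD_REV_META = ">QIIBB"  # assumed layout: cas, exp, flg, flex_meta, dtype
    SFD_REV_SEQ = ">Q"       # assumed layout: 64-bit rev sequence number

    # Pack the flex metadata as run() does for each DocumentInfo.
    rev_meta = struct.pack(SFD_REV_META, 12345, 0, 0, 1, 0x01)
    cas, exp, flg, flex_meta, dtype = struct.unpack(SFD_REV_META, rev_meta)
    assert (cas, flex_meta, dtype) == (12345, 1, 0x01)

    # Normalize an arbitrary-length meta blob to exactly 8 bytes, as run()
    # does before decoding the rev sequence: long blobs are truncated to the
    # first 8 bytes, short ones left-padded with zeroes.
    meta = b'\x02'
    meta = (b'\x00' * 8 + meta)[-8:]
    rev_sequence, = struct.unpack(SFD_REV_SEQ, meta)
    assert rev_sequence == 2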
def provide_batch(self) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
    """Provides a batch of messages, with GET/SET ratios and keys
       controlled by an mcsoda-inspired approach, but simpler."""
    if self.done:
        return 0, None

    cfg: Dict[str, Any] = self.source_map['cfg']
    prefix: str = cfg['prefix']
    max_items: int = cfg['max-items']
    ratio_sets: float = cfg['ratio-sets']
    exit_after_creates: bool = cfg['exit-after-creates']
    low_compression: bool = cfg['low-compression']
    xattrs: bool = cfg['xattr']
    itr = None
    collections = self.opts.collection
    if collections:
        itr = iter(collections)
    json_body: bool = cfg['json']

    if not self.body:
        if low_compression:
            # Generate a document which snappy will struggle to compress,
            # useful if you're creating data-sets which utilise disk.
            random.seed(0)  # Seed to a fixed value so we always get the same document pattern.
            document = ''.join(random.choice(string.ascii_uppercase)
                               for _ in range(cfg['min-value-size']))
        else:
            # Otherwise a string of zeros is fine, but will compress very well.
            document = "0" * cfg['min-value-size']

        self.body = document

    batch = pump.Batch(self)

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    vbucket_id = 0x0000ffff
    cas, exp, flg = 0, 0, 0

    while (batch.size() < batch_max_size and
           batch.bytes < batch_max_bytes):
        if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
            self.cur_sets = self.cur_sets + 1
            if xattrs:
                cmd: int = couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION
            else:
                cmd = couchbaseConstants.CMD_DCP_MUTATION
            if self.cur_items < max_items:
                key = str(self.cur_items)
                self.cur_items = self.cur_items + 1
            else:
                key = str(self.cur_sets % self.cur_items)
        else:
            self.cur_gets = self.cur_gets + 1
            if xattrs:
                cmd = couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP
            else:
                cmd = couchbaseConstants.CMD_GET
            key = str(self.cur_gets % self.cur_items)
        self.cur_ops = self.cur_ops + 1

        if json_body:
            value = f'{{"name": "{prefix}{key}", "age": {int(key) % 101}, "index": "{key}", "body":"{self.body}"}}'
        else:
            value = self.body

        if xattrs:
            value = json.dumps({"obj": value, "xattr_f": "field1", "xattr_v": "\"value1\""})

        value_bytes: bytes = value.encode()

        # Generate a collection-prefixed key, cycling through the given
        # collection IDs.
        if itr:
            try:
                cid = int(next(itr), 16)
            except StopIteration:
                itr = iter(collections)
                cid = int(next(itr), 16)

            encodedCid = encodeCollectionId(cid)
            # Generate the pack format and pack the key.
            docKey: bytes = struct.pack("!" + str(len(encodedCid)) + "s"
                                        + str(len(prefix)) + "s"
                                        + str(len(key)) + "s",
                                        encodedCid, prefix.encode(), key.encode())
        else:
            docKey = prefix.encode() + key.encode()

        datatype = 0x00
        if json_body:
            datatype = 0x01

        msg: couchbaseConstants.BATCH_MSG = (cmd, vbucket_id, docKey, flg, exp, cas,
                                             b'', value_bytes, 0, datatype, 0, 0)
        batch.append(msg, len(value_bytes))

        if exit_after_creates and self.cur_items >= max_items:
            self.done = True
            return 0, batch

    if batch.size() <= 0:
        return 0, None

    return 0, batch
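# A hedged sketch of the docKey construction in provide_batch() above,
# assuming encodeCollectionId() emits the collection ID as an unsigned
# LEB128 prefix; the real helper is imported elsewhere in this module, and
# this stand-in exists purely for illustration.
import struct

def _sketch_collection_key(cid: int, prefix: str, key: str) -> bytes:
    # Unsigned LEB128: 7 payload bits per byte, high bit set on every byte
    # except the last.
    encoded = bytearray()
    while True:
        byte = cid & 0x7f
        cid >>= 7
        if cid:
            encoded.append(byte | 0x80)
        else:
            encoded.append(byte)
            break
    encoded_cid = bytes(encoded)

    # Mirrors the struct.pack("!...s...s...s") call above; for "s" fields
    # this amounts to concatenating the three byte strings.
    fmt = f'!{len(encoded_cid)}s{len(prefix)}s{len(key)}s'
    return struct.pack(fmt, encoded_cid, prefix.encode(), key.encode())

# Example: _sketch_collection_key(0x8, "pymc", "0") == b'\x08pymc0'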
def provide_batch(self): """Provides a batch of messages, with GET/SET ratios and keys controlled by a mcsoda-inspired approach, but simpler.""" if self.done: return 0, None cfg = self.source_map['cfg'] prefix = cfg['prefix'] max_items = cfg['max-items'] ratio_sets = cfg['ratio-sets'] exit_after_creates = cfg['exit-after-creates'] low_compression = cfg['low-compression'] itr = None collections = self.opts.collection if collections: itr = iter(collections) json = cfg['json'] if not self.body: if low_compression: # Generate a document which snappy will struggle to compress. # Useful if your creating data-sets which utilise disk. random.seed( 0 ) # Seed to a fixed value so we always have the same document pattern. document = ''.join( random.choice(string.ascii_uppercase) for _ in range(cfg['min-value-size'])) else: # else a string of 0 is fine, but will compress very well. document = "0" * cfg['min-value-size'] if json: self.body = '{"name": "%s%s", "age": %s, "index": %s,' + \ ' "body": "%s"}' % document else: self.body = document batch = pump.Batch(self) batch_max_size = self.opts.extra['batch_max_size'] batch_max_bytes = self.opts.extra['batch_max_bytes'] vbucket_id = 0x0000ffff cas, exp, flg = 0, 0, 0 while (batch.size() < batch_max_size and batch.bytes < batch_max_bytes): if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1): self.cur_sets = self.cur_sets + 1 cmd = couchbaseConstants.CMD_TAP_MUTATION if self.cur_items < max_items: key = str(self.cur_items) self.cur_items = self.cur_items + 1 else: key = str(self.cur_sets % self.cur_items) else: self.cur_gets = self.cur_gets + 1 cmd = couchbaseConstants.CMD_GET key = str(self.cur_gets % self.cur_items) self.cur_ops = self.cur_ops + 1 if json: value = self.body % (prefix, key, int(key) % 101, key) else: value = self.body # generate a collection key if itr: try: cid = int(itr.next(), 16) except StopIteration: itr = iter(collections) cid = int(itr.next(), 16) encodedCid = encodeCollectionId(cid) # Generate the pack format and pack the key docKey = struct.pack( "!" + str(len(encodedCid)) + "s" + str(len(prefix)) + "s" + str(len(key)) + "s", encodedCid, prefix, key) else: docKey = prefix + key msg = (cmd, vbucket_id, docKey, flg, exp, cas, '', value, 0, 0, 0, 0) batch.append(msg, len(value)) if exit_after_creates and self.cur_items >= max_items: self.done = True return 0, batch if batch.size() <= 0: return 0, None return 0, batch