def consume_design(opts, sink_spec, sink_map,
                   source_bucket, source_map,
                   source_design) -> couchbaseConstants.PUMP_ERROR:
    if not source_design:
        return 0

    try:
        sd = json.loads(source_design)
    except ValueError as e:
        return f'error: could not parse source_design: {source_design}; exception: {e}'

    rv, d = data_dir(sink_spec)
    if rv != 0:
        return rv

    bucket_dir = f'{d}/{source_bucket["name"]}'
    if not os.path.isdir(bucket_dir):
        os.mkdir(bucket_dir)

    rv, store, store_path = \
        open_latest_store(bucket_dir,
                          "master.couch.*",
                          "^(master)\\.couch\\.([0-9]+)$",
                          "master.couch.1")
    if rv != 0:
        return rv

    bulk_keys = []
    bulk_vals = []

    if sd:
        for row in sd['rows']:
            logging.debug("design_doc row: " + str(row))

            doc_info = couchstore.DocumentInfo(str(row['id']))
            if '_rev' in row['doc']:
                doc_info.revMeta = str(row['doc']['_rev'])
                del row['doc']['_rev']
            doc_info.contentType = couchstore.DocumentInfo.IS_JSON

            bulk_keys.append(doc_info)
            bulk_vals.append(json.dumps(row['doc']))

        if bulk_keys and bulk_vals:
            store.saveMultiple(bulk_keys, bulk_vals)  # type: ignore
            # TODO: Compress ddocs?

        store.commit()  # type: ignore
        store.close()  # type: ignore

    return 0
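
# For reference, consume_design() above assumes source_design parses to a
# JSON object with a "rows" list, each row carrying the design doc's id and
# body; an optional "_rev" is moved into DocumentInfo.revMeta before the body
# is persisted. A minimal sketch of that shape, with hypothetical values:
_EXAMPLE_SOURCE_DESIGN = json.dumps({
    "rows": [{
        "id": "_design/dev_beers",          # becomes the DocumentInfo key
        "doc": {
            "_rev": "1-abcdef",             # stripped into revMeta
            "views": {
                "by_name": {"map": "function (doc) { emit(doc.name, null); }"}
            }
        }
    }]
})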
    def run(self):
        destination_vbucket_state = \
            getattr(self.opts, 'destination_vbucket_state', 'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for i, msg in enumerate(msgs):
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    d = couchstore.DocumentInfo(str(key))
                    flex_meta = 1
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg, flex_meta, dtype)
                    if meta:
                        meta = hex(meta)
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = ('\x00\x00\x00\x00\x00\x00\x00\x00' + meta)[-8:]
                        meta = meta.encode()
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)

                    if cmd == couchbaseConstants.CMD_TAP_MUTATION or cmd == couchbaseConstants.CMD_DCP_MUTATION:
                        v = str(val)
                        try:
                            if re.match('^\\s*{', v) and json.loads(v) is not None:
                                d.contentType = couchstore.DocumentInfo.IS_JSON
                        except ValueError:
                            pass  # NON_JSON is already the default contentType.
                    elif cmd == couchbaseConstants.CMD_TAP_DELETE or cmd == couchbaseConstants.CMD_DCP_DELETE:
                        v = None
                    else:
                        self.future_done(future, "error: SFDSink bad cmd: " + str(cmd))
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        vm = vbucket_states.get(destination_vbucket_state, None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(vi.get("max_deleted_seqno", max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(store, vbucket_id,
                                                     destination_vbucket_state,
                                                     checkpoint_id,
                                                     max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(bulk_keys, bulk_vals,
                                           options=couchstore.CouchStore.COMPRESS)
                        store.commit()

                    store.close()
                except Exception as e:
                    self.future_done(future,
                                     "error: could not save couchstore data"
                                     "; vbucket_id: %s; store_path: %s"
                                     "; exception: %s" % (vbucket_id, store_path, e))
                    return

            self.future_done(future, 0)  # No return to keep looping.
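
# The SFD_REV_META and SFD_REV_SEQ format strings are defined elsewhere in
# this module. Judging from the call sites above (five fields packed into
# revMeta; one 64-bit value unpacked from an 8-byte buffer for revSequence),
# they are plausibly big-endian layouts along these lines -- a sketch, not
# the module's authoritative definitions:
_SFD_REV_META_SKETCH = ">QIIBB"  # cas: u64, exp: u32, flg: u32, flex_meta: u8, dtype: u8
_SFD_REV_SEQ_SKETCH = ">Q"       # rev sequence: u64

assert struct.calcsize(_SFD_REV_META_SKETCH) == 18
assert struct.unpack(_SFD_REV_SEQ_SKETCH, b'\x00' * 7 + b'\x01') == (1,)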
    def run(self):
        destination_vbucket_state = getattr(self.opts, 'destination_vbucket_state', 'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()  # type: pump.Batch, pump.SinkBatchFuture
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for i, msg in enumerate(msgs):
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    # TODO: every key is given the default collection prefix
                    # here; in CC this should change to use the correct
                    # collection.
                    key = encodeCollectionId(0) + key

                    d = couchstore.DocumentInfo(key.decode())
                    flex_meta = 1
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg, flex_meta, dtype)
                    if len(meta) != 0:
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = (b'\x00\x00\x00\x00\x00\x00\x00\x00' + meta)[-8:]
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)

                    if cmd == couchbaseConstants.CMD_TAP_MUTATION or cmd == couchbaseConstants.CMD_DCP_MUTATION:
                        try:
                            v = val
                            if dtype & 0x01:
                                d.contentType = couchstore.DocumentInfo.IS_JSON
                            # Why do this when we have a flag for it?
                            # if re.match('^\\s*{', v) and json.loads(v) is not None:
                            #     d.contentType = couchstore.DocumentInfo.IS_JSON
                        except ValueError:
                            pass  # NON_JSON is already the default contentType.
                    elif cmd == couchbaseConstants.CMD_TAP_DELETE or cmd == couchbaseConstants.CMD_DCP_DELETE:
                        v = None
                    else:
                        self.future_done(future, f'error: SFDSink bad cmd: {cmd!s}')
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        vm = vbucket_states.get(destination_vbucket_state, None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(vi.get("max_deleted_seqno", max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(store, vbucket_id,
                                                     destination_vbucket_state,
                                                     checkpoint_id,
                                                     max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(bulk_keys, bulk_vals,
                                           options=couchstore.CouchStore.COMPRESS)
                        store.commit()

                    store.close()
                except Exception as e:
                    self.future_done(future,
                                     f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                                     f'store_path: {store_path}; exception: {e}')
                    return

            self.future_done(future, 0)  # No return to keep looping.
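
# encodeCollectionId() comes from elsewhere; collection-aware keys are
# prefixed with the collection ID encoded as an unsigned LEB128 varint, and
# the default collection is ID 0. A minimal stand-in for illustration only
# (not the project's implementation):
def _encode_collection_id_sketch(collection_id: int) -> bytes:
    out = bytearray()
    while True:
        byte = collection_id & 0x7F
        collection_id >>= 7
        if collection_id:
            out.append(byte | 0x80)  # high bit set: more bytes follow
        else:
            out.append(byte)
            return bytes(out)

assert _encode_collection_id_sketch(0) == b'\x00'    # default collection
assert _encode_collection_id_sketch(200) == b'\xc8\x01'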
    rv, store, store_path = \
        open_latest_store(bucket_dir,
                          "master.couch.*",
                          "^(master)\\.couch\\.([0-9]+)$",
                          "master.couch.1")
    if rv != 0:
        return rv

    bulk_keys = []
    bulk_vals = []

    if sd:
        for row in sd['rows']:
            logging.debug("design_doc row: " + str(row))

            d = couchstore.DocumentInfo(str(row['id']))
            if '_rev' in row['doc']:
                d.revMeta = str(row['doc']['_rev'])
                del row['doc']['_rev']
            d.contentType = couchstore.DocumentInfo.IS_JSON

            bulk_keys.append(d)
            bulk_vals.append(json.dumps(row['doc']))

        if bulk_keys and bulk_vals:
            store.saveMultiple(bulk_keys, bulk_vals)  # TODO: Compress ddocs?

        store.commit()
        store.close()

    return 0
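
# open_latest_store() is defined elsewhere in this module. From the arguments
# passed above, it evidently globs bucket_dir for "master.couch.*" files,
# opens the one with the highest numeric suffix per the regex, and falls back
# to creating "master.couch.1". A rough, simplified stand-in (hypothetical
# helper; the real function also returns pump-style error strings, and the
# CouchStore 'c' create mode is assumed from the couchstore Python wrapper):
import glob

def _open_latest_store_sketch(bucket_dir, glob_pat, rev_regex, default_name):
    best, best_rev = None, -1
    for path in glob.glob(os.path.join(bucket_dir, glob_pat)):
        m = re.match(rev_regex, os.path.basename(path))
        if m and int(m.group(2)) > best_rev:
            best, best_rev = path, int(m.group(2))
    store_path = best or os.path.join(bucket_dir, default_name)
    store = couchstore.CouchStore(store_path, 'c')  # 'c': create if missing
    return 0, store, store_path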