def vbucket_states(
        opts, spec, bucket_dir
) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[Dict[str, Any]]]:
    """Reads all the latest couchstore files in a directory, and returns a map
       of state string (e.g., 'active') to a map of vbucket_id to doc."""
    vbucket_states: Dict[str, Any] = defaultdict(dict)

    for f in latest_couch_files(bucket_dir):
        vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))  # type: ignore
        try:
            store = couchstore.CouchStore(f, 'r')
            try:
                doc_str = store.localDocs['_local/vbstate']
                if doc_str:
                    doc = json.loads(doc_str)
                    state = doc.get('state', None)
                    if state:
                        vbucket_states[state][vbucket_id] = doc
                    else:
                        return f'error: missing vbucket_state from: {f}', None
            except Exception as e:
                return f'error: could not read _local/vbstate from: {f}; exception: {e}', None
            store.close()
        except Exception as e:
            return f'error: could not read couchstore file: {f}; exception: {e}', None

    if vbucket_states:
        return 0, vbucket_states

    return f'error: no vbucket_states in files: {bucket_dir}', None
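# Usage sketch (illustrative only, not part of the module): the data-directory
# path below is hypothetical.  On success the first element is 0 and the second
# maps a state string (e.g. 'active') to {vbucket_id: vbstate_doc}.
#
#   err, states = vbucket_states(opts, spec,
#                                '/opt/couchbase/var/lib/couchbase/data/default')
#   if err:
#       print(err)
#   else:
#       active_ids = sorted(states.get('active', {}).keys())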
def open_latest_store(bucket_dir: str, glob_pattern: str, filter_re: str, default_name: str, mode: str = 'c') \
        -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[couchstore.CouchStore], str]:
    store_paths = latest_couch_files(bucket_dir,
                                     glob_pattern=glob_pattern,
                                     filter_re=filter_re)
    if not store_paths:
        if mode == 'r':
            return 0, None, ''
        store_paths = [f'{bucket_dir}/{default_name}']
    if len(store_paths) != 1:
        return f'error: no single, latest couchstore file: {glob_pattern}; found: {store_paths}', None, ''
    try:
        return 0, couchstore.CouchStore(str(store_paths[0]), mode), store_paths[0]
    except Exception as e:
        return f'error: could not open couchstore file: {store_paths[0]}; exception: {e}', None, ''
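# Usage sketch (illustrative only): open the newest "master" couchstore file in
# a bucket data directory, creating one if it is absent (mode 'c').  The glob
# pattern, filter regex, and default file name shown here are assumptions for
# the example, not values taken from this module's call sites.
#
#   err, store, path = open_latest_store(bucket_dir,
#                                        'master.couch.*',
#                                        '^(master)\\.couch\\.([0-9]+)$',
#                                        'master.couch.1')
#   if err:
#       pass  # handle/report the error string
#   elif store:
#       store.close()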
def loader(self):
    rv, d = data_dir(self.spec)
    if rv != 0:
        self.queue.put((rv, None))
        return

    source_vbucket_state = \
        getattr(self.opts, 'source_vbucket_state', 'active')

    source_nodes = self.source_bucket['nodes']
    if len(source_nodes) != 1:
        self.queue.put((f'error: expected 1 node in source_bucket: {self.source_bucket["name"]}', None))
        return

    vbucket_states = source_nodes[0].get('vbucket_states', None)
    if not vbucket_states:
        self.queue.put((f'error: missing vbucket_states in source_bucket: {self.source_bucket["name"]}', None))
        return

    vbuckets = vbucket_states.get(source_vbucket_state, None)
    if vbuckets is None:  # Empty dict is valid.
        self.queue.put((f'error: missing vbuckets in source_bucket: {self.source_bucket["name"]}', None))
        return

    batch_max_size = self.opts.extra['batch_max_size']
    batch_max_bytes = self.opts.extra['batch_max_bytes']

    store = None
    vbucket_id = None

    # Level of indirection since we can't use python 3 nonlocal statement.
    abatch: List[pump.Batch] = [pump.Batch(self)]

    def change_callback(doc_info):
        if doc_info:
            # Handle the new key name spacing for collections and co
            cid, key = decodeCollectionID(doc_info.id.encode())
            # Only support keys in the _default collection
            if cid != 0:
                logging.debug('Skipping as not default collection')
                return

            if self.skip(key, vbucket_id):
                return

            if doc_info.deleted:
                cmd = couchbaseConstants.CMD_DCP_DELETE
                val = b''
            else:
                cmd = couchbaseConstants.CMD_DCP_MUTATION
                val = doc_info.getContents(options=couchstore.CouchStore.DECOMPRESS)

            try:
                rev_meta_bytes = doc_info.revMeta.get_bytes()
                if len(rev_meta_bytes) == 18:
                    conf_res = 0
                    cas, exp, flg, flex_meta, dtype = struct.unpack(SFD_REV_META, rev_meta_bytes)
                elif len(rev_meta_bytes) == 19:
                    cas, exp, flg, flex_meta, dtype, conf_res = struct.unpack(SFD_REV_META_PRE_4_6, rev_meta_bytes)
                else:
                    raise ValueError('Does not match pre- or post-4.6 format')

                meta = bytes([doc_info.revSequence])
                seqno = doc_info.sequence
                nmeta = 0
                msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res)
                abatch[0].append(msg, len(val))
            except Exception as e:
                self.queue.put((f'error: could not read couchstore file due to unsupported file format version;'
                                f' exception: {e}', None))
                return

        if (abatch[0].size() >= batch_max_size or
                abatch[0].bytes >= batch_max_bytes):
            self.queue.put((0, abatch[0]))
            abatch[0] = pump.Batch(self)

    for f in latest_couch_files(f'{d}/{self.source_bucket["name"]}'):
        vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
        if vbucket_id not in vbuckets:
            continue

        try:
            store = couchstore.CouchStore(f, 'r')
            store.forEachChange(0, change_callback)
            store.close()
        except Exception as e:
            # MB-12270: Some files may be deleted due to compaction. We can
            # safely ignore them and move to next file.
            pass

    if abatch[0].size():
        self.queue.put((0, abatch[0]))
    self.queue.put((0, None))
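# Rev-meta layout sketch (assumption, for illustration only): change_callback
# above expects SFD_REV_META to describe an 18-byte record and
# SFD_REV_META_PRE_4_6 a 19-byte record carrying an extra conflict-resolution
# byte, e.g. struct formats along these lines:
#
#   SFD_REV_META = ">QIIBB"           # cas, exp, flg, flex_meta, dtype            (18 bytes)
#   SFD_REV_META_PRE_4_6 = ">QIIBBB"  # cas, exp, flg, flex_meta, dtype, conf_res  (19 bytes)
#
# The authoritative format strings are defined elsewhere in this module.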
"error: could not read couchstore file due to unsupported file format version;" " exception: %s" % e, None)) return if (abatch[0].size() >= batch_max_size or abatch[0].bytes >= batch_max_bytes): self.queue.put((0, abatch[0])) abatch[0] = pump.Batch(self) for f in latest_couch_files(d + '/' + self.source_bucket['name']): vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1)) if not vbucket_id in vbuckets: continue try: store = couchstore.CouchStore(f, 'r') store.forEachChange(0, change_callback) store.close() except Exception, e: #MB-12270: Some files may be deleted due to compaction. We can #safely ingore them and move to next file. pass if abatch[0].size(): self.queue.put((0, abatch[0])) self.queue.put((0, None)) class SFDSink(pump.Sink): """Sink for couchstore in couchbase server/file/directory layout.""" def __init__(self, opts, spec, source_bucket, source_node, source_map,