def vbucket_states(opts, spec, bucket_dir): """Reads all the latest couchstore files in a directory, and returns map of state string (e.g., 'active') to map of vbucket_id to doc.""" vbucket_states = defaultdict(dict) for f in latest_couch_files(bucket_dir): vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1)) try: store = couchstore.CouchStore(f, 'r') try: doc_str = store.localDocs['_local/vbstate'] if doc_str: doc = json.loads(doc_str) state = doc.get('state', None) if state: vbucket_states[state][vbucket_id] = doc else: return "error: missing vbucket_state from: %s" \ % (f), None except Exception, e: return ("error: could not read _local/vbstate from: %s" + "; exception: %s") % (f, e), None store.close() except Exception, e: return ("error: could not read couchstore file: %s" + "; exception: %s") % (f, e), None
def run_worker(self, thread_index):
    """Worker-thread body: repeatedly take a (source_bucket, source_node,
    source_map, sink_map) work item off self.queue and pump that node's
    data from source to sink, until ctl['stop'] is set."""
    while not self.ctl['stop']:
        source_bucket, source_node, source_map, sink_map = self.queue.get()
        hostname = source_node.get('hostname', NA)
        logging.debug(" node: %s" % (hostname))

        # Per-work-item counters; int entries are folded into self.cur below.
        curx = defaultdict(int)

        # Give both endpoints a chance to validate the specs first.
        self.source_class.check_spec(source_bucket, source_node,
                                     self.opts, self.source_spec, curx)
        self.sink_class.check_spec(source_bucket, source_node,
                                   self.opts, self.sink_spec, curx)

        source = self.source_class(self.opts, self.source_spec,
                                   source_bucket, source_node,
                                   source_map, sink_map, self.ctl, curx)
        sink = self.sink_class(self.opts, self.sink_spec,
                               source_bucket, source_node,
                               source_map, sink_map, self.ctl, curx)
        rv = Pump(self.opts, source, sink,
                  source_map, sink_map, self.ctl, curx).run()

        for key, amount in curx.items():
            if isinstance(amount, int):
                self.cur[key] = self.cur.get(key, 0) + amount

        logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
        # First non-zero rv wins; later failures do not overwrite it.
        if self.ctl['rv'] == 0 and rv != 0:
            self.ctl['rv'] = rv
        self.queue.task_done()
def run_worker(self, thread_index):
    """Worker-thread body: consume (source_bucket, source_node,
    source_map, sink_map) work items from self.queue and pump each
    node's data from source to sink until ctl["stop"] is set.

    thread_index is unused in the visible body.
    """
    while not self.ctl["stop"]:
        source_bucket, source_node, source_map, sink_map = self.queue.get()
        hostname = source_node.get("hostname", NA)
        logging.debug(" node: %s" % (hostname))
        # Per-work-item counters; int entries are folded into self.cur below.
        curx = defaultdict(int)
        rv = Pump(
            self.opts,
            self.source_class(
                self.opts, self.source_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx
            ),
            self.sink_class(
                self.opts, self.sink_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx
            ),
            source_map,
            sink_map,
            self.ctl,
            curx,
        ).run()
        for k, v in curx.items():
            # BUGFIX/consistency: only fold int counters into the shared
            # totals, matching the sibling run_worker implementations;
            # a non-int curx entry previously corrupted self.cur.
            if isinstance(v, int):
                self.cur[k] = self.cur.get(k, 0) + v
        logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
        # First non-zero rv wins; later failures do not overwrite it.
        if self.ctl["rv"] == 0 and rv != 0:
            self.ctl["rv"] = rv
        self.queue.task_done()
def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
    """Capture the transfer configuration; the work queue is created later."""
    self.opts = opts
    self.source_class, self.source_spec = source_class, source_spec
    self.sink_class, self.sink_spec = sink_class, sink_spec
    self.queue = None
    # Mutable control state shared with worker threads.
    self.ctl = dict(stop=False, rv=0, new_session=True)
    # Running counters accumulated across workers.
    self.cur = defaultdict(int)
def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
    """Remember the source/sink classes and specs for a later transfer run."""
    (self.opts,
     self.source_class, self.source_spec,
     self.sink_class, self.sink_spec) = (opts,
                                         source_class, source_spec,
                                         sink_class, sink_spec)
    # Created on demand when workers are started.
    self.queue = None
    # Control flags shared with worker threads.
    self.ctl = dict(stop=False, rv=0)
    # Aggregate counters.
    self.cur = defaultdict(int)
def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
    """Hold the transfer configuration; no work starts until run time."""
    self.opts = opts
    self.source_class = source_class
    self.source_spec = source_spec
    self.sink_class = sink_class
    self.sink_spec = sink_spec
    # Work queue is built later, once the node list is known.
    self.queue = None
    # Shared stop flag and overall return value for worker threads.
    self.ctl = dict(stop=False, rv=0)
    # Shared counters, defaulting to zero.
    self.cur = defaultdict(int)
def group_by_vbucket_id(self, vbuckets_num, rehash=0):
    """Return a dict of vbucket_id -> [msgs], grouping self.msgs by id.

    A msg carrying the sentinel id 0x0000ffff (the source did not know
    the vbucket, e.g. a stdin source) -- or every msg when rehash is 1 --
    gets an id derived from the key's CRC32, and the msg tuple is
    rewritten with that new id.
    """
    grouped = defaultdict(list)
    for msg in self.msgs:
        cmd, vbucket_id, key = msg[:3]
        if rehash == 1 or vbucket_id == 0x0000ffff:
            # Hash the key into the vbucket space (vbuckets_num must be
            # a power of two for the mask to be valid).
            vbucket_id = (zlib.crc32(key) >> 16) & (vbuckets_num - 1)
            msg = (cmd, vbucket_id) + msg[2:]
        grouped[vbucket_id].append(msg)
    return grouped
def group_by_vbucket_id(self, vbuckets_num, rehash=0):
    """Returns dict of vbucket_id->[msgs] grouped by msg's vbucket_id.

    Generalized (backward-compatibly) to match the sibling
    implementation: msgs only need (cmd, vbucket_id, key, ...) as their
    first three fields rather than an exact 8-tuple, and an optional
    rehash=1 forces every msg to be rehashed by key.
    """
    g = defaultdict(list)
    for msg in self.msgs:
        cmd, vbucket_id, key = msg[:3]
        if vbucket_id == 0x0000FFFF or rehash == 1:
            # Special case when the source did not supply a vbucket_id
            # (such as stdin source), so we calculate it from the key.
            vbucket_id = (zlib.crc32(key) >> 16) & (vbuckets_num - 1)
            msg = (cmd, vbucket_id) + msg[2:]
        g[vbucket_id].append(msg)
    return g
def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
    """Store transfer configuration and stamp this session's start time."""
    self.opts = opts
    self.source_class = source_class
    self.source_spec = source_spec
    self.sink_class = sink_class
    self.sink_spec = sink_spec
    self.queue = None
    # UTC timestamp identifying this run.
    when = time.strftime("%Y-%m-%dT%H%M%SZ", time.gmtime())
    self.ctl = dict(stop=False, rv=0,
                    new_session=True, new_timestamp=when)
    self.cur = defaultdict(int)
def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
    """Capture configuration for a transfer and record when it began."""
    self.opts = opts
    self.source_class, self.source_spec = source_class, source_spec
    self.sink_class, self.sink_spec = sink_class, sink_spec
    # Built later, when the work items are known.
    self.queue = None
    # Session start in UTC; shared with workers via ctl.
    stamp = time.strftime("%Y-%m-%dT%H%M%SZ", time.gmtime())
    self.ctl = dict(stop=False,
                    rv=0,
                    new_session=True,
                    new_timestamp=stamp)
    self.cur = defaultdict(int)
def connect_db(self):
    """Open the sqlite db at self.spec and build the vbucket state table.

    Builds a map of state string (e.g., 'active') to {vbucket_id:
    vb_version} from the db's vbucket_states table.

    NOTE(review): only the error return (err, None, None, None) is
    visible here; the success-path return lies outside this view --
    presumably (0, ...) with the collected data; confirm against the
    full source.
    NOTE(review): if cursor/execute raises after connect(), the opened
    db connection is not closed before returning the error.
    """
    # Build vbucket state hash table
    vbucket_states = defaultdict(dict)
    sql = """SELECT vbid, vb_version, state FROM vbucket_states"""
    try:
        db = sqlite3.connect(self.spec)
        cur = db.cursor()
        for row in cur.execute(sql):
            vbucket_id = int(row[0])
            vb_version = int(row[1])
            state = str(row[2])
            # Keyed first by state string, then by vbucket id.
            vbucket_states[state][vbucket_id] = vb_version
        cur.close()
        db.close()
    except sqlite3.DatabaseError, e:
        # A DatabaseError here is taken to mean the table is absent or
        # the files are not a valid backup db.
        return "error: no vbucket_states table was found;" + \
            " check if db files are correct", None, None, None
def check(opts, spec):
    """Validate a backup db file and collect vbucket states from its
    sibling .mb db files.

    Returns (error_string, None) on validation failure. NOTE(review):
    the visible portion ends inside the per-file loop; the success-path
    return is outside this view -- presumably it returns the collected
    vbucket_states; confirm against the full source.
    """
    spec = os.path.normpath(spec)
    if not os.path.isfile(spec):
        return "error: backup_dir is not a file: " + spec, None
    db_files = MBFSource.db_files(spec)
    versions = MBFSource.db_file_versions(db_files)
    logging.debug(" MBFSource check db file versions: %s" % (versions))
    # All files at version < 2 means the backup predates this tool.
    if max(versions.values()) < 2:
        err = ("error: wrong backup/db file versions;\n" +
               " either the metadata db file is not specified\n" +
               " or the backup files need upgrading to version %s;\n" +
               " please use cbdbupgrade or contact support.") \
            % (MBF_VERSION)
        return err, None
    # Map of state string (e.g., 'active') to map of vbucket_id to info.
    vbucket_states = defaultdict(dict)
    sql = """SELECT vbid, vb_version, state, checkpoint_id FROM vbucket_states"""
    for db_file in [f for f in db_files if f.endswith(".mb")]:
        try:
            db = sqlite3.connect(db_file)
            cur = db.cursor()
            for row in cur.execute(sql):
                vbucket_id = row[0]
                state = row[2]
                vbucket_states[state][vbucket_id] = {
                    'vbucket_id': vbucket_id,
                    'vb_version': row[1],
                    'state': state,
                    'checkpoint_id': row[3]
                }
            cur.close()
            db.close()
        except sqlite3.DatabaseError, e:
            pass  # A missing vbucket_states table is expected.
def check(opts, spec):
    """Validate a single backup db file and collect its vbucket states.

    Variant that reads vbucket_states from the spec file itself rather
    than from sibling .mb files. Returns (error_string, None) on
    validation failure. NOTE(review): the visible portion ends at the
    except clause; the success-path return is outside this view --
    presumably it returns the collected vbucket_states; confirm against
    the full source.
    """
    spec = os.path.normpath(spec)
    if not os.path.isfile(spec):
        return "error: backup_dir is not a file: " + spec, None
    db_files = MBFSource.db_files(spec)
    versions = MBFSource.db_file_versions(db_files)
    logging.debug(" MBFSource check db file versions: %s" % (versions))
    # All files at version < 2 means the backup predates this tool.
    if max(versions.values()) < 2:
        err = ("error: wrong backup/db file versions;\n" +
               " either the metadata db file is not specified\n" +
               " or the backup files need upgrading to version %s;\n" +
               " please use cbdbupgrade or contact support.") \
            % (MBF_VERSION)
        return err, None
    # Map of state string (e.g., 'active') to map of vbucket_id to info.
    vbucket_states = defaultdict(dict)
    sql = """SELECT vbid, vb_version, state, checkpoint_id FROM vbucket_states"""
    db_file = spec
    try:
        db = sqlite3.connect(db_file)
        cur = db.cursor()
        for row in cur.execute(sql):
            vbucket_id = row[0]
            state = str(row[2])
            vbucket_states[state][vbucket_id] = {
                'vbucket_id': vbucket_id,
                'vb_version': row[1],
                'state': state,
                'checkpoint_id': row[3]
            }
        cur.close()
        db.close()
    except sqlite3.DatabaseError, e:
        pass  # A missing vbucket_states table is expected.
def run_worker(self, thread_index):
    """Worker-thread body: consume (source_bucket, source_node,
    source_map, sink_map) work items from self.queue and pump each
    node's data from source to sink, until ctl['stop'] is set.

    thread_index is unused in the visible body.
    """
    while not self.ctl['stop']:
        source_bucket, source_node, source_map, sink_map = \
            self.queue.get()
        hostname = source_node.get('hostname', NA)
        logging.debug(" node: %s" % (hostname))
        # Per-work-item counters; int entries are folded into self.cur below.
        curx = defaultdict(int)
        # Let both endpoints validate/annotate the specs before pumping.
        self.source_class.check_spec(source_bucket, source_node,
                                     self.opts, self.source_spec, curx)
        self.sink_class.check_spec(source_bucket, source_node,
                                   self.opts, self.sink_spec, curx)
        rv = Pump(self.opts,
                  self.source_class(self.opts, self.source_spec,
                                    source_bucket, source_node,
                                    source_map, sink_map,
                                    self.ctl, curx),
                  self.sink_class(self.opts, self.sink_spec,
                                  source_bucket, source_node,
                                  source_map, sink_map,
                                  self.ctl, curx),
                  source_map, sink_map, self.ctl, curx).run()
        for k, v in curx.items():
            # Only int counters are accumulated into the shared totals.
            if isinstance(v, int):
                self.cur[k] = self.cur.get(k, 0) + v
        logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
        # First non-zero rv wins; later failures do not overwrite it.
        if self.ctl['rv'] == 0 and rv != 0:
            self.ctl['rv'] = rv
        self.queue.task_done()
def report_init(self):
    """Reset progress-report bookkeeping to the current moment."""
    now = time.time()
    self.beg_time = now
    self.prev_time = now
    # Previously-reported counters, defaulting to zero.
    self.prev = defaultdict(int)