Example #1
0
    def vbucket_states(opts, spec, bucket_dir):
        """Collect the '_local/vbstate' doc of each latest couchstore file.

        Walks the files yielded by latest_couch_files(bucket_dir), parses
        the vbucket id out of each file's base name with SFD_RE, and fills
        a map of state string (e.g., 'active') to map of vbucket_id to the
        JSON-decoded vbstate doc.  A file whose vbstate doc is empty is
        skipped.

        Every failure returns a 2-tuple of (error message string, None).
        NOTE(review): the success-path return is not visible in this
        excerpt -- confirm it follows the loop.
        """
        vbucket_states = defaultdict(dict)

        for f in latest_couch_files(bucket_dir):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            try:
                store = couchstore.CouchStore(f, 'r')
                try:
                    doc_str = store.localDocs['_local/vbstate']
                    if doc_str:
                        doc = json.loads(doc_str)
                        state = doc.get('state', None)
                        if state:
                            vbucket_states[state][vbucket_id] = doc
                        else:
                            return "error: missing vbucket_state from: %s" \
                                % (f), None
                except Exception as e:
                    return ("error: could not read _local/vbstate from: %s" +
                            "; exception: %s") % (f, e), None
                finally:
                    # Runs on the early error returns above as well, so the
                    # opened store is never left unclosed.
                    store.close()
            except Exception as e:
                return ("error: could not read couchstore file: %s" +
                        "; exception: %s") % (f, e), None
Example #2
0
    def run_worker(self, thread_index):
        """Process queue items until self.ctl['stop'] is set.

        Every queue item unpacks to (source_bucket, source_node, source_map,
        sink_map).  Per item: check_spec is called on the source class and
        on the sink class, a Pump is run between a new source instance and
        a new sink instance, the int values of the per-item counter dict
        are added into self.cur, a non-zero Pump result is stored in
        self.ctl['rv'] if that is still 0, and the item is marked done on
        the queue.
        """
        while not self.ctl['stop']:
            (source_bucket, source_node,
             source_map, sink_map) = self.queue.get()
            hostname = source_node.get('hostname', NA)
            logging.debug(" node: %s" % (hostname))

            # One counter dict per item; it is handed to the spec checks,
            # the source, the sink and the Pump.
            curx = defaultdict(int)
            self.source_class.check_spec(
                source_bucket, source_node, self.opts, self.source_spec, curx)
            self.sink_class.check_spec(
                source_bucket, source_node, self.opts, self.sink_spec, curx)

            source = self.source_class(
                self.opts, self.source_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            sink = self.sink_class(
                self.opts, self.sink_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            pump = Pump(self.opts, source, sink,
                        source_map, sink_map, self.ctl, curx)
            rv = pump.run()

            # Only int-valued counters are accumulated into the totals.
            for name, amount in curx.items():
                if not isinstance(amount, int):
                    continue
                self.cur[name] = self.cur.get(name, 0) + amount

            logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
            if self.ctl['rv'] == 0:
                if rv != 0:
                    self.ctl['rv'] = rv

            self.queue.task_done()
Example #3
0
    def vbucket_states(opts, spec, bucket_dir):
        """Collect the '_local/vbstate' doc of each latest couchstore file.

        Walks the files yielded by latest_couch_files(bucket_dir), parses
        the vbucket id out of each file's base name with SFD_RE, and fills
        a map of state string (e.g., 'active') to map of vbucket_id to the
        JSON-decoded vbstate doc.  A file whose vbstate doc is empty is
        skipped.

        Every failure returns a 2-tuple of (error message string, None).
        NOTE(review): the success-path return is not visible in this
        excerpt -- confirm it follows the loop.
        """
        vbucket_states = defaultdict(dict)

        for f in latest_couch_files(bucket_dir):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            try:
                store = couchstore.CouchStore(f, 'r')
                try:
                    doc_str = store.localDocs['_local/vbstate']
                    if doc_str:
                        doc = json.loads(doc_str)
                        state = doc.get('state', None)
                        if state:
                            vbucket_states[state][vbucket_id] = doc
                        else:
                            return "error: missing vbucket_state from: %s" \
                                % (f), None
                except Exception as e:
                    return ("error: could not read _local/vbstate from: %s" +
                            "; exception: %s") % (f, e), None
                finally:
                    # Runs on the early error returns above as well, so the
                    # opened store is never left unclosed.
                    store.close()
            except Exception as e:
                return ("error: could not read couchstore file: %s" +
                        "; exception: %s") % (f, e), None
Example #4
0
    def run_worker(self, thread_index):
        """Process queue items until self.ctl["stop"] is set.

        Every queue item unpacks to (source_bucket, source_node, source_map,
        sink_map).  Per item: a Pump is run between a new source instance
        and a new sink instance, every value of the per-item counter dict
        is added into self.cur, a non-zero Pump result is stored in
        self.ctl["rv"] if that is still 0, and the item is marked done on
        the queue.
        """
        while not self.ctl["stop"]:
            (source_bucket, source_node,
             source_map, sink_map) = self.queue.get()

            hostname = source_node.get("hostname", NA)
            logging.debug(" node: %s" % (hostname))

            # One counter dict per item; it is handed to the source, the
            # sink and the Pump.
            curx = defaultdict(int)

            source = self.source_class(
                self.opts, self.source_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            sink = self.sink_class(
                self.opts, self.sink_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            pump = Pump(self.opts, source, sink,
                        source_map, sink_map, self.ctl, curx)
            rv = pump.run()

            for name, amount in curx.items():
                self.cur[name] = self.cur.get(name, 0) + amount

            logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
            if self.ctl["rv"] == 0:
                if rv != 0:
                    self.ctl["rv"] = rv

            self.queue.task_done()
Example #5
0
 def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
     """Store the options and the source/sink class and spec arguments.

     Also initializes self.queue to None, self.cur to an int-defaulting
     dict, and self.ctl to a control dict holding 'stop' (False),
     'rv' (0) and 'new_session' (True).
     """
     self.opts = opts
     self.source_class, self.source_spec = source_class, source_spec
     self.sink_class, self.sink_spec = sink_class, sink_spec
     self.queue = None
     self.ctl = dict(stop=False, rv=0, new_session=True)
     self.cur = defaultdict(int)
Example #6
0
 def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
     """Store the options and the source/sink class and spec arguments.

     Also initializes self.queue to None, self.cur to an int-defaulting
     dict, and self.ctl to a control dict holding "stop" (False) and
     "rv" (0).
     """
     self.opts = opts
     self.source_class, self.source_spec = source_class, source_spec
     self.sink_class, self.sink_spec = sink_class, sink_spec
     self.queue = None
     self.ctl = dict(stop=False, rv=0)
     self.cur = defaultdict(int)
Example #7
0
 def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
     """Store the options and the source/sink class and spec arguments.

     Also initializes self.queue to None, self.cur to an int-defaulting
     dict, and self.ctl to a control dict holding 'stop' (False) and
     'rv' (0).
     """
     self.opts = opts
     self.source_class, self.source_spec = source_class, source_spec
     self.sink_class, self.sink_spec = sink_class, sink_spec
     self.queue = None
     self.ctl = dict(stop=False, rv=0)
     self.cur = defaultdict(int)
Example #8
0
 def group_by_vbucket_id(self, vbuckets_num, rehash=0):
     """Group self.msgs by vbucket id; returns dict of vbucket_id->[msgs].

     A msg whose vbucket_id is 0x0000ffff, or every msg when rehash == 1,
     gets its vbucket id recomputed from the CRC32 of its key and is
     stored with that id in place of the one it carried.
     """
     grouped = defaultdict(list)
     for entry in self.msgs:
         cmd, vbid, key = entry[:3]
         if not (vbid == 0x0000ffff or rehash == 1):
             grouped[vbid].append(entry)
             continue
         # The source supplied no vbucket_id (such as a stdin source), or
         # a rehash was requested, so the id is derived from the key.
         crc = zlib.crc32(key)
         vbid = (crc >> 16) & (vbuckets_num - 1)
         grouped[vbid].append((cmd, vbid) + entry[2:])
     return grouped
Example #9
0
 def group_by_vbucket_id(self, vbuckets_num):
     """Group self.msgs by vbucket id; returns dict of vbucket_id->[msgs].

     Each msg must unpack to exactly eight fields.  A msg whose vbucket_id
     is 0x0000FFFF gets its vbucket id recomputed from the CRC32 of its
     key and is stored as a new tuple carrying that id.
     """
     grouped = defaultdict(list)
     for entry in self.msgs:
         cmd, vbid, key, flg, exp, cas, meta, val = entry
         if vbid != 0x0000FFFF:
             grouped[vbid].append(entry)
             continue
         # The source supplied no vbucket_id (such as a stdin source), so
         # the id is derived from the key.
         crc = zlib.crc32(key)
         vbid = (crc >> 16) & (vbuckets_num - 1)
         grouped[vbid].append((cmd, vbid, key, flg, exp, cas, meta, val))
     return grouped
Example #10
0
 def group_by_vbucket_id(self, vbuckets_num, rehash=0):
     """Group self.msgs by vbucket id; returns dict of vbucket_id->[msgs].

     A msg whose vbucket_id is 0x0000ffff, or every msg when rehash == 1,
     gets its vbucket id recomputed from the CRC32 of its key and is
     stored with that id in place of the one it carried.
     """
     grouped = defaultdict(list)
     for entry in self.msgs:
         cmd, vbid, key = entry[:3]
         if not (vbid == 0x0000ffff or rehash == 1):
             grouped[vbid].append(entry)
             continue
         # The source supplied no vbucket_id (such as a stdin source), or
         # a rehash was requested, so the id is derived from the key.
         crc = zlib.crc32(key)
         vbid = (crc >> 16) & (vbuckets_num - 1)
         grouped[vbid].append((cmd, vbid) + entry[2:])
     return grouped
Example #11
0
 def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
     """Store the options and the source/sink class and spec arguments.

     Also initializes self.queue to None, self.cur to an int-defaulting
     dict, and self.ctl to a control dict holding 'stop' (False),
     'rv' (0), 'new_session' (True) and 'new_timestamp' (the current GMT
     time formatted as YYYY-MM-DDTHHMMSSZ).
     """
     self.opts = opts
     self.source_class, self.source_spec = source_class, source_spec
     self.sink_class, self.sink_spec = sink_class, sink_spec
     self.queue = None
     now_gmt = time.gmtime()
     self.ctl = dict(stop=False,
                     rv=0,
                     new_session=True,
                     new_timestamp=time.strftime("%Y-%m-%dT%H%M%SZ",
                                                 now_gmt))
     self.cur = defaultdict(int)
Example #12
0
 def __init__(self, opts, source_class, source_spec, sink_class, sink_spec):
     """Store the options and the source/sink class and spec arguments.

     Also initializes self.queue to None, self.cur to an int-defaulting
     dict, and self.ctl to a control dict holding 'stop' (False),
     'rv' (0), 'new_session' (True) and 'new_timestamp' (the current GMT
     time formatted as YYYY-MM-DDTHHMMSSZ).
     """
     self.opts = opts
     self.source_class, self.source_spec = source_class, source_spec
     self.sink_class, self.sink_spec = sink_class, sink_spec
     self.queue = None
     now_gmt = time.gmtime()
     self.ctl = dict(
         stop=False,
         rv=0,
         new_session=True,
         new_timestamp=time.strftime("%Y-%m-%dT%H%M%SZ", now_gmt),
     )
     self.cur = defaultdict(int)
Example #13
0
 def connect_db(self):
     """Read the vbucket_states table of the sqlite db at self.spec.

     Builds a map of state string to map of vbucket_id to vb_version.
     If sqlite raises a DatabaseError, returns a 4-tuple whose first item
     is an error message string and whose other three items are None.
     NOTE(review): the remainder of this method is not visible in this
     excerpt.
     """
     #Build vbucket state hash table
     vbucket_states = defaultdict(dict)
     sql = """SELECT vbid, vb_version, state FROM vbucket_states"""
     try:
         db = sqlite3.connect(self.spec)
         try:
             cur = db.cursor()
             for row in cur.execute(sql):
                 vbucket_id = int(row[0])
                 vb_version = int(row[1])
                 state = str(row[2])
                 vbucket_states[state][vbucket_id] = vb_version
             cur.close()
         finally:
             # Close the connection even when the query fails, so the
             # error return below does not leave it open.
             db.close()
     except sqlite3.DatabaseError:
         return "error: no vbucket_states table was found;" + \
                " check if db files are correct", None, None, None
Example #14
0
 def connect_db(self):
     """Read the vbucket_states table of the sqlite db at self.spec.

     Builds a map of state string to map of vbucket_id to vb_version.
     If sqlite raises a DatabaseError, returns a 4-tuple whose first item
     is an error message string and whose other three items are None.
     NOTE(review): the remainder of this method is not visible in this
     excerpt.
     """
     #Build vbucket state hash table
     vbucket_states = defaultdict(dict)
     sql = """SELECT vbid, vb_version, state FROM vbucket_states"""
     try:
         db = sqlite3.connect(self.spec)
         try:
             cur = db.cursor()
             for row in cur.execute(sql):
                 vbucket_id = int(row[0])
                 vb_version = int(row[1])
                 state = str(row[2])
                 vbucket_states[state][vbucket_id] = vb_version
             cur.close()
         finally:
             # Close the connection even when the query fails, so the
             # error return below does not leave it open.
             db.close()
     except sqlite3.DatabaseError:
         return "error: no vbucket_states table was found;" + \
                " check if db files are correct", None, None, None
Example #15
0
    def check(opts, spec):
        """Validate a backup file spec and gather its vbucket states.

        Returns (error message, None) when the normalized spec is not an
        existing file, or when the highest version reported by
        MBFSource.db_file_versions() is below 2.  Otherwise reads the
        vbucket_states table of every db file ending in ".mb" into a map
        of state string (e.g., 'active') to map of vbucket_id to an info
        dict with 'vbucket_id', 'vb_version', 'state' and 'checkpoint_id'.
        A db file that raises sqlite3.DatabaseError is skipped.
        NOTE(review): the remainder of this function, including its
        success-path return, is not visible in this excerpt.
        """
        spec = os.path.normpath(spec)
        if not os.path.isfile(spec):
            return "error: backup_dir is not a file: " + spec, None

        db_files = MBFSource.db_files(spec)
        versions = MBFSource.db_file_versions(db_files)
        logging.debug(" MBFSource check db file versions: %s" % (versions))
        if max(versions.values()) < 2:
            err = ("error: wrong backup/db file versions;\n" +
                   " either the metadata db file is not specified\n" +
                   " or the backup files need upgrading to version %s;\n" +
                   " please use cbdbupgrade or contact support.") \
                   % (MBF_VERSION)
            return err, None

        # Map of state string (e.g., 'active') to map of vbucket_id to info.
        vbucket_states = defaultdict(dict)
        sql = """SELECT vbid, vb_version, state, checkpoint_id
                   FROM vbucket_states"""
        for db_file in [f for f in db_files if f.endswith(".mb")]:
            try:
                db = sqlite3.connect(db_file)
                try:
                    cur = db.cursor()
                    for row in cur.execute(sql):
                        vbucket_id = row[0]
                        state = row[2]
                        vbucket_states[state][vbucket_id] = {
                            'vbucket_id': vbucket_id,
                            'vb_version': row[1],
                            'state': state,
                            'checkpoint_id': row[3]
                            }
                    cur.close()
                finally:
                    # The query is expected to fail on some files, so the
                    # connection must be closed on that path too.
                    db.close()
            except sqlite3.DatabaseError:
                pass # A missing vbucket_states table is expected.
Example #16
0
    def check(opts, spec):
        """Validate a backup file spec and gather its vbucket states.

        Returns (error message, None) when the normalized spec is not an
        existing file, or when the highest version reported by
        MBFSource.db_file_versions() is below 2.  Otherwise reads the
        vbucket_states table of the spec file itself into a map of state
        string (e.g., 'active') to map of vbucket_id to an info dict with
        'vbucket_id', 'vb_version', 'state' and 'checkpoint_id'.  A
        sqlite3.DatabaseError while reading is ignored.
        NOTE(review): the remainder of this function, including its
        success-path return, is not visible in this excerpt.
        """
        spec = os.path.normpath(spec)
        if not os.path.isfile(spec):
            return "error: backup_dir is not a file: " + spec, None

        db_files = MBFSource.db_files(spec)
        versions = MBFSource.db_file_versions(db_files)
        logging.debug(" MBFSource check db file versions: %s" % (versions))
        if max(versions.values()) < 2:
            err = ("error: wrong backup/db file versions;\n" +
                   " either the metadata db file is not specified\n" +
                   " or the backup files need upgrading to version %s;\n" +
                   " please use cbdbupgrade or contact support.") \
                   % (MBF_VERSION)
            return err, None

        # Map of state string (e.g., 'active') to map of vbucket_id to info.
        vbucket_states = defaultdict(dict)
        sql = """SELECT vbid, vb_version, state, checkpoint_id
                   FROM vbucket_states"""
        db_file = spec
        try:
            db = sqlite3.connect(db_file)
            try:
                cur = db.cursor()
                for row in cur.execute(sql):
                    vbucket_id = row[0]
                    state = str(row[2])
                    vbucket_states[state][vbucket_id] = {
                        'vbucket_id': vbucket_id,
                        'vb_version': row[1],
                        'state': state,
                        'checkpoint_id': row[3]
                    }
                cur.close()
            finally:
                # The query is expected to fail when the table is missing,
                # so the connection must be closed on that path too.
                db.close()
        except sqlite3.DatabaseError:
            pass  # A missing vbucket_states table is expected.
Example #17
0
    def run_worker(self, thread_index):
        """Process queue items until self.ctl['stop'] is set.

        Every queue item unpacks to (source_bucket, source_node, source_map,
        sink_map).  Per item: check_spec is called on the source class and
        on the sink class, a Pump is run between a new source instance and
        a new sink instance, the int values of the per-item counter dict
        are added into self.cur, a non-zero Pump result is stored in
        self.ctl['rv'] if that is still 0, and the item is marked done on
        the queue.
        """
        while not self.ctl['stop']:
            (source_bucket, source_node,
             source_map, sink_map) = self.queue.get()
            hostname = source_node.get('hostname', NA)
            logging.debug(" node: %s" % (hostname))

            # One counter dict per item; it is handed to the spec checks,
            # the source, the sink and the Pump.
            curx = defaultdict(int)
            self.source_class.check_spec(
                source_bucket, source_node, self.opts, self.source_spec, curx)
            self.sink_class.check_spec(
                source_bucket, source_node, self.opts, self.sink_spec, curx)

            source = self.source_class(
                self.opts, self.source_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            sink = self.sink_class(
                self.opts, self.sink_spec, source_bucket, source_node,
                source_map, sink_map, self.ctl, curx)
            pump = Pump(self.opts, source, sink,
                        source_map, sink_map, self.ctl, curx)
            rv = pump.run()

            # Only int-valued counters are accumulated into the totals.
            for name, amount in curx.items():
                if not isinstance(amount, int):
                    continue
                self.cur[name] = self.cur.get(name, 0) + amount

            logging.debug(" node: %s, done; rv: %s" % (hostname, rv))
            if self.ctl['rv'] == 0:
                if rv != 0:
                    self.ctl['rv'] = rv

            self.queue.task_done()
Example #18
0
 def report_init(self):
     """Reset the report state: start times to now, previous counters empty.

     Sets self.beg_time and self.prev_time to the same current time and
     self.prev to a fresh int-defaulting dict.
     """
     started = time.time()
     self.beg_time = started
     self.prev_time = started
     self.prev = defaultdict(int)
Example #19
0
 def report_init(self):
     """Reset the report state: start times to now, previous counters empty.

     Sets self.beg_time and self.prev_time to the same current time and
     self.prev to a fresh int-defaulting dict.
     """
     started = time.time()
     self.beg_time = started
     self.prev_time = started
     self.prev = defaultdict(int)