Example #1
    def run(self):
        destination_vbucket_state = getattr(self.opts,
                                            'destination_vbucket_state',
                                            'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()  # type: pump.Batch, pump.SinkBatchFuture
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for i, msg in enumerate(msgs):
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    # TODO: the default collection is added to all keys; in CC this should change to use the correct collection
                    key = encodeCollectionId(0) + key
                    d = couchstore.DocumentInfo(key.decode())
                    flex_meta = 1
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg,
                                            flex_meta, dtype)
                    if len(meta) != 0:
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = (b'\x00\x00\x00\x00\x00\x00\x00\x00' +
                                    meta)[-8:]
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)
                    if cmd == couchbaseConstants.CMD_TAP_MUTATION or cmd == couchbaseConstants.CMD_DCP_MUTATION:
                        try:
                            v = val
                            if dtype & 0x01:
                                d.contentType = couchstore.DocumentInfo.IS_JSON
                            # Why do this when we have a flag for it?
                            # if re.match('^\\s*{', v) and json.loads(v) is not None:
                            #     d.contentType = couchstore.DocumentInfo.IS_JSON
                        except ValueError:
                            pass  # NON_JSON is already the default contentType.
                    elif cmd == couchbaseConstants.CMD_TAP_DELETE or cmd == couchbaseConstants.CMD_DCP_DELETE:
                        v = None
                    else:
                        self.future_done(future,
                                         f'error: SFDSink bad cmd: {cmd!s}')
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        vm = vbucket_states.get(destination_vbucket_state,
                                                None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(
                                    vi.get("max_deleted_seqno",
                                           max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(
                            store, vbucket_id, destination_vbucket_state,
                            checkpoint_id, max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(
                            bulk_keys,
                            bulk_vals,
                            options=couchstore.CouchStore.COMPRESS)

                    store.commit()
                    store.close()
                except Exception as e:
                    self.future_done(
                        future,
                        f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                        f'store_path: {store_path}; exception: {e}')
                    return

            self.future_done(future, 0)  # No return to keep looping.
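The revMeta/revSequence handling above is easy to misread, so here is a small self-contained sketch of the same pack/normalize logic. The SFD_REV_META and SFD_REV_SEQ format strings below are assumptions standing in for the constants pump_sfd actually defines, which may differ:

import struct

# Assumed stand-ins for the pump_sfd constants; the real module defines them.
SFD_REV_META = ">QIIBB"  # cas (u64), exp (u32), flg (u32), flex_meta (u8), dtype (u8)
SFD_REV_SEQ = ">Q"       # revision sequence number (u64)

def pack_rev_meta(cas, exp, flg, flex_meta, dtype):
    # Mirrors d.revMeta = struct.pack(SFD_REV_META, ...) in run().
    return struct.pack(SFD_REV_META, cas, exp, flg, flex_meta, dtype)

def rev_sequence_from_meta(meta):
    # Mirrors run()'s meta normalization: truncate to 8 bytes, left-pad short
    # values with zero bytes, then unpack a single u64; empty meta falls back
    # to revision 1.
    if not meta:
        return 1
    meta = (b'\x00' * 8 + meta[:8])[-8:]
    rev_seq, = struct.unpack(SFD_REV_SEQ, meta)
    return rev_seq

assert rev_sequence_from_meta(b'') == 1
assert rev_sequence_from_meta(b'\x05') == 5                        # left-padded
assert rev_sequence_from_meta(b'\x00' * 7 + b'\x02' + b'xx') == 2  # truncated first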
Example #2
    def provide_batch(self) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg: Dict[str, Any] = self.source_map['cfg']
        prefix: str = cfg['prefix']
        max_items: int = cfg['max-items']
        ratio_sets: float = cfg['ratio-sets']
        exit_after_creates: bool = cfg['exit-after-creates']
        low_compression: bool = cfg['low-compression']
        xattrs: bool = cfg['xattr']
        itr = None
        collections = self.opts.collection
        if collections:
            itr = iter(collections)

        json_body: bool = cfg['json']
        if not self.body:

            if low_compression:
                # Generate a document which snappy will struggle to compress.
                # Useful if you're creating data-sets which utilise disk.
                random.seed(0)  # Seed to a fixed value so we always have the same document pattern.
                document = ''.join(random.choice(string.ascii_uppercase) for _ in range(cfg['min-value-size']))
            else:
                # else a string of 0 is fine, but will compress very well.
                document = "0" * cfg['min-value-size']

            self.body = document

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                if xattrs:
                    cmd: int = couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                if self.cur_items < max_items:
                    key = str(self.cur_items)
                    self.cur_items = self.cur_items + 1
                else:
                    key = str(self.cur_sets % self.cur_items)
            else:
                self.cur_gets = self.cur_gets + 1
                if xattrs:
                    cmd = couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP
                else:
                    cmd = couchbaseConstants.CMD_GET
                key = str(self.cur_gets % self.cur_items)
            self.cur_ops = self.cur_ops + 1

            if json_body:
                value = f'{{"name": "{prefix}{key}", "age": {int(key) % 101}, "index": "{key}", "body":"{self.body}"}}'
            else:
                value = self.body

            if xattrs:
                value = json.dumps({"obj": value, "xattr_f": "field1", "xattr_v": "\"value1\""})

            value_bytes: bytes = value.encode()
            # generate a collection key
            if itr:
                try:
                    cid = int(next(itr), 16)
                except StopIteration:
                    itr = iter(collections)
                    cid = int(next(itr), 16)

                encodedCid = encodeCollectionId(cid)
                # Generate the pack format and pack the key
                fmt = f'!{len(encodedCid)}s{len(prefix)}s{len(key)}s'
                docKey: bytes = struct.pack(fmt, encodedCid, prefix.encode(), key.encode())
            else:
                docKey = prefix.encode() + key.encode()

            datatype = 0x00
            if json_body:
                datatype = 0x01

            msg: couchbaseConstants.BATCH_MSG = (cmd, vbucket_id, docKey, flg, exp, cas, b'', value_bytes, 0, datatype,
                                                0, 0)
            batch.append(msg, len(value_bytes))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
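encodeCollectionId is imported from elsewhere in the tool; for readers wondering what the key prefix looks like, here is a hedged sketch assuming Couchbase's documented unsigned-LEB128 encoding of collection IDs (the real helper may differ in detail):

def encode_collection_id(cid: int) -> bytes:
    # Sketch only: assumes the collection ID is prefixed to keys as an
    # unsigned LEB128 varint, which is how Couchbase documents the scheme.
    out = bytearray()
    while True:
        byte = cid & 0x7f
        cid >>= 7
        if cid:
            out.append(byte | 0x80)  # continuation bit: more bytes follow
        else:
            out.append(byte)
            return bytes(out)

assert encode_collection_id(0) == b'\x00'         # default collection, as in Example #1
assert encode_collection_id(0x88) == b'\x88\x01'  # multi-byte varint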
Example #3
    def run(self):
        destination_vbucket_state = getattr(self.opts, 'destination_vbucket_state', 'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()  # type: pump.Batch, pump.SinkBatchFuture
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for i, msg in enumerate(msgs):
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    # TODO: the default collection is added to all keys; in CC this should change to use the correct collection
                    key = encodeCollectionId(0) + key
                    d = couchstore.DocumentInfo(key.decode())
                    flex_meta = 1
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg, flex_meta, dtype)
                    if len(meta) != 0:
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = (b'\x00\x00\x00\x00\x00\x00\x00\x00' + meta)[-8:]
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)
                    if cmd == couchbaseConstants.CMD_TAP_MUTATION or cmd == couchbaseConstants.CMD_DCP_MUTATION:
                        try:
                            v = val
                            if dtype & 0x01:
                                d.contentType = couchstore.DocumentInfo.IS_JSON
                            # Why do this when we have a flag for it?
                            # if re.match('^\\s*{', v) and json.loads(v) is not None:
                            #     d.contentType = couchstore.DocumentInfo.IS_JSON
                        except ValueError:
                            pass # NON_JSON is already the default contentType.
                    elif cmd == couchbaseConstants.CMD_TAP_DELETE or cmd == couchbaseConstants.CMD_DCP_DELETE:
                        v = None
                    else:
                        self.future_done(future, f'error: SFDSink bad cmd: {cmd!s}')
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        vm = vbucket_states.get(destination_vbucket_state, None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(vi.get("max_deleted_seqno", max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(store, vbucket_id,
                                                     destination_vbucket_state,
                                                     checkpoint_id,
                                                     max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(bulk_keys, bulk_vals,
                                           options=couchstore.CouchStore.COMPRESS)

                    store.commit()
                    store.close()
                except Exception as e:
                    self.future_done(future, f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                                             f'store_path: {store_path}; exception: {e}')
                    return

            self.future_done(future, 0) # No return to keep looping.
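Both generator examples emit messages with the sentinel vbucket_id 0x0000ffff, and the sinks then call batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash) to assign real vbuckets. Below is a minimal sketch of that rehash, assuming the CRC32-based mapping commonly seen in the pump code; the exact expression may vary by version:

import zlib

def rehash_vbucket_id(key: bytes, vbuckets_num: int) -> int:
    # Assumption: keys map to vbuckets via CRC32, as in pump.Batch's rehash
    # path; vbuckets_num must be a power of two for the mask to be valid.
    return (zlib.crc32(key) >> 16) & (vbuckets_num - 1)

# The same key always lands in the same vbucket, so grouped batches are stable.
print(rehash_vbucket_id(b'pfx0', 1024))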
Example #4
    def provide_batch(self):
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg = self.source_map['cfg']
        prefix = cfg['prefix']
        max_items = cfg['max-items']
        ratio_sets = cfg['ratio-sets']
        exit_after_creates = cfg['exit-after-creates']
        low_compression = cfg['low-compression']
        itr = None
        collections = self.opts.collection
        if collections:
            itr = iter(collections)

        json = cfg['json']
        if not self.body:

            if low_compression:
                # Generate a document which snappy will struggle to compress.
                # Useful if you're creating data-sets which utilise disk.
                random.seed(0)  # Seed to a fixed value so we always have the same document pattern.
                document = ''.join(
                    random.choice(string.ascii_uppercase)
                    for _ in range(cfg['min-value-size']))
            else:
                # else a string of 0 is fine, but will compress very well.
                document = "0" * cfg['min-value-size']

            if json:
                # Note: % binds tighter than +, so only the "body" fragment is
                # formatted with document here; the four remaining %s
                # placeholders are filled in per-key further below.
                self.body = ('{"name": "%s%s", "age": %s, "index": %s,' +
                             (' "body": "%s"}' % document))
            else:
                self.body = document

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                cmd = couchbaseConstants.CMD_TAP_MUTATION
                if self.cur_items < max_items:
                    key = str(self.cur_items)
                    self.cur_items = self.cur_items + 1
                else:
                    key = str(self.cur_sets % self.cur_items)
            else:
                self.cur_gets = self.cur_gets + 1
                cmd = couchbaseConstants.CMD_GET
                key = str(self.cur_gets % self.cur_items)
            self.cur_ops = self.cur_ops + 1

            if json:
                value = self.body % (prefix, key, int(key) % 101, key)
            else:
                value = self.body

            # generate a collection key
            if itr:
                try:
                    cid = int(next(itr), 16)
                except StopIteration:
                    itr = iter(collections)
                    cid = int(next(itr), 16)

                encodedCid = encodeCollectionId(cid)
                # Generate the pack format and pack the key
                docKey = struct.pack(
                    "!" + str(len(encodedCid)) + "s" + str(len(prefix)) + "s" +
                    str(len(key)) + "s", encodedCid, prefix, key)
            else:
                docKey = prefix + key

            msg = (cmd, vbucket_id, docKey, flg, exp, cas, '', value, 0, 0, 0,
                   0)
            batch.append(msg, len(value))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
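The GET/SET scheduling in both provide_batch versions is the same greedy rule: issue a SET whenever the running SET fraction has dropped to or below ratio-sets. A standalone simulation (illustrative only) shows the observed mix converging to the target:

def simulate_mix(ratio_sets: float, total_ops: int = 10000) -> float:
    # Replays the scheduling condition from provide_batch with counters only.
    cur_sets = cur_ops = 0
    for _ in range(total_ops):
        if ratio_sets >= float(cur_sets) / float(cur_ops or 1):
            cur_sets += 1  # would be a SET (mutation)
        cur_ops += 1       # everything else is a GET
    return cur_sets / cur_ops

print(simulate_mix(0.1))   # ~0.1: about one SET per ten operations
print(simulate_mix(0.95))  # ~0.95: mostly SETs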