Example #1
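A design-document sink path: this method parses the JSON source_design, opens the bucket's master couchstore file, wraps each design-doc row in a couchstore.DocumentInfo, and persists the docs with saveMultiple() followed by commit().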
    def consume_design(opts, sink_spec, sink_map, source_bucket, source_map,
                       source_design) -> couchbaseConstants.PUMP_ERROR:
        if not source_design:
            return 0

        try:
            sd = json.loads(source_design)
        except ValueError as e:
            return f'error: could not parse source_design: {source_design}; exception: {e}'

        # Resolve the sink's data directory, then make sure the bucket's
        # subdirectory exists.
        rv, d = data_dir(sink_spec)
        if rv != 0:
            return rv

        bucket_dir = f'{d}/{source_bucket["name"]}'
        if not os.path.isdir(bucket_dir):
            os.mkdir(bucket_dir)

        # Design documents live in the bucket's master.couch.<rev> file;
        # open the highest-numbered one, or create master.couch.1.
        rv, store, store_path = \
            open_latest_store(bucket_dir,
                              "master.couch.*",
                              "^(master)\\.couch\\.([0-9]+)$",
                              "master.couch.1")
        if rv != 0:
            return rv

        bulk_keys = []
        bulk_vals = []

        if sd:
            for row in sd['rows']:
                logging.debug("design_doc row: " + str(row))

                doc_info = couchstore.DocumentInfo(str(row['id']))
                if '_rev' in row['doc']:
                    # Carry the design doc's revision in couchstore's revMeta,
                    # then drop it from the body itself.
                    doc_info.revMeta = str(row['doc']['_rev'])
                    del row['doc']['_rev']
                doc_info.contentType = couchstore.DocumentInfo.IS_JSON

                bulk_keys.append(doc_info)
                bulk_vals.append(json.dumps(row['doc']))

            if bulk_keys and bulk_vals:
                store.saveMultiple(
                    bulk_keys,
                    bulk_vals)  # type: ignore # TODO: Compress ddocs?

        store.commit()  # type: ignore
        store.close()  # type: ignore
        return 0
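For context, a minimal round trip against the couchstore Python binding looks roughly like this. It follows the same saveMultiple()/commit() call pattern as consume_design() above; the file path is invented for illustration, and the 'c' (create) open mode is an assumption about the binding's constructor:

    import couchstore

    # Create (or open) a couchstore file; 'c' asks the binding to create
    # the file if it does not exist yet.
    store = couchstore.CouchStore('/tmp/master.couch.1', 'c')

    info = couchstore.DocumentInfo('_design/dev_example')
    info.contentType = couchstore.DocumentInfo.IS_JSON

    # Parallel lists of DocumentInfo objects and serialized JSON bodies,
    # exactly as consume_design() builds bulk_keys and bulk_vals.
    store.saveMultiple([info], ['{"views": {}}'])
    store.commit()
    store.close()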
Example #2
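The SFDSink consumer loop: it drains batches, groups messages by vbucket, packs each message's metadata into couchstore revision metadata, and writes documents plus vbucket state into one couchstore file per vbucket. The module-level SFD_* constants are not shown; the definitions below are a plausible sketch consistent with how the code uses them (five fields packed into SFD_REV_META, one 8-byte unsigned value for SFD_REV_SEQ), not an authoritative copy of the source:

    import struct

    SFD_VBUCKETS = 1024      # vbuckets per bucket in the couchstore-files layout
    SFD_REV_META = ">QIIBB"  # cas (u64), exp (u32), flg (u32), flex_meta (u8), dtype (u8)
    SFD_REV_SEQ = ">Q"       # 8-byte big-endian revision sequence number

    # Mirrors the pack/unpack calls in run() below:
    rev_meta = struct.pack(SFD_REV_META, 0, 0, 0, 1, 0)
    rev_seq, = struct.unpack(SFD_REV_SEQ, b'\x00' * 8)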
    def run(self):
        destination_vbucket_state = \
            getattr(self.opts, 'destination_vbucket_state', 'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for msg in msgs:
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    d = couchstore.DocumentInfo(str(key))
                    flex_meta = 1
                    # Pack cas/expiry/flags plus flex_meta and datatype into
                    # couchstore's revision metadata blob.
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg,
                                            flex_meta, dtype)
                    if meta:
                        # Legacy path: render the numeric meta as a hex string,
                        # then truncate or left-pad it to exactly 8 bytes so it
                        # can be unpacked as the revision sequence number.
                        meta = hex(meta)
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = ('\x00\x00\x00\x00\x00\x00\x00\x00' +
                                    meta)[-8:]
                        meta = meta.encode()
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)
                    if cmd in (couchbaseConstants.CMD_TAP_MUTATION,
                               couchbaseConstants.CMD_DCP_MUTATION):
                        v = str(val)
                        try:
                            if re.match('^\\s*{',
                                        v) and json.loads(v) is not None:
                                d.contentType = couchstore.DocumentInfo.IS_JSON
                        except ValueError:
                            pass  # NON_JSON is already the default contentType.
                    elif cmd in (couchbaseConstants.CMD_TAP_DELETE,
                                 couchbaseConstants.CMD_DCP_DELETE):
                        v = None
                    else:
                        self.future_done(future,
                                         f'error: SFDSink bad cmd: {cmd!s}')
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        # Carry forward the source's checkpoint_id and
                        # max_deleted_seqno for this vbucket, when known.
                        vm = vbucket_states.get(destination_vbucket_state,
                                                None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(
                                    vi.get("max_deleted_seqno",
                                           max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(
                            store, vbucket_id, destination_vbucket_state,
                            checkpoint_id, max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(
                            bulk_keys,
                            bulk_vals,
                            options=couchstore.CouchStore.COMPRESS)

                    store.commit()
                    store.close()
                except Exception as e:
                    self.future_done(
                        future,
                        f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                        f'store_path: {store_path}; exception: {e}')
                    return

            self.future_done(future, 0)  # No return to keep looping.
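The save_vbucket_state() helper is not shown above. A minimal sketch of what it plausibly does, assuming the couchstore binding's localDocs mapping for _local/... documents, would be:

    import json

    def save_vbucket_state(self, store, vbucket_id, state,
                           checkpoint_id, max_deleted_seqno):
        # Persist per-vbucket state as the conventional _local/vbstate doc.
        doc = json.dumps({'state': state,
                          'checkpoint_id': str(checkpoint_id),
                          'max_deleted_seqno': str(max_deleted_seqno)})
        try:
            store.localDocs['_local/vbstate'] = doc
        except Exception as e:
            return f'error: save_vbucket_state() failed: {e}'
        return 0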
Example #3
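A later revision of the same run() method, adapted to Python 3 bytes and to collections: keys gain a collection-ID prefix, meta is handled as raw bytes instead of a hex string, and JSON detection comes from the DCP datatype flag rather than sniffing the value.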
    def run(self):
        destination_vbucket_state = getattr(self.opts,
                                            'destination_vbucket_state',
                                            'active')

        vbucket_states = self.source_node.get('vbucket_states', {})

        while not self.ctl['stop']:
            batch, future = self.pull_next_batch()  # type: pump.Batch, pump.SinkBatchFuture
            if not batch:
                return self.future_done(future, 0)

            vbuckets = batch.group_by_vbucket_id(SFD_VBUCKETS, self.rehash)
            for vbucket_id, msgs in vbuckets.items():
                checkpoint_id = 0
                max_deleted_seqno = 0

                rv, store, store_path = self.open_store(vbucket_id)
                if rv != 0:
                    return self.future_done(future, rv)

                bulk_keys = []
                bulk_vals = []

                for msg in msgs:
                    cmd, _vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, nmeta, conf_res = msg
                    if self.skip(key, vbucket_id):
                        continue

                    # TODO: every key currently gets the default collection ID
                    # prefix; in CC this should change to use the key's actual
                    # collection.
                    key = encodeCollectionId(0) + key
                    d = couchstore.DocumentInfo(key.decode())
                    flex_meta = 1
                    d.revMeta = struct.pack(SFD_REV_META, cas, exp, flg,
                                            flex_meta, dtype)
                    if len(meta) != 0:
                        # Truncate or left-pad meta to the 8 bytes that
                        # SFD_REV_SEQ expects.
                        if len(meta) > 8:
                            meta = meta[0:8]
                        if len(meta) < 8:
                            meta = (b'\x00\x00\x00\x00\x00\x00\x00\x00' +
                                    meta)[-8:]
                        d.revSequence, = struct.unpack(SFD_REV_SEQ, meta)
                    else:
                        d.revSequence = 1

                    if seqno:
                        d.sequence = int(seqno)
                    if cmd in (couchbaseConstants.CMD_TAP_MUTATION,
                               couchbaseConstants.CMD_DCP_MUTATION):
                        v = val
                        # The DCP datatype flag already says whether the value
                        # is JSON, so there is no need to sniff the payload
                        # with re.match()/json.loads() as older versions did.
                        if dtype & 0x01:
                            d.contentType = couchstore.DocumentInfo.IS_JSON
                    elif cmd in (couchbaseConstants.CMD_TAP_DELETE,
                                 couchbaseConstants.CMD_DCP_DELETE):
                        v = None
                    else:
                        self.future_done(future,
                                         f'error: SFDSink bad cmd: {cmd!s}')
                        store.close()
                        return

                    bulk_keys.append(d)
                    bulk_vals.append(v)

                try:
                    if bulk_keys and bulk_vals:
                        # Carry forward the source's checkpoint_id and
                        # max_deleted_seqno for this vbucket, when known.
                        vm = vbucket_states.get(destination_vbucket_state,
                                                None)
                        if vm:
                            vi = vm.get(vbucket_id, None)
                            if vi:
                                c = int(vi.get("checkpoint_id", checkpoint_id))
                                checkpoint_id = max(checkpoint_id, c)
                                m = int(
                                    vi.get("max_deleted_seqno",
                                           max_deleted_seqno))
                                max_deleted_seqno = max(max_deleted_seqno, m)

                        rv = self.save_vbucket_state(
                            store, vbucket_id, destination_vbucket_state,
                            checkpoint_id, max_deleted_seqno)
                        if rv != 0:
                            self.future_done(future, rv)
                            store.close()
                            return

                        store.saveMultiple(
                            bulk_keys,
                            bulk_vals,
                            options=couchstore.CouchStore.COMPRESS)

                    store.commit()
                    store.close()
                except Exception as e:
                    self.future_done(
                        future,
                        f'error: could not save couchstore data; vbucket_id: {vbucket_id}; '
                        f'store_path: {store_path}; exception: {e}')
                    return

            self.future_done(future, 0)  # No return to keep looping.
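encodeCollectionId() is imported from elsewhere in the tool. Couchbase prefixes keys with the collection ID encoded as unsigned LEB128, so a hypothetical standalone equivalent (the name and the assertions are illustrative, not from the source) could look like:

    def encode_collection_id(cid: int) -> bytes:
        # Unsigned LEB128: 7 bits per byte, high bit set on every
        # byte except the last.
        out = bytearray()
        while True:
            byte = cid & 0x7F
            cid >>= 7
            if cid:
                out.append(byte | 0x80)
            else:
                out.append(byte)
                return bytes(out)

    assert encode_collection_id(0) == b'\x00'      # default collection
    assert encode_collection_id(200) == b'\xc8\x01'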
Example #4
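An excerpt of the same design-document path as Example #1 (with the DocumentInfo bound to d instead of doc_info), picking up after the bucket directory has been resolved.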
        rv, store, store_path = \
            open_latest_store(bucket_dir,
                              "master.couch.*",
                              "^(master)\\.couch\\.([0-9]+)$",
                              "master.couch.1")
        if rv != 0:
            return rv

        bulk_keys = []
        bulk_vals = []

        if sd:
            for row in sd['rows']:
                logging.debug("design_doc row: " + str(row))

                d = couchstore.DocumentInfo(str(row['id']))
                if '_rev' in row['doc']:
                    d.revMeta = str(row['doc']['_rev'])
                    del row['doc']['_rev']
                d.contentType = couchstore.DocumentInfo.IS_JSON

                bulk_keys.append(d)
                bulk_vals.append(json.dumps(row['doc']))

            if bulk_keys and bulk_vals:
                store.saveMultiple(bulk_keys,
                                   bulk_vals)  # TODO: Compress ddocs?

        store.commit()
        store.close()
        return 0