Example #1
    def provide_batch(self):
        if self.done:
            return 0, None

        batch = pump.Batch(self)
        if self.file_iter == None:
            self.f = self.spec.replace(JSON_SCHEME, "")
            files = list()
            if os.path.isfile(self.f) and self.f.endswith(".zip"):
                zfobj = zipfile.ZipFile(self.f)
                self.working_dir = tempfile.mkdtemp()
                ZipUtil(zfobj).extractall(self.working_dir)
                JSONSource.enumerate_files(self.working_dir, files, False)
            elif os.path.isdir(self.f):
                JSONSource.enumerate_files(self.f, files, False)
            else:
                try:
                    fp = open(self.f, 'r')
                    dockey = JSONSource.gen_dockey(os.path.basename(self.f))
                    self.save_doc(batch, dockey, fp, True)
                    fp.close()
                except IOError, error:
                    return "error: could not open json: %s; exception: %s" % \
                        (self.f, error), None
                self.done = True
                return 0, batch
            if len(files) > 0:
                self.file_iter = iter(files)
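
JSONSource.enumerate_files is called in this example but not listed on this page. A rough sketch of such a helper, assuming it simply walks a directory tree and appends every regular file it finds (whatever filtering the real helper does with its boolean argument is not modelled here), could be:

import os

def enumerate_files(subdir, file_candidates, view_only):
    # Hypothetical sketch: recursively collect regular file paths under subdir.
    # The real helper may filter view/design documents via view_only; that
    # logic is not shown on this page, so it is omitted here.
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        if os.path.isfile(path):
            file_candidates.append(path)
        elif os.path.isdir(path):
            enumerate_files(path, file_candidates, view_only)
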
Example #2
    def provide_batch(self):
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg = self.source_map['cfg']
        prefix = cfg['prefix']
        max_items = cfg['max-items']
        ratio_sets = cfg['ratio-sets']
        exit_after_creates = cfg['exit-after-creates']
        json = cfg['json']
        if not self.body:
            min_value_body = "0" * cfg['min-value-size']
            if json:
                self.body = '{"name": "%s%s", "age": %s, "index": %s,' + \
                            ' "body": "%s"}' % min_value_body
            else:
                self.body = min_value_body

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                cmd = memcacheConstants.CMD_TAP_MUTATION
                if self.cur_items < max_items:
                    key = self.cur_items
                    self.cur_items = self.cur_items + 1
                else:
                    key = self.cur_sets % self.cur_items
            else:
                self.cur_gets = self.cur_gets + 1
                cmd = memcacheConstants.CMD_GET
                key = self.cur_gets % self.cur_items
            self.cur_ops = self.cur_ops + 1

            if json:
                value = self.body % (prefix, key, key % 101, key)
            else:
                value = self.body
            msg = (cmd, vbucket_id, prefix + str(key), flg, exp, cas, '',
                   value)
            batch.append(msg, len(value))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
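
The contract visible here (and in the other examples on this page) is that provide_batch() returns an (error, batch) pair: (0, None) once the source is exhausted, (0, batch) while data is available, and an error string paired with None on failure. A minimal driver loop for such a source might look like the sketch below; the caller name, handle() and the batch.msgs attribute are assumptions, not part of the listed code.

def drain(source):
    # Hypothetical caller: pull batches until the source reports it is done.
    while True:
        rv, batch = source.provide_batch()
        if rv != 0:
            raise RuntimeError(rv)   # rv is an error string on failure
        if batch is None:
            return                   # (0, None) signals exhaustion
        for msg in batch.msgs:       # assumes pump.Batch exposes .msgs
            handle(msg)              # handle() is a placeholder
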
Example #3
    def provide_batch(
            self
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
        if self.done:
            return 0, None

        if not self.r:
            try:
                self.r = csv.reader(open(self.spec, 'r', encoding='utf-8'))
                self.fields = next(self.r)
                if not 'id' in self.fields:
                    return f'error: no \'id\' field in 1st line of csv: {self.spec}', None
            except StopIteration:
                return f'error: could not read 1st line of csv: {self.spec}', None
            except IOError as e:
                return f'error: could not open csv: {self.spec}; exception: {e!s}', None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff

        while (self.r and batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            try:
                vals = next(self.r)
                doc = {}
                for i, field in enumerate(self.fields):
                    if i >= len(vals):
                        continue
                    if field == 'id':
                        doc[field] = vals[i]
                    else:
                        doc[field] = number_try_parse(vals[i])
                if doc['id']:
                    msg: couchbaseConstants.BATCH_MSG = (cmd, vbucket_id,
                                                         doc['id'].encode(), 0,
                                                         0, 0, b'',
                                                         literal_eval(
                                                             doc['value']), 0,
                                                         0, 0, 0)
                    batch.append(msg, len(doc))
            except StopIteration:
                self.done = True
                self.r = None
            except Exception as e:
                logging.error(f'error: fails to read from csv file {e}')
                continue

        if batch.size() <= 0:
            return 0, None
        return 0, batch
Example #4
    def provide_batch(self):
        if self.done:
            return 0, None

        if not self.r:
            try:
                self.r = csv.reader(open(self.spec, 'r', encoding='utf-8'))
                self.fields = next(self.r)
                if not 'id' in self.fields:
                    return ("error: no 'id' field in 1st line of csv: %s" %
                            (self.spec)), None
            except StopIteration:
                return ("error: could not read 1st line of csv: %s" %
                        (self.spec)), None
            except IOError as e:
                return ("error: could not open csv: %s; exception: %s" %
                        (self.spec, e)), None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff

        while (self.r and batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            try:
                vals = next(self.r)
                doc = {}
                for i, field in enumerate(self.fields):
                    if i >= len(vals):
                        continue
                    if field == 'id':
                        doc[field] = vals[i]
                    else:
                        doc[field] = number_try_parse(vals[i])
                if doc['id']:
                    msg = (cmd, vbucket_id, doc['id'], 0, 0, 0, '',
                           doc['value'], 0, 0, 0, 0)
                    batch.append(msg, len(doc))
            except StopIteration:
                self.done = True
                self.r = None
            except Exception as e:
                logging.error("error: fails to read from csv file, %s", e)
                continue

        if batch.size() <= 0:
            return 0, None
        return 0, batch
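
number_try_parse is used by this CSV reader but not shown on this page. One plausible implementation, assuming it merely tries integer then float conversion and otherwise keeps the raw string, is:

def number_try_parse(s):
    # Hypothetical helper: keep numeric-looking CSV fields as numbers,
    # leave everything else as the original string.
    for cast in (int, float):
        try:
            return cast(s)
        except ValueError:
            pass
    return s
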
Example #5
        def change_callback(doc_info):
            if doc_info:
                # Handle the new key name spacing for collections and co
                cid, key = decodeCollectionID(doc_info.id.encode())
                # Only support keys in the _default collection
                if cid != 0:
                    logging.debug('Skipping as not default collection')
                    return

                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = couchbaseConstants.CMD_DCP_DELETE
                    val = b''
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)
                try:
                    rev_meta_bytes = doc_info.revMeta.get_bytes()
                    if len(rev_meta_bytes) == 18:
                        conf_res = 0
                        cas, exp, flg, flex_meta, dtype = struct.unpack(
                            SFD_REV_META, rev_meta_bytes)
                    elif len(rev_meta_bytes) == 19:
                        cas, exp, flg, flex_meta, dtype, conf_res = struct.unpack(
                            SFD_REV_META_PRE_4_6, rev_meta_bytes)
                    else:
                        raise ValueError(
                            'Does not match pre- or post-4.6 format')
                    meta = bytes([doc_info.revSequence])
                    seqno = doc_info.sequence
                    nmeta = 0
                    msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val,
                           seqno, dtype, nmeta, conf_res)
                    abatch[0].append(msg, len(val))
                except Exception as e:
                    self.queue.put((
                        f'error: could not read couchstore file due to unsupported file format version;'
                        f' exception: {e}', None))
                    return

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)
Example #6
    def provide_batch(
            self
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
        if self.done:
            return 0, None

        # During the first iteration load the file names, this is only run once
        if not self.docs:
            self.prepare_docs()

        batch = pump.Batch(self)
        f = self.spec.replace(JSON_SCHEME, "")
        batch_max_size = self.opts.extra['batch_max_size']

        # Each iteration should return a batch or mark the loading as finished
        if os.path.isfile(f) and f.endswith(".zip"):
            zf = zipfile.ZipFile(f)
            while batch.size() < batch_max_size and self.docs:
                path = self.docs.pop()
                key = os.path.basename(path)
                if key.endswith('.json'):
                    key = key[:-5]
                value = zf.read(path)
                self.save_doc(batch, key, value)
            zf.close()
        else:
            while batch.size() < batch_max_size and self.docs:
                path = self.docs.pop()
                key = os.path.basename(path)
                if key.endswith('.json'):
                    key = key[:-5]
                try:
                    fp = open(path, 'rb')
                    value = fp.read()
                    fp.close()
                    self.save_doc(batch, key.encode(), value)
                except IOError as error:
                    logging.error(
                        f'Fail to load json file with error: {error!s}')

        if not self.docs:
            self.done = True

        return 0, batch
Example #7
        def change_callback(doc_info):
            if doc_info:
                key = doc_info.id
                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = memcacheConstants.CMD_TAP_DELETE
                    val = ''
                else:
                    cmd = memcacheConstants.CMD_TAP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)

                cas, exp, flg = struct.unpack(SFD_REV_META, doc_info.revMeta)
                meta = struct.pack(SFD_REV_SEQ, doc_info.revSequence)
                msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val)
                abatch[0].append(msg, len(val))

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)
Example #8
    def loader(self):
        rv, d = data_dir(self.spec)
        if rv != 0:
            self.queue.put((rv, None))
            return

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        source_nodes = self.source_bucket['nodes']
        if len(source_nodes) != 1:
            self.queue.put((
                f'error: expected 1 node in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        vbucket_states = source_nodes[0].get('vbucket_states', None)
        if not vbucket_states:
            self.queue.put((
                f'error: missing vbucket_states in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        vbuckets = vbucket_states.get(source_vbucket_state, None)
        if vbuckets is None:  # Empty dict is valid.
            self.queue.put((
                f'error: missing vbuckets in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        store = None
        vbucket_id = None

        # Level of indirection since we can't use python 3 nonlocal statement.
        abatch: List[pump.Batch] = [pump.Batch(self)]

        def change_callback(doc_info):
            if doc_info:
                # Handle the new key name spacing for collections and co
                cid, key = decodeCollectionID(doc_info.id.encode())
                # Only support keys in the _default collection
                if cid != 0:
                    logging.debug('Skipping as not default collection')
                    return

                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = couchbaseConstants.CMD_DCP_DELETE
                    val = b''
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)
                try:
                    rev_meta_bytes = doc_info.revMeta.get_bytes()
                    if len(rev_meta_bytes) == 18:
                        conf_res = 0
                        cas, exp, flg, flex_meta, dtype = struct.unpack(
                            SFD_REV_META, rev_meta_bytes)
                    elif len(rev_meta_bytes) == 19:
                        cas, exp, flg, flex_meta, dtype, conf_res = struct.unpack(
                            SFD_REV_META_PRE_4_6, rev_meta_bytes)
                    else:
                        raise ValueError(
                            'Does not match pre- or post-4.6 format')
                    meta = bytes([doc_info.revSequence])
                    seqno = doc_info.sequence
                    nmeta = 0
                    msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val,
                           seqno, dtype, nmeta, conf_res)
                    abatch[0].append(msg, len(val))
                except Exception as e:
                    self.queue.put((
                        f'error: could not read couchstore file due to unsupported file format version;'
                        f' exception: {e}', None))
                    return

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)

        for f in latest_couch_files(f'{d}/{self.source_bucket["name"]}'):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            if not vbucket_id in vbuckets:
                continue

            try:
                store = couchstore.CouchStore(f, 'r')
                store.forEachChange(0, change_callback)
                store.close()
            except Exception as e:
                # MB-12270: Some files may be deleted due to compaction. We can
                # safely ignore them and move to next file.
                pass

        if abatch[0].size():
            self.queue.put((0, abatch[0]))
        self.queue.put((0, None))
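
This loader hands results to its caller through self.queue instead of returning them: error strings are enqueued as (error, None), full batches as (0, batch), and a final (0, None) marks the end of the stream. A minimal, hypothetical consumer of that queue could be:

def consume(queue):
    # Hypothetical reader: drain (rv, batch) pairs until the (0, None)
    # sentinel that loader() enqueues when it is finished.
    while True:
        rv, batch = queue.get()
        if rv != 0:
            raise RuntimeError(rv)   # rv is an error string
        if batch is None:
            return                   # end-of-stream sentinel
        process(batch)               # process() is a placeholder
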
Example #9
    def provide_dcp_batch_actual(
            self
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']
        delta_ack_size = batch_max_bytes * 10 / 4  # ack every 25% of buffer size
        last_processed = 0
        total_bytes_read = 0

        vbid: int = 0
        start_seqno: int = 0
        end_seqno: int = 0
        vb_uuid: int = 0
        ss_start_seqno: int = 0
        ss_end_seqno: int = 0
        no_response_count: int = 0
        try:
            while (not self.dcp_done and batch.size() < batch_max_size
                   and batch.bytes < batch_max_bytes):

                if self.response.empty():
                    if len(self.stream_list) > 0:
                        logging.debug(
                            f'no response while there {len(self.stream_list)} active streams'
                        )
                        time.sleep(.25)
                        no_response_count = no_response_count + 1
                        # if there has been no response for at least 30 seconds, consider the dump done
                        if no_response_count == 120:
                            logging.warning(
                                f'no response for 30 seconds while there {len(self.stream_list)}'
                                ' active streams')
                            self.dcp_done = True
                    else:
                        self.dcp_done = True
                    continue

                no_response_count = 0
                unprocessed_size = total_bytes_read - last_processed
                if unprocessed_size > delta_ack_size:
                    rv = self.ack_buffer_size(unprocessed_size)
                    if rv:
                        logging.error(rv)
                    else:
                        last_processed = total_bytes_read

                cmd, errcode, opaque, cas, keylen, extlen, data, datalen, dtype, bytes_read = \
                    self.response.get()  # type: int, int, int, int, int, int, bytes, int, int, int
                total_bytes_read += bytes_read
                rv = 0
                metalen = flags = flg = exp = 0
                key = val = ext = b''
                need_ack = False
                seqno = 0
                if cmd == couchbaseConstants.CMD_DCP_REQUEST_STREAM:
                    total_bytes_read -= bytes_read
                    if errcode == couchbaseConstants.ERR_SUCCESS:
                        pair_index = (self.source_bucket['name'],
                                      self.source_node['hostname'])
                        start = 0
                        step = DCPStreamSource.HIGH_SEQNO_BYTE + DCPStreamSource.UUID_BYTE
                        while start + step <= datalen:
                            uuid, seqno = struct.unpack(
                                couchbaseConstants.DCP_VB_UUID_SEQNO_PKT_FMT,
                                data[start:start + step])
                            if pair_index not in self.cur['failoverlog']:
                                self.cur['failoverlog'][pair_index] = {}
                            if opaque not in self.cur['failoverlog'][pair_index] or \
                               not self.cur['failoverlog'][pair_index][opaque]:
                                self.cur['failoverlog'][pair_index][opaque] = [
                                    (uuid, seqno)
                                ]
                            else:
                                self.cur['failoverlog'][pair_index][
                                    opaque].append((uuid, seqno))
                            start = start + step
                    elif errcode == couchbaseConstants.ERR_KEY_ENOENT:
                        logging.warn(
                            "producer doesn't know about the vbucket uuid, rollback to 0"
                        )
                        vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_end_seqno = \
                            self.stream_list[opaque]
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_KEY_EEXISTS:
                        logging.warning(
                            f'a stream exists on the connection for vbucket: {opaque}'
                        )
                    elif errcode == couchbaseConstants.ERR_NOT_MY_VBUCKET:
                        logging.warning(
                            f'Vbucket is not active anymore, skip it:{vbid!s}')
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ERANGE:
                        logging.warning(
                            f'Start or end sequence numbers specified incorrectly,({start_seqno},'
                            f' {end_seqno})')
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ROLLBACK:
                        vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_stop_seqno = \
                            self.stream_list[opaque]
                        start_seqno, = struct.unpack(
                            couchbaseConstants.DCP_VB_SEQNO_PKT_FMT, data)
                        # find the most latest uuid, hi_seqno that fit start_seqno
                        if self.cur['failoverlog']:
                            pair_index = (self.source_bucket['name'],
                                          self.source_node['hostname'])
                            if self.cur['failoverlog'][pair_index].get("vbid"):
                                for uuid, seqno in self.cur['failoverlog'][
                                        pair_index][vbid]:
                                    if start_seqno >= seqno:
                                        vb_uuid = uuid
                                        break
                        ss_start_seqno = start_seqno
                        ss_end_seqno = start_seqno
                        self.request_dcp_stream(vbid, flags, start_seqno,
                                                end_seqno, vb_uuid,
                                                ss_start_seqno, ss_end_seqno)

                        del self.stream_list[opaque]
                        self.stream_list[opaque] = \
                            (vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_end_seqno)
                    else:
                        logging.error(f'unprocessed errcode: {errcode}')
                        del self.stream_list[opaque]
                elif cmd == couchbaseConstants.CMD_DCP_MUTATION:
                    vbucket_id = errcode
                    seqno, rev_seqno, flg, exp, locktime, metalen, nru = \
                        struct.unpack(couchbaseConstants.DCP_MUTATION_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    val_len = datalen - keylen - metalen - extlen
                    meta_start = val_start + val_len
                    key = data[extlen:val_start]
                    val = data[val_start:meta_start]
                    conf_res = 0
                    if meta_start < datalen:
                        # handle extra conflict resolution fields
                        extra_meta = data[meta_start:]
                        extra_index = 0
                        version = extra_meta[extra_index]
                        extra_index += 1
                        while extra_index < metalen:
                            id, extlen = struct.unpack(
                                couchbaseConstants.DCP_EXTRA_META_PKG_FMT,
                                extra_meta[extra_index:extra_index + 3])
                            extra_index += 3
                            if id == couchbaseConstants.DCP_EXTRA_META_CONFLICT_RESOLUTION:
                                if extlen == 1:
                                    conf_res, = struct.unpack(
                                        ">B",
                                        extra_meta[extra_index:extra_index +
                                                   1])
                                elif extlen == 2:
                                    conf_res, = struct.unpack(
                                        ">H",
                                        extra_meta[extra_index:extra_index +
                                                   2])
                                elif extlen == 4:
                                    conf_res, = struct.unpack(
                                        ">I",
                                        extra_meta[extra_index:extra_index +
                                                   4])
                                elif extlen == 8:
                                    conf_res, = struct.unpack(
                                        ">Q",
                                        extra_meta[extra_index:extra_index +
                                                   8])
                                else:
                                    logging.error(
                                        f'unsupported extra meta data format: {extlen:d}'
                                    )
                                    conf_res = 0
                            extra_index += extlen

                    if not self.skip(key, vbucket_id):
                        dtype, val = self.maybe_uncompress_value(dtype, val)
                        msg = (cmd, vbucket_id, key, flg, exp, cas,
                               rev_seqno.to_bytes(8, 'big'), val, seqno, dtype,
                               metalen, conf_res)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                elif cmd in [
                        couchbaseConstants.CMD_DCP_DELETE,
                        couchbaseConstants.CMD_DCP_EXPIRATION
                ]:
                    vbucket_id = errcode
                    seqno, rev_seqno, metalen = struct.unpack(
                        couchbaseConstants.DCP_DELETE_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    key = data[extlen:val_start]
                    # If the delete has the Xattr data type get the Xattrs from the body
                    if dtype & couchbaseConstants.DATATYPE_HAS_XATTR:
                        val = data[val_start:]
                    if not self.skip(key, vbucket_id):
                        dtype, val = self.maybe_uncompress_value(dtype, val)
                        msg = (cmd, vbucket_id, key, flg, exp, cas,
                               rev_seqno.to_bytes(8, 'big'), val, seqno, dtype,
                               metalen, 0)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                    if cmd == couchbaseConstants.CMD_DCP_DELETE:
                        batch.adjust_size += 1
                elif cmd == couchbaseConstants.CMD_DCP_FLUSH:
                    total_bytes_read -= bytes_read
                    logging.warning("stopping: saw CMD_DCP_FLUSH")
                    self.dcp_done = True
                    break
                elif cmd == couchbaseConstants.CMD_DCP_END_STREAM:
                    del self.stream_list[opaque]
                    if not len(self.stream_list):
                        self.dcp_done = True
                elif cmd == couchbaseConstants.CMD_DCP_SNAPSHOT_MARKER:
                    ss_start_seqno, ss_end_seqno, _ = struct.unpack(
                        couchbaseConstants.DCP_SNAPSHOT_PKT_FMT,
                        data[0:extlen])
                    pair_index = (self.source_bucket['name'],
                                  self.source_node['hostname'])
                    if not self.cur['snapshot']:
                        self.cur['snapshot'] = {}
                    if pair_index not in self.cur['snapshot']:
                        self.cur['snapshot'][pair_index] = {}
                    self.cur['snapshot'][pair_index][opaque] = (ss_start_seqno,
                                                                ss_end_seqno)
                elif cmd == couchbaseConstants.CMD_DCP_NOOP:
                    total_bytes_read -= bytes_read
                    need_ack = True
                elif cmd == couchbaseConstants.CMD_DCP_BUFFER_ACK:
                    total_bytes_read -= bytes_read
                    if errcode != couchbaseConstants.ERR_SUCCESS:
                        logging.warning(
                            f'buffer ack response errcode: {errcode}')
                    continue
                else:
                    total_bytes_read -= bytes_read
                    logging.warning(f'warning: unexpected DCP message: {cmd}')
                    return f'unexpected DCP message: {cmd}', batch

                if need_ack:
                    self.ack_last = True
                    try:
                        if self.dcp_conn is not None:
                            self.dcp_conn._send_msg(
                                cmd,
                                b'',
                                b'',
                                opaque,
                                vbucket_id=0,
                                fmt=couchbaseConstants.RES_PKT_FMT,
                                magic=couchbaseConstants.RES_MAGIC_BYTE)
                    except socket.error:
                        return f'error: socket.error on sendall(); perhaps the source server:' \
                            f' {self.source_node["hostname"]} was rebalancing or had' \
                            f' connectivity/server problems', batch
                    except EOFError:
                        self.dcp_done = True
                        return f'error: EOFError on socket sendall(); perhaps the source server:' \
                            f' {self.source_node["hostname"]} was rebalancing or had ' \
                            f'connectivity/server problems', batch

                    # Close the batch when there's an ACK handshake, so
                    # the server can concurrently send us the next batch.
                    # If we are slow, our slow ACK's will naturally slow
                    # down the server.
                    self.ack_buffer_size(total_bytes_read - last_processed)
                    return 0, batch

                self.ack_last = False
                self.cmd_last = cmd

        except EOFError:
            if batch.size() <= 0 and self.ack_last:
                # A closed conn after an ACK means clean end of TAP dump.
                self.dcp_done = True

        if batch.size() <= 0:
            return 0, None
        self.ack_buffer_size(total_bytes_read - last_processed)
        return 0, batch
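
maybe_uncompress_value is not listed on this page. Assuming the usual DCP convention where a snappy bit in the datatype marks a compressed body, a sketch could look as follows; the bit value and the use of the python-snappy package are assumptions:

import snappy  # assumption: python-snappy is installed

DATATYPE_SNAPPY = 0x02  # assumed snappy datatype bit

def maybe_uncompress_value(dtype, val):
    # Hypothetical sketch: decompress the body and clear the snappy bit so
    # downstream sinks receive a plain value.
    if dtype & DATATYPE_SNAPPY:
        val = snappy.uncompress(val)
        dtype &= ~DATATYPE_SNAPPY
    return dtype, val
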
Example #10
    def provide_upr_batch_actual(self, upr_conn):
        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']
        vbid = 0
        cmd = 0
        start_seqno = 0
        end_seqno = 0
        vb_uuid = 0
        hi_seqno = 0
        try:
            while (not self.upr_done and batch.size() < batch_max_size
                   and batch.bytes < batch_max_bytes):

                #if not self.queue.empty():
                #    vbid, cmd, start_seqno, end_seqno, vb_uuid, hi_seqno = self.queue.get()
                #    self.request_upr_stream(vbid, 0, start_seqno, end_seqno, vb_uuid, hi_seqno)

                if self.response.empty():
                    if len(self.stream_list) > 0:
                        time.sleep(.25)
                    else:
                        self.upr_done = True
                    continue
                cmd, errcode, opaque, cas, keylen, extlen, data, datalen, dtype = \
                    self.response.get()
                #self.recv_upr_msg(self.upr_conn.s)
                #print cmd, errcode, opaque, cas, keylen, extlen, data
                #assert opaque == int(vbid), "expected opaque '%s', got '%s'" % (vbid, opaque)
                rv = 0
                metalen = flags = flg = exp = 0
                key = val = ext = ''
                need_ack = False
                seqno = 0
                if cmd == couchbaseConstants.CMD_UPR_REQUEST_STREAM:
                    if errcode == couchbaseConstants.ERR_SUCCESS:
                        start = 0
                        step = UPRStreamSource.HIGH_SEQNO_BYTE + UPRStreamSource.UUID_BYTE
                        #while start+step <= datalen:
                        #    uuid, seqno = struct.unpack(couchbaseConstants.UPR_VB_UUID_SEQNO_PKT_FMT, \
                        #                    data[start:start + step])
                        #    #print "vbuuid: %s, seqno:%s" % (uuid, seqno)
                        #    start = start + step
                    elif errcode == couchbaseConstants.ERR_KEY_ENOENT:
                        logging.warn(
                            "producer doesn't know about the vbucket uuid, rollback to 0"
                        )
                        vbid, flags, start_seqno, end_seqno, vb_uuid, hi_seqno = self.stream_list[
                            opaque]
                        del self.stream_list[opaque]
                        print vbid, flags, start_seqno, end_seqno, vb_uuid, hi_seqno
                        #self.request_upr_stream(vbid, flags, start_seqno, end_seqno, 0, hi_seqno)
                    elif errcode == couchbaseConstants.ERR_KEY_EEXISTS:
                        logging.warn(
                            "a stream exists on the connection for vbucket:%s"
                            % opaque)
                    elif errcode == couchbaseConstants.ERR_NOT_MY_VBUCKET:
                        logging.warn(
                            "Vbucket is not active anymore, skip it:%s" % vbid)
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ERANGE:
                        #logging.warn("Start and end sequence numbers are specified incorrectly,(%s, %s)" % \
                        #             (start_seqno, end_seqno))
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ROLLBACK:
                        vbid, flags, start_seqno, end_seqno, vb_uuid, hi_seqno = self.stream_list[
                            opaque]
                        start_seqno, = struct.unpack(
                            couchbaseConstants.UPR_VB_SEQNO_PKT_FMT, data)
                        logging.warn("rollback at %s" % start_seqno)
                        self.request_upr_stream(vbid, flags, start_seqno,
                                                end_seqno, 0, hi_seqno)

                        del self.stream_list[opaque]
                        self.stream_list[opaque] = (vbid, flags, start_seqno,
                                                    end_seqno, vb_uuid,
                                                    hi_seqno)
                elif cmd == couchbaseConstants.CMD_UPR_MUTATION:
                    vbucket_id = errcode
                    seqno, rev_seqno, flg, exp, locktime, metalen, nru = \
                        struct.unpack(couchbaseConstants.UPR_MUTATION_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    key = data[extlen:val_start]
                    val = data[val_start:]
                    if not self.skip(key, vbucket_id):
                        msg = (cmd, vbucket_id, key, flg, exp, cas, rev_seqno,
                               val, seqno, dtype, metalen)
                        #print msg
                        batch.append(msg, len(val))
                        self.num_msg += 1
                elif cmd == couchbaseConstants.CMD_UPR_DELETION or \
                     cmd == couchbaseConstants.CMD_UPR_EXPIRATION:
                    vbucket_id = errcode
                    seqno, rev_seqno, metalen = \
                        struct.unpack(couchbaseConstants.UPR_DELETE_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    key = data[extlen:val_start]
                    if not self.skip(key, vbucket_id):
                        msg = (cmd, vbucket_id, key, flg, exp, cas, rev_seqno,
                               val, seqno, dtype, metalen)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                    if cmd == couchbaseConstants.CMD_UPR_DELETION:
                        batch.adjust_size += 1
                elif cmd == couchbaseConstants.CMD_UPR_FLUSH:
                    logging.warn("stopping: saw CMD_UPR_FLUSH")
                    self.upr_done = True
                    break
                elif cmd == couchbaseConstants.CMD_UPR_END_STREAM:
                    del self.stream_list[opaque]
                    if not len(self.stream_list):
                        self.upr_done = True
                elif cmd == couchbaseConstants.CMD_UPR_SNAPSHOT_MARKER:
                    logging.info("snapshot marker received, simply ignored:")
                else:
                    logging.warn("warning: unexpected UPR message: %s" % cmd)
                    return "unexpected UPR message: %s" % cmd, batch

                if need_ack:
                    self.ack_last = True
                    try:
                        upr_conn._sendMsg(
                            cmd,
                            '',
                            '',
                            opaque,
                            vbucketId=0,
                            fmt=couchbaseConstants.RES_PKT_FMT,
                            magic=couchbaseConstants.RES_MAGIC_BYTE)
                    except socket.error:
                        return (
                            "error: socket.error on send();"
                            " perhaps the source server: %s was rebalancing"
                            " or had connectivity/server problems" %
                            (self.source_node['hostname'])), batch
                    except EOFError:
                        self.upr_done = True
                        return (
                            "error: EOFError on socket send();"
                            " perhaps the source server: %s was rebalancing"
                            " or had connectivity/server problems" %
                            (self.source_node['hostname'])), batch

                    # Close the batch when there's an ACK handshake, so
                    # the server can concurrently send us the next batch.
                    # If we are slow, our slow ACK's will naturally slow
                    # down the server.
                    return 0, batch

                self.ack_last = False
                self.cmd_last = cmd

        except EOFError:
            if batch.size() <= 0 and self.ack_last:
                # A closed conn after an ACK means clean end of TAP dump.
                self.upr_done = True

        if batch.size() <= 0:
            return 0, None
        return 0, batch
Example #11
    def provide_batch(self):
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg = self.source_map['cfg']
        prefix = cfg['prefix']
        max_items = cfg['max-items']
        ratio_sets = cfg['ratio-sets']
        exit_after_creates = cfg['exit-after-creates']
        low_compression = cfg['low-compression']
        json = cfg['json']
        if not self.body:

            if low_compression:
                # Generate a document which snappy will struggle to compress.
                # Useful if you're creating data-sets which utilise disk.
                random.seed(
                    0
                )  # Seed to a fixed value so we always have the same document pattern.
                document = ''.join(
                    random.choice(string.ascii_uppercase)
                    for _ in range(cfg['min-value-size']))
            else:
                # else a string of 0 is fine, but will compress very well.
                document = "0" * cfg['min-value-size']

            if json:
                self.body = '{"name": "%s%s", "age": %s, "index": %s,' + \
                            ' "body": "%s"}' % document
            else:
                self.body = document

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                cmd = couchbaseConstants.CMD_TAP_MUTATION
                if self.cur_items < max_items:
                    key = self.cur_items
                    self.cur_items = self.cur_items + 1
                else:
                    key = self.cur_sets % self.cur_items
            else:
                self.cur_gets = self.cur_gets + 1
                cmd = couchbaseConstants.CMD_GET
                key = self.cur_gets % self.cur_items
            self.cur_ops = self.cur_ops + 1

            if json:
                value = self.body % (prefix, key, key % 101, key)
            else:
                value = self.body
            msg = (cmd, vbucket_id, prefix + str(key), flg, exp, cas, '',
                   value, 0, 0, 0, 0)
            batch.append(msg, len(value))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
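
The ratio test above keeps the running fraction of SETs close to ratio-sets: a mutation is issued whenever cur_sets / cur_ops has dropped to or below the target. A tiny standalone simulation of that rule, purely illustrative, shows the mix it produces:

def simulate(ratio_sets, total_ops):
    # Illustrative only: count how many ops the generator's rule would
    # turn into SETs for a given ratio.
    cur_sets = cur_ops = 0
    for _ in range(total_ops):
        if ratio_sets >= float(cur_sets) / float(cur_ops or 1):
            cur_sets += 1          # this op would be a mutation (SET)
        cur_ops += 1
    return cur_sets, cur_ops - cur_sets  # (sets, gets)

print(simulate(0.25, 1000))  # roughly one quarter of the ops are SETs
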
Example #12
    def provide_batch(self) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.Batch]]:
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg: Dict[str, Any] = self.source_map['cfg']
        prefix: str = cfg['prefix']
        max_items: int = cfg['max-items']
        ratio_sets: float = cfg['ratio-sets']
        exit_after_creates: bool = cfg['exit-after-creates']
        low_compression: bool = cfg['low-compression']
        xattrs: bool = cfg['xattr']
        itr = None
        collections = self.opts.collection
        if collections:
            itr = iter(collections)

        json_body: bool = cfg['json']
        if not self.body:

            if low_compression:
                # Generate a document which snappy will struggle to compress.
                # Useful if you're creating data-sets which utilise disk.
                random.seed(0)  # Seed to a fixed value so we always have the same document pattern.
                document = ''.join(random.choice(string.ascii_uppercase) for _ in range(cfg['min-value-size']))
            else:
                # else a string of 0 is fine, but will compress very well.
                document = "0" * cfg['min-value-size']

            self.body = document

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                if xattrs:
                    cmd: int = couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                if self.cur_items < max_items:
                    key = str(self.cur_items)
                    self.cur_items = self.cur_items + 1
                else:
                    key = str(self.cur_sets % self.cur_items)
            else:
                self.cur_gets = self.cur_gets + 1
                if xattrs:
                    cmd = couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP
                else:
                    cmd = couchbaseConstants.CMD_GET
                key = str(self.cur_gets % self.cur_items)
            self.cur_ops = self.cur_ops + 1

            if json_body:
                value = f'{{"name": "{prefix}{key}", "age": {int(key) % 101}, "index": "{key}", "body":"{self.body}"}}'
            else:
                value = self.body

            if xattrs:
                value = json.dumps({"obj": value, "xattr_f": "field1", "xattr_v": "\"value1\""})

            value_bytes: bytes = value.encode()
            # generate a collection key
            if itr:
                try:
                    cid = int(next(itr), 16)
                except StopIteration:
                    itr = iter(collections)
                    cid = int(next(itr), 16)
                except ValueError as e:
                    return f'Invalid collection id, collection id must be a hexadecimal number: {e}', None

                encoded_cid = encode_collection_id(cid)
                # Generate the pack format and pack the key
                doc_key: bytes = struct.pack(
                    ("!" + str(len(encoded_cid)) + "s"
                        + str(len(prefix)) + "s"
                        + str(len(key)) + "s").encode(),
                    encoded_cid,
                    prefix.encode(),
                    key.encode())
            else:
                doc_key = prefix.encode() + key.encode()

            datatype = 0x00
            if json_body and cmd != couchbaseConstants.CMD_GET:
                datatype = 0x01

            msg: couchbaseConstants.BATCH_MSG = (cmd, vbucket_id, doc_key, flg, exp, cas, b'', value_bytes, 0, datatype,
                                                 0, 0)
            batch.append(msg, len(value_bytes))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
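
encode_collection_id is referenced above but not listed here. Assuming collection ids are prepended to keys as unsigned LEB128 bytes (the scheme collection-aware DCP normally uses), a sketch of the encoder is:

def encode_collection_id(cid: int) -> bytes:
    # Sketch under the assumption that the collection id prefix is
    # unsigned LEB128: 7 data bits per byte, high bit set on all but the last.
    out = bytearray()
    while True:
        byte = cid & 0x7f
        cid >>= 7
        if cid:
            out.append(byte | 0x80)  # continuation bit: more bytes follow
        else:
            out.append(byte)
            return bytes(out)
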
Example #13
class CSVSource(pump.Source):
    """Reads csv file, where first line is field names and one field
       should be 'id'."""
    def __init__(self, opts, spec, source_bucket, source_node, source_map,
                 sink_map, ctl, cur):
        super(CSVSource, self).__init__(opts, spec, source_bucket, source_node,
                                        source_map, sink_map, ctl, cur)
        self.done = False
        self.r = None  # An iterator of csv.reader()

    @staticmethod
    def can_handle(opts, spec):
        return spec.endswith(".csv") and os.path.isfile(spec)

    @staticmethod
    def check(opts, spec):
        return 0, {
            'spec':
            spec,
            'buckets': [{
                'name': os.path.basename(spec),
                'nodes': [{
                    'hostname': 'N/A'
                }]
            }]
        }

    @staticmethod
    def provide_design(opts, source_spec, source_bucket, source_map):
        return 0, None

    def provide_batch(self):
        if self.done:
            return 0, None

        if not self.r:
            try:
                self.r = csv.reader(open(self.spec, 'rU'))
                self.fields = self.r.next()
                if not 'id' in self.fields:
                    return ("error: no 'id' field in 1st line of csv: %s" %
                            (self.spec)), None
            except StopIteration:
                return ("error: could not read 1st line of csv: %s" %
                        (self.spec)), None
            except IOError, e:
                return ("error: could not open csv: %s; exception: %s" %
                        (self.spec, e)), None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (self.r and batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            try:
                vals = self.r.next()
                doc = {}
                for i, field in enumerate(self.fields):
                    if i >= len(vals):
                        continue
                    if field == 'id':
                        doc[field] = vals[i]
                    else:
                        doc[field] = number_try_parse(vals[i])
                if doc['id']:
                    doc_json = json.dumps(doc)
                    msg = (cmd, vbucket_id, doc['id'], flg, exp, cas, '',
                           doc_json, 0, 0, 0, 0)
                    batch.append(msg, len(doc))
            except StopIteration:
                self.done = True
                self.r = None
            except Exception, e:
                logging.error("error: fails to read from csv file, %s", e)
                continue
Example #14
class BSONSource(pump.Source):
    """Reads bson file."""

    def __init__(self, opts, spec, source_bucket, source_node,
                 source_map, sink_map, ctl, cur):
        super(BSONSource, self).__init__(opts, spec, source_bucket, source_node,
                                         source_map, sink_map, ctl, cur)
        self.done = False
        self.f = None

    @staticmethod
    def can_handle(opts, spec):
        return spec.startswith(BSON_SCHEME) and \
            os.path.isfile(spec.replace(BSON_SCHEME, ""))

    @staticmethod
    def check(opts, spec):
        return 0, {'spec': spec,
                   'buckets': [{'name': os.path.basename(spec),
                                'nodes': [{'hostname': 'N/A'}]}]}

    @staticmethod
    def provide_design(opts, source_spec, source_bucket, source_map):
        return 0, None

    def provide_batch(self):
        if self.done:
            return 0, None

        if not self.f:
            try:
                self.f = open(self.spec.replace(BSON_SCHEME, ""))
            except IOError, e:
                return "error: could not open bson: %s; exception: %s" % \
                    (self.spec, e), None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (self.f and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            doc_size_buf = self.f.read(4)
            if not doc_size_buf:
                self.done = True
                self.f.close()
                self.f = None
                break
            doc_size, = struct.unpack("<i", doc_size_buf)
            doc_buf = self.f.read(doc_size - 4)
            if not doc_buf:
                self.done = True
                self.f.close()
                self.f = None
                break
            doc = bson._elements_to_dict(doc_buf, dict, True)
            key = doc['_id']
            doc_json = json.dumps(doc)
            msg = (cmd, vbucket_id, key, flg, exp, cas, '', doc_json, 0, 0, 0)
            batch.append(msg, len(doc))

        if batch.size() <= 0:
            return 0, None
        return 0, batch
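
The reader above depends only on standard BSON framing: every document begins with a 4-byte little-endian total length that includes those four bytes. A small, hypothetical scanner that reuses the same framing to list document sizes without decoding them could be:

import struct

def scan_bson_sizes(path):
    # Hypothetical sketch: walk the length-prefixed documents in a .bson file
    # and record each document's declared size.
    sizes = []
    with open(path, 'rb') as f:
        while True:
            size_buf = f.read(4)
            if not size_buf:
                break
            doc_size, = struct.unpack("<i", size_buf)
            f.seek(doc_size - 4, 1)  # skip the remainder of this document
            sizes.append(doc_size)
    return sizes
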
Example #15
    def provide_batch(self):
        if self.done:
            return 0, None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        s = "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg"

        if self.files is None: # None != [], as self.files will shrink to [].
            g = glob.glob(BFD.db_dir(self.spec,
                                     self.bucket_name(),
                                     self.node_name()) + "/data-*.cbb")
            self.files = sorted(g)
        try:
            while (not self.done and
                   batch.size() < batch_max_size and
                   batch.bytes < batch_max_bytes):
                if self.cursor_db is None:
                    if not self.files:
                        self.done = True
                        return 0, batch

                    rv, db = connect_db(self.files[0], self.opts, CBB_VERSION)
                    if rv != 0:
                        return rv, None
                    self.files = self.files[1:]

                    cursor = db.cursor()
                    cursor.execute(s)

                    self.cursor_db = (cursor, db)

                cursor, db = self.cursor_db

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[1]
                    key = row[2]
                    val = row[7]

                    if self.skip(key, vbucket_id):
                        continue

                    msg = (row[0], row[1], row[2], row[3], row[4],
                           int(row[5]), # CAS as 64-bit integer not string.
                           row[6], row[7])
                    batch.append(msg, len(val))
                else:
                    if self.cursor_db:
                        self.cursor_db[0].close()
                        self.cursor_db[1].close()
                    self.cursor_db = None

            return 0, batch

        except Exception, e:
            self.done = True
            if self.cursor_db:
                self.cursor_db[0].close()
                self.cursor_db[1].close()
            self.cursor_db = None

            return "error: exception reading backup file: " + str(e), None
Example #16
    def provide_batch_actual(self, tap_conn):
        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        try:
            while (not self.tap_done and
                   batch.size() < batch_max_size and
                   batch.bytes < batch_max_bytes):
                # TODO: (1) TAPDumpSource - provide_batch timeout on inactivity.

                rv, cmd, vbucket_id, key, flg, exp, cas, meta, val, \
                    opaque, need_ack = self.read_tap_conn(tap_conn)
                if rv != 0:
                    self.tap_done = True
                    return rv, batch

                if (cmd == memcacheConstants.CMD_TAP_MUTATION or
                    cmd == memcacheConstants.CMD_TAP_DELETE):
                    if not self.skip(key, vbucket_id):
                        msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                elif cmd == memcacheConstants.CMD_TAP_OPAQUE:
                    pass
                elif cmd == memcacheConstants.CMD_NOOP:
                    # 1.8.x servers might not end the TAP dump on an empty bucket,
                    # so we treat 2 NOOP's in a row as the end and proactively close.
                    # Only do this when there've been no msgs to avoid closing
                    # during a slow backfill.
                    if (self.cmd_last == memcacheConstants.CMD_NOOP and
                        self.num_msg == 0 and
                        batch.size() <= 0):
                        self.tap_done = True
                        return 0, batch
                elif cmd == memcacheConstants.CMD_TAP_FLUSH:
                    logging.warn("stopping: saw CMD_TAP_FLUSH")
                    self.tap_done = True
                    break
                else:
                    s = str(pump.CMD_STR.get(cmd, cmd))
                    logging.warn("warning: unexpected TAP message: " + s)
                    return "unexpected TAP message: " + s, batch

                if need_ack:
                    self.ack_last = True
                    try:
                        tap_conn._sendMsg(cmd, '', '', opaque, vbucketId=0,
                                          fmt=memcacheConstants.RES_PKT_FMT,
                                          magic=memcacheConstants.RES_MAGIC_BYTE)
                    except socket.error:
                        return ("error: socket.error on send();"
                                " perhaps the source server: %s was rebalancing"
                                " or had connectivity/server problems" %
                                (self.source_node['hostname'])), batch
                    except EOFError:
                        self.tap_done = True
                        return ("error: EOFError on socket send();"
                                " perhaps the source server: %s was rebalancing"
                                " or had connectivity/server problems" %
                                (self.source_node['hostname'])), batch

                    # Close the batch when there's an ACK handshake, so
                    # the server can concurrently send us the next batch.
                    # If we are slow, our slow ACKs will naturally slow
                    # down the server.
                    return 0, batch

                self.ack_last = False
                self.cmd_last = cmd

        except EOFError:
            if batch.size() <= 0 and self.ack_last:
                # A closed conn after an ACK means clean end of TAP dump.
                self.tap_done = True

        if batch.size() <= 0:
            return 0, None

        return 0, batch
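Both the backup reader and the TAP reader above call self.skip(key, vbucket_id) before appending a message, but skip() itself is defined elsewhere in pump.py. Judging only from how it is called, it filters messages by vbucket id and key. A hypothetical sketch of that kind of filter (the option names are assumptions, not taken from the excerpts):

import re

def skip(opts, key, vbucket_id):
    # Hypothetical filter in the spirit of how skip() is used above:
    # drop messages from other vbuckets or with non-matching keys.
    if getattr(opts, 'id', None) is not None and opts.id != vbucket_id:
        return True
    key_regex = getattr(opts, 'key', None)
    if key_regex and not re.search(key_regex, key):
        return True
    return False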
Example #17
    def provide_dcp_batch_actual(self):
        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']
        delta_ack_size = batch_max_bytes * 10 / 4  # ack every 25% of the flow-control buffer (10x batch_max_bytes)
        last_processed = 0
        total_bytes_read = 0

        vbid = 0
        cmd = 0
        start_seqno = 0
        end_seqno = 0
        vb_uuid = 0
        hi_seqno = 0
        ss_start_seqno = 0
        ss_end_seqno = 0
        try:
            while (not self.dcp_done and batch.size() < batch_max_size
                   and batch.bytes < batch_max_bytes):

                if self.response.empty():
                    if len(self.stream_list) > 0:
                        logging.debug(
                            "no response while there are %s active streams" %
                            len(self.stream_list))
                        time.sleep(.25)
                    else:
                        self.dcp_done = True
                    continue
                unprocessed_size = total_bytes_read - last_processed
                if unprocessed_size > delta_ack_size:
                    rv = self.ack_buffer_size(unprocessed_size)
                    if rv:
                        logging.error(rv)
                    else:
                        last_processed = total_bytes_read

                cmd, errcode, opaque, cas, keylen, extlen, data, datalen, dtype, bytes_read = \
                    self.response.get()
                total_bytes_read += bytes_read
                rv = 0
                metalen = flags = flg = exp = 0
                key = val = ext = ''
                need_ack = False
                seqno = 0
                if cmd == couchbaseConstants.CMD_DCP_REQUEST_STREAM:
                    if errcode == couchbaseConstants.ERR_SUCCESS:
                        pair_index = (self.source_bucket['name'],
                                      self.source_node['hostname'])
                        start = 0
                        step = DCPStreamSource.HIGH_SEQNO_BYTE + DCPStreamSource.UUID_BYTE
                        while start + step <= datalen:
                            uuid, seqno = struct.unpack(
                                            couchbaseConstants.DCP_VB_UUID_SEQNO_PKT_FMT, \
                                            data[start:start + step])
                            if pair_index not in self.cur['failoverlog']:
                                self.cur['failoverlog'][pair_index] = {}
                            if opaque not in self.cur['failoverlog'][pair_index] or \
                               not self.cur['failoverlog'][pair_index][opaque]:
                                self.cur['failoverlog'][pair_index][opaque] = [
                                    (uuid, seqno)
                                ]
                            else:
                                self.cur['failoverlog'][pair_index][
                                    opaque].append((uuid, seqno))
                            start = start + step
                    elif errcode == couchbaseConstants.ERR_KEY_ENOENT:
                        logging.warn(
                            "producer doesn't know about the vbucket uuid, rollback to 0"
                        )
                        vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_end_seqno = \
                            self.stream_list[opaque]
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_KEY_EEXISTS:
                        logging.warn(
                            "a stream exists on the connection for vbucket:%s"
                            % opaque)
                    elif errcode == couchbaseConstants.ERR_NOT_MY_VBUCKET:
                        logging.warn(
                            "Vbucket is not active anymore, skip it:%s" % vbid)
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ERANGE:
                        logging.warn("Start or end sequence numbers specified incorrectly,(%s, %s)" % \
                                     (start_seqno, end_seqno))
                        del self.stream_list[opaque]
                    elif errcode == couchbaseConstants.ERR_ROLLBACK:
                        vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_stop_seqno = \
                            self.stream_list[opaque]
                        start_seqno, = struct.unpack(
                            couchbaseConstants.DCP_VB_SEQNO_PKT_FMT, data)
                        # find the latest (uuid, seqno) pair in the failover log that fits start_seqno
                        if self.cur['failoverlog']:
                            pair_index = (self.source_bucket['name'],
                                          self.source_node['hostname'])
                            if self.cur['failoverlog'][pair_index].get(vbid):
                                for uuid, seqno in self.cur['failoverlog'][
                                        pair_index][vbid]:
                                    if start_seqno >= seqno:
                                        vb_uuid = uuid
                                        break
                        ss_start_seqno = start_seqno
                        ss_end_seqno = start_seqno
                        self.request_dcp_stream(vbid, flags, start_seqno,
                                                end_seqno, vb_uuid,
                                                ss_start_seqno, ss_end_seqno)

                        del self.stream_list[opaque]
                        self.stream_list[opaque] = \
                            (vbid, flags, start_seqno, end_seqno, vb_uuid, ss_start_seqno, ss_end_seqno)
                    else:
                        logging.error("unprocessed errcode:%s" % errcode)
                        del self.stream_list[opaque]
                elif cmd == couchbaseConstants.CMD_DCP_MUTATION:
                    # For DCP data messages the header field parsed as errcode
                    # actually carries the vbucket id.
                    vbucket_id = errcode
                    seqno, rev_seqno, flg, exp, locktime, metalen, nru = \
                        struct.unpack(couchbaseConstants.DCP_MUTATION_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    val_len = datalen - keylen - metalen - extlen
                    meta_start = val_start + val_len
                    key = data[extlen:val_start]
                    val = data[val_start:meta_start]
                    conf_res = 0
                    if meta_start < datalen:
                        # handle extra conflict resolution fields
                        extra_meta = data[meta_start:]
                        extra_index = 0
                        version = extra_meta[extra_index]
                        extra_index += 1
                        while extra_index < metalen:
                            id, extlen = struct.unpack(
                                couchbaseConstants.DCP_EXTRA_META_PKG_FMT,
                                extra_meta[extra_index:extra_index + 3])
                            extra_index += 3
                            if id == couchbaseConstants.DCP_EXTRA_META_CONFLICT_RESOLUTION:
                                if extlen == 1:
                                    conf_res, = struct.unpack(
                                        ">B",
                                        extra_meta[extra_index:extra_index +
                                                   1])
                                elif extlen == 2:
                                    conf_res, = struct.unpack(
                                        ">H",
                                        extra_meta[extra_index:extra_index +
                                                   2])
                                elif extlen == 4:
                                    conf_res, = struct.unpack(
                                        ">I",
                                        extra_meta[extra_index:extra_index +
                                                   4])
                                elif extlen == 8:
                                    conf_res, = struct.unpack(
                                        ">Q",
                                        extra_meta[extra_index:extra_index +
                                                   8])
                                else:
                                    logging.error(
                                        "unsupported extra meta data format:%d"
                                        % extlen)
                                    conf_res = 0
                            extra_index += extlen

                    if not self.skip(key, vbucket_id):
                        msg = (cmd, vbucket_id, key, flg, exp, cas, rev_seqno, val, seqno, dtype, \
                               metalen, conf_res)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                elif cmd == couchbaseConstants.CMD_DCP_DELETE or \
                     cmd == couchbaseConstants.CMD_DCP_EXPIRATION:
                    vbucket_id = errcode
                    seqno, rev_seqno, metalen = \
                        struct.unpack(couchbaseConstants.DCP_DELETE_PKT_FMT, data[0:extlen])
                    key_start = extlen
                    val_start = key_start + keylen
                    key = data[extlen:val_start]
                    if not self.skip(key, vbucket_id):
                        msg = (cmd, vbucket_id, key, flg, exp, cas, rev_seqno, val, seqno, dtype, \
                               metalen, 0)
                        batch.append(msg, len(val))
                        self.num_msg += 1
                    if cmd == couchbaseConstants.CMD_DCP_DELETE:
                        batch.adjust_size += 1
                elif cmd == couchbaseConstants.CMD_DCP_FLUSH:
                    logging.warn("stopping: saw CMD_DCP_FLUSH")
                    self.dcp_done = True
                    break
                elif cmd == couchbaseConstants.CMD_DCP_END_STREAM:
                    del self.stream_list[opaque]
                    if not len(self.stream_list):
                        self.dcp_done = True
                elif cmd == couchbaseConstants.CMD_DCP_SNAPSHOT_MARKER:
                    ss_start_seqno, ss_end_seqno, _ = \
                        struct.unpack(couchbaseConstants.DCP_SNAPSHOT_PKT_FMT, data[0:extlen])
                    pair_index = (self.source_bucket['name'],
                                  self.source_node['hostname'])
                    if not self.cur['snapshot']:
                        self.cur['snapshot'] = {}
                    if pair_index not in self.cur['snapshot']:
                        self.cur['snapshot'][pair_index] = {}
                    self.cur['snapshot'][pair_index][opaque] = (ss_start_seqno,
                                                                ss_end_seqno)
                elif cmd == couchbaseConstants.CMD_DCP_NOOP:
                    need_ack = True
                elif cmd == couchbaseConstants.CMD_DCP_BUFFER_ACK:
                    if errcode != couchbaseConstants.ERR_SUCCESS:
                        logging.warning("buffer ack response errcode:%s" %
                                        errcode)
                    continue
                else:
                    logging.warn("warning: unexpected DCP message: %s" % cmd)
                    return "unexpected DCP message: %s" % cmd, batch

                if need_ack:
                    self.ack_last = True
                    try:
                        self.dcp_conn._sendMsg(
                            cmd,
                            '',
                            '',
                            opaque,
                            vbucketId=0,
                            fmt=couchbaseConstants.RES_PKT_FMT,
                            magic=couchbaseConstants.RES_MAGIC_BYTE)
                    except socket.error:
                        return (
                            "error: socket.error on send();"
                            " perhaps the source server: %s was rebalancing"
                            " or had connectivity/server problems" %
                            (self.source_node['hostname'])), batch
                    except EOFError:
                        self.dcp_done = True
                        return (
                            "error: EOFError on socket send();"
                            " perhaps the source server: %s was rebalancing"
                            " or had connectivity/server problems" %
                            (self.source_node['hostname'])), batch

                    # Close the batch when there's an ACK handshake, so
                    # the server can concurrently send us the next batch.
                    # If we are slow, our slow ACKs will naturally slow
                    # down the server.
                    self.ack_buffer_size(total_bytes_read - last_processed)
                    return 0, batch

                self.ack_last = False
                self.cmd_last = cmd

        except EOFError:
            if batch.size() <= 0 and self.ack_last:
                # A closed conn after an ACK means clean end of the DCP stream.
                self.dcp_done = True

        if batch.size() <= 0:
            return 0, None
        self.ack_buffer_size(total_bytes_read - last_processed)
        return 0, batch
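The DCP reader above acknowledges consumed bytes in two places: whenever more than delta_ack_size unprocessed bytes have accumulated, and once more just before handing a batch back. With the 25%-of-buffer threshold, the arithmetic works out as below (the batch_max_bytes value is hypothetical):

# Hypothetical numbers illustrating the buffer-ack threshold used above.
batch_max_bytes = 400000                    # e.g. a value from opts.extra
delta_ack_size = batch_max_bytes * 10 / 4   # 1000000 bytes
# ack_buffer_size() fires once (total_bytes_read - last_processed) exceeds
# delta_ack_size, i.e. after roughly 1 MB of unacknowledged stream data,
# and again for the remainder when the batch is returned.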
Example #18
    def loader(self):
        rv, d = data_dir(self.spec)
        if rv != 0:
            self.queue.put((rv, None))
            return

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        source_nodes = self.source_bucket['nodes']
        if len(source_nodes) != 1:
            self.queue.put(("error: expected 1 node in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        vbucket_states = source_nodes[0].get('vbucket_states', None)
        if not vbucket_states:
            self.queue.put(
                ("error: missing vbucket_states in source_bucket: %s" %
                 (self.source_bucket['name']), None))
            return

        vbuckets = vbucket_states.get(source_vbucket_state, None)
        if vbuckets is None:  # Empty dict is valid.
            self.queue.put(("error: missing vbuckets in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        store = None
        vbucket_id = None

        # Level of indirection since we can't use the Python 3 nonlocal statement.
        abatch = [pump.Batch(self)]

        def change_callback(doc_info):
            if doc_info:
                key = doc_info.id
                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = memcacheConstants.CMD_TAP_DELETE
                    val = ''
                else:
                    cmd = memcacheConstants.CMD_TAP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)

                cas, exp, flg = struct.unpack(SFD_REV_META, doc_info.revMeta)
                meta = struct.pack(SFD_REV_SEQ, doc_info.revSequence)
                msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val)
                abatch[0].append(msg, len(val))

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)

        for f in latest_couch_files(d + '/' + self.source_bucket['name']):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            if vbucket_id not in vbuckets:
                continue

            try:
                store = couchstore.CouchStore(f, 'r')
            except Exception, e:
                self.queue.put(("error: could not open couchstore file: %s"
                                "; exception: %s" % (f, e), None))
                return

            store.forEachChange(0, change_callback)
            store.close()
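The loader() variants don't return batches directly; they put (rv, batch) tuples on self.queue from a worker thread. A sketch of the consuming side, assuming the usual Queue-based hand-off (this is an illustration, not the tool's actual provide_batch):

def provide_batch_from_queue(q):
    # Hypothetical queue-draining counterpart to loader(): the worker thread
    # enqueues (rv, batch) pairs and this side simply passes them on.
    rv, batch = q.get()      # blocks until loader() enqueues something
    q.task_done()
    return rv, batch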
Example #19
    def loader(self):
        rv, d = data_dir(self.spec)
        if rv != 0:
            self.queue.put((rv, None))
            return

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        source_nodes = self.source_bucket['nodes']
        if len(source_nodes) != 1:
            self.queue.put(("error: expected 1 node in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        vbucket_states = source_nodes[0].get('vbucket_states', None)
        if not vbucket_states:
            self.queue.put(
                ("error: missing vbucket_states in source_bucket: %s" %
                 (self.source_bucket['name']), None))
            return

        vbuckets = vbucket_states.get(source_vbucket_state, None)
        if vbuckets is None:  # Empty dict is valid.
            self.queue.put(("error: missing vbuckets in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        store = None
        vbucket_id = None

        # Level of indirection since we can't use the Python 3 nonlocal statement.
        abatch = [pump.Batch(self)]

        def change_callback(doc_info):
            if doc_info:
                key = doc_info.id
                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = couchbaseConstants.CMD_TAP_DELETE
                    val = ''
                else:
                    cmd = couchbaseConstants.CMD_TAP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)
                try:
                    cas, exp, flg, flex_meta, dtype = struct.unpack(
                        SFD_REV_META, doc_info.revMeta)
                    meta = doc_info.revSequence
                    seqno = doc_info.sequence
                    nmeta = 0
                    msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val,
                           seqno, dtype, nmeta, 0)
                    abatch[0].append(msg, len(val))
                except Exception, e:
                    self.queue.put((
                        "error: could not read couchstore file due to unsupported file format version;"
                        " exception: %s" % e, None))
                    return

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)
Example #20
    def provide_batch(self):
        if self.done:
            return 0, None

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        s = ["SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val FROM cbb_msg",
             "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, meta_size FROM cbb_msg",
             "SELECT cmd, vbucket_id, key, flg, exp, cas, meta, val, seqno, dtype, meta_size, conf_res FROM cbb_msg"]

        if self.files is None: # None != [], as self.files will shrink to [].
            g = glob.glob(BFD.db_dir(self.spec, self.bucket_name(), self.node_name()) + "/data-*.cbb")
            if not g:
                # Check the 3.0 file structure.
                rv, file_list = BFDSource.list_files(self.opts,
                                                     self.spec,
                                                     self.bucket_name(),
                                                     self.node_name(),
                                                     "data-*.cbb")
                if rv != 0:
                    return rv, None
                from_date = getattr(self.opts, "from_date", None)
                if from_date:
                    from_date = datetime.datetime.strptime(from_date, "%Y-%m-%d")

                to_date = getattr(self.opts, "to_date", None)
                if to_date:
                    to_date = datetime.datetime.strptime(to_date, "%Y-%m-%d")
                g = []
                for f in file_list:
                    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(f))
                    if (not from_date or mtime >= from_date) and (not to_date or mtime <= to_date):
                        g.append(f)
            self.files = sorted(g)
        try:
            ver = 0
            while (not self.done and
                   batch.size() < batch_max_size and
                   batch.bytes < batch_max_bytes):
                if self.cursor_db is None:
                    if not self.files:
                        self.done = True
                        return 0, batch

                    rv, db, ver = connect_db(self.files[0], self.opts, CBB_VERSION)
                    if rv != 0:
                        return rv, None
                    self.files = self.files[1:]

                    cursor = db.cursor()
                    cursor.execute(s[ver])

                    self.cursor_db = (cursor, db)

                cursor, db = self.cursor_db

                row = cursor.fetchone()
                if row:
                    vbucket_id = row[1]
                    key = row[2]
                    val = row[7]

                    if self.skip(key, vbucket_id):
                        continue
                    msg = (row[0], row[1], row[2], row[3], row[4],
                           int(row[5]), # CAS as 64-bit integer not string.
                           row[6], # revid as 64-bit integer too
                           row[7])
                    if ver == 2:
                        msg = msg + (row[8], row[9], row[10], row[11])
                    elif ver == 1:
                        msg = msg + (row[8], row[9], row[10], 0)
                    else:
                        msg = msg + (0, 0, 0, 0)
                    batch.append(msg, len(val))
                else:
                    if self.cursor_db:
                        self.cursor_db[0].close()
                        self.cursor_db[1].close()
                    self.cursor_db = None

            return 0, batch

        except Exception, e:
            self.done = True
            if self.cursor_db:
                self.cursor_db[0].close()
                self.cursor_db[1].close()
            self.cursor_db = None

            return "error: exception reading backup file: " + str(e), None
Example #21
    def provide_batch(self):
        """Provides a batch of messages, with GET/SET ratios and keys
           controlled by a mcsoda-inspired approach, but simpler."""
        if self.done:
            return 0, None

        cfg = self.source_map['cfg']
        prefix = cfg['prefix']
        max_items = cfg['max-items']
        ratio_sets = cfg['ratio-sets']
        exit_after_creates = cfg['exit-after-creates']
        low_compression = cfg['low-compression']
        xattrs = cfg['xattr']
        itr = None
        collections = self.opts.collection
        if collections:
            itr = iter(collections)

        json = cfg['json']
        if not self.body:

            if low_compression:
                # Generate a document which snappy will struggle to compress.
                # Useful if you're creating data-sets which utilise disk.
                # Seed to a fixed value so we always have the same document pattern.
                random.seed(0)
                document = ''.join(
                    random.choice(string.ascii_uppercase)
                    for _ in range(cfg['min-value-size']))
            else:
                # else a string of 0 is fine, but will compress very well.
                document = "0" * cfg['min-value-size']

            if json:
                # Note: the % below binds only to the second string literal, so the
                # "body" field is filled in here and the remaining %s placeholders
                # (name, age, index) are formatted per message later on.
                self.body = '{"name": "%s%s", "age": %s, "index": %s,' + \
                            ' "body": "%s"}' % document
            else:
                self.body = document

        batch = pump.Batch(self)

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (batch.size() < batch_max_size
               and batch.bytes < batch_max_bytes):
            if ratio_sets >= float(self.cur_sets) / float(self.cur_ops or 1):
                self.cur_sets = self.cur_sets + 1
                if xattrs:
                    cmd = couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                if self.cur_items < max_items:
                    key = str(self.cur_items)
                    self.cur_items = self.cur_items + 1
                else:
                    key = str(self.cur_sets % self.cur_items)
            else:
                self.cur_gets = self.cur_gets + 1
                if xattrs:
                    cmd = couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP
                else:
                    cmd = couchbaseConstants.CMD_GET
                key = str(self.cur_gets % self.cur_items)
            self.cur_ops = self.cur_ops + 1

            if json:
                value = self.body % (prefix, key, int(key) % 101, key)
            else:
                value = self.body

            if xattrs:
                value = {
                    "obj": value,
                    "xattr_f": "field1",
                    "xattr_v": "\"value1\""
                }

            # generate a collection key
            if itr:
                try:
                    cid = int(next(itr), 16)
                except StopIteration:
                    itr = iter(collections)
                    cid = int(next(itr), 16)

                encodedCid = encodeCollectionId(cid)
                # Generate the pack format and pack the key
                docKey = struct.pack(
                    "!" + str(len(encodedCid)) + "s" + str(len(prefix)) + "s" +
                    str(len(key)) + "s", encodedCid, prefix, key)
            else:
                docKey = prefix + key

            msg = (cmd, vbucket_id, docKey, flg, exp, cas, '', value, 0, 0, 0,
                   0)
            batch.append(msg, len(value))

            if exit_after_creates and self.cur_items >= max_items:
                self.done = True
                return 0, batch

        if batch.size() <= 0:
            return 0, None
        return 0, batch
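The ratio-sets branch above takes the mutation path whenever the running sets/ops ratio would otherwise drop below cfg['ratio-sets']. A standalone sketch of that decision with a hypothetical 10% ratio, showing that the counters converge as expected:

# Standalone simulation of the ratio-sets decision used in provide_batch() above.
ratio_sets = 0.10            # hypothetical cfg['ratio-sets'] value
cur_sets = cur_gets = cur_ops = 0
for _ in range(1000):
    if ratio_sets >= float(cur_sets) / float(cur_ops or 1):
        cur_sets += 1        # would be a mutation (SET / subdoc mutation)
    else:
        cur_gets += 1        # would be a read (GET / subdoc lookup)
    cur_ops += 1
print("%d sets, %d gets" % (cur_sets, cur_gets))   # roughly 100 sets, 900 gets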