예제 #1
0
파일: dcp_base.py 프로젝트: ritalrw/TAF
    def initialise_cluster_connections(self):
        init_dcp_client = self.initiate_connection()

        config_json = json.loads(DcpClient.get_config(init_dcp_client)[2])
        self.vb_map = config_json['vBucketServerMap']['vBucketMap']

        self.dcp_client_dict = dict()

        if self.log_path:
            self.log_path = os.path.normpath(self.log_path)
        self.dcp_log_data = LogData(self.log_path, self.vbuckets, self.keep_logs)

        # TODO: Remove globals and restructure (possibly into a class) to allow for multiple
        #       instances of the client (allowing multiple bucket connections)
        node_list = []
        for index, server in enumerate(config_json['vBucketServerMap']['serverList']):
            host = self.check_valid_host(server.split(':')[0], 'Server Config Cluster Map')
            node_list.append(host)
            port = config_json['nodesExt'][index]['services'].get('kv')

            if port is not None:
                node = '{0}:{1}'.format(host, port)
                if 'thisNode' in config_json['nodesExt'][index]:
                    self.dcp_client_dict[index] = {'stream': init_dcp_client,
                                              'node': node}
                else:
                    self.dcp_client_dict[index] = {'stream': self.initiate_connection(),
                                              'node': node}
        return init_dcp_client
예제 #2
0
def initialise_cluster_connections(args):
    init_dcp_client = initiate_connection(args)

    config_json = json.loads(DcpClient.get_config(init_dcp_client)[2])
    global vb_map
    vb_map = config_json['vBucketServerMap']['vBucketMap']

    global dcp_client_dict
    dcp_client_dict = {}

    global dcp_log_data
    if args.log_path:
        args.log_path = os.path.normpath(args.log_path)
    dcp_log_data = LogData(args.log_path, args.vbuckets, args.keep_logs)

    # TODO: Remove globals and restructure (possibly into a class) to allow for multiple
    #       instances of the client (allowing multiple bucket connections)
    node_list = []
    for index, server in enumerate(
            config_json['vBucketServerMap']['serverList']):
        temp_args = copy.deepcopy(args)
        host = check_valid_host(
            server.split(':')[0], 'Server Config Cluster Map')
        node_list.append(host)
        port = config_json['nodesExt'][index]['services'].get('kv')

        if port is not None:
            temp_args.node = '{0}:{1}'.format(host, port)
            if 'thisNode' in config_json['nodesExt'][index]:
                dcp_client_dict[index] = {
                    'stream': init_dcp_client,
                    'node': temp_args.node
                }
            else:
                dcp_client_dict[index] = {
                    'stream': initiate_connection(temp_args),
                    'node': temp_args.node
                }
예제 #3
0
class DCPBase(CollectionBase):
    def setUp(self):
        super(DCPBase, self).setUp()
        self.dictstore = {}
        self.verification_seqno = None
        self.collections = self.input.param("collections", True)
        self.xattrs = self.input.param("xattrs", False)
        self.compression = self.input.param("compression", 0)
        self.delete_times = self.input.param("delete_times", False)
        self.manifest = self.input.param("manifest", None)
        self.timeout = self.input.param("timeout", True)
        self.bucket = self.cluster.buckets[0]
        self.noop_interval = self.input.param("noop_interval", 0)
        self.opcode_dump = self.input.param("opcode_dump", False)
        self.enable_expiry = self.input.param("enable_expiry", False)
        self.enable_stream_id = self.input.param("enable_stream_id", False)
        #         vbuckets = self.input.param("vbuckets", 0)
        #         if len(vbuckets) == 1:
        self.vbuckets = range(1024)
        #         else:
        #             self.vbuckets = vbuckets.split(",")
        self.start_seq_no_list = self.input.param("start",
                                                  [0] * len(self.vbuckets))
        self.end_seq_no = self.input.param("end", 0xffffffffffffffff)
        self.vb_uuid_list = self.input.param("vb_uuid_list",
                                             ['0'] * len(self.vbuckets))
        self.vb_retry = self.input.param("retry_limit", 10)
        self.filter_file = self.input.param("filter", None)
        self.stream_req_info = self.input.param("stream_req_info", False)
        self.failover_logging = self.input.param("failover_logging", False)
        self.keep_logs = self.input.param("keep_logs", False)
        self.log_path = self.input.param("log_path", None)
        self.keys = self.input.param("keys", True)
        self.docs = self.input.param("docs", False)
        self.dcp_client = self.initialise_cluster_connections()
        self.output_string = list()

    def tearDown(self):
        super(DCPBase, self).tearDown()

    def initialise_cluster_connections(self):
        init_dcp_client = self.initiate_connection()

        config_json = json.loads(DcpClient.get_config(init_dcp_client)[2])
        self.vb_map = config_json['vBucketServerMap']['vBucketMap']

        self.dcp_client_dict = dict()

        if self.log_path:
            self.log_path = os.path.normpath(self.log_path)
        self.dcp_log_data = LogData(self.log_path, self.vbuckets,
                                    self.keep_logs)

        # TODO: Remove globals and restructure (possibly into a class) to allow for multiple
        #       instances of the client (allowing multiple bucket connections)
        node_list = []
        for index, server in enumerate(
                config_json['vBucketServerMap']['serverList']):
            host = self.check_valid_host(
                server.split(':')[0], 'Server Config Cluster Map')
            node_list.append(host)
            port = config_json['nodesExt'][index]['services'].get('kv')

            if port is not None:
                node = '{0}:{1}'.format(host, port)
                if 'thisNode' in config_json['nodesExt'][index]:
                    self.dcp_client_dict[index] = {
                        'stream': init_dcp_client,
                        'node': node
                    }
                else:
                    self.dcp_client_dict[index] = {
                        'stream': self.initiate_connection(),
                        'node': node
                    }
        return init_dcp_client

    def initiate_connection(self):
        self.host = self.cluster.master.ip
        self.port = int(11210)
        timeout = self.timeout

        dcp_client = DcpClient(self.host,
                               self.port,
                               timeout=timeout,
                               do_auth=False)
        self.log.info("DCP client created")

        try:
            response = dcp_client.sasl_auth_plain(
                self.cluster.master.rest_username,
                self.cluster.master.rest_password)
        except MemcachedError as err:
            self.log.info("DCP connection failure")

        self.check_for_features(dcp_client)

        dcp_client.bucket_select(str(self.bucket.name))
        self.log.info("Successfully AUTHed to %s" % self.bucket)

        name = "simple_dcp_client " + str(uuid.uuid4())
        response = dcp_client.open_producer(name,
                                            xattr=self.xattrs,
                                            delete_times=self.delete_times,
                                            collections=self.collections)
        assert response['status'] == SUCCESS
        self.log.info("Opened DCP consumer connection")

        response = dcp_client.general_control("enable_noop", "true")
        assert response['status'] == SUCCESS
        self.log.info("Enabled NOOP")

        if self.noop_interval:
            noop_interval = str(self.noop_interval)
            response2 = dcp_client.general_control("set_noop_interval",
                                                   noop_interval)
            assert response2['status'] == SUCCESS
            self.log.info("NOOP interval set to %s" % noop_interval)

        if self.opcode_dump:
            dcp_client.opcode_dump_control(True)

        if (self.compression > 1):
            response = dcp_client.general_control("force_value_compression",
                                                  "true")
            assert response['status'] == SUCCESS
            self.log.info("Forcing compression on connection")

        if self.enable_expiry:
            response = dcp_client.general_control("enable_expiry_opcode",
                                                  "true")
            assert response['status'] == SUCCESS
            self.log.info("Enabled Expiry Output")

        if self.enable_stream_id:
            response = dcp_client.general_control("enable_stream_id", "true")
            assert response['status'] == SUCCESS
            self.log.info("Enabled Stream-ID")
        return dcp_client

    def check_for_features(self, dcp_client):
        features = [HELO_XERROR]
        if self.xattrs:
            features.append(HELO_XATTR)
        if self.collections:
            features.append(HELO_COLLECTIONS)
        if self.compression:
            features.append(HELO_SNAPPY)
        resp = dcp_client.hello(features, "pydcp feature HELO")
        for feature in features:
            assert feature in resp

    def handle_stream_create_response(self, dcpStream):
        if dcpStream.status == SUCCESS:
            self.log.debug('Stream Opened Successfully on vb %s' %
                           dcpStream.vbucket)

            if self.failover_logging and not self.keep_logs:
                # keep_logs implies that there is a set of JSON log files
                self.dcp_log_data.upsert_failover(dcpStream.vbucket,
                                                  dcpStream.failover_log)
            return None

        elif dcpStream.status == ERR_NOT_MY_VBUCKET:
            self.log_failure('NOT MY VBUCKET -%s does not live on this node' \
                             % dcpStream.vbucket)
            # TODO: Handle that vbucket not entering the stream list

        elif dcpStream.status == ERR_ROLLBACK:
            self.log.info("Server requests Rollback to sequence number: %s" \
                          % dcpStream.rollback_seqno)
            dcpStream = self.handle_rollback(dcpStream)
            return dcpStream

        elif dcpStream.status == ERR_NOT_SUPPORTED:
            self.log_failure("Error: Stream Create Request Not Supported")

        else:
            self.log_failure("Unhandled Stream Create Response %s" \
                             % (dcpStream.status))

    def handleSystemEvent(self, response, manifest):
        # Unpack a DCP system event
        if response['event'] == EVENT_CREATE_COLLECTION:
            if response['version'] == 0:
                uid, sid, cid = struct.unpack(">QII", response['value'])
                uid = format(uid, 'x')
                sid = format(sid, 'x')
                cid = format(cid, 'x')
                string = "DCP Event: vb:{}, sid:{}, what:CollectionCREATED, name:{}, id:{}, scope:{}, manifest:{}," \
                         " seqno:{}".format(response['vbucket'],
                                            response['streamId'],
                                            response['key'],
                                            cid,
                                            sid,
                                            uid,
                                            response['by_seqno'])
                self.output_string.append(string)
                manifest['uid'] = uid
                for e in manifest['scopes']:
                    if e['uid'] == sid:
                        e['collections'].append({
                            'name': response['key'],
                            'uid': cid
                        })

            elif response['version'] == 1:
                uid, sid, cid, ttl = struct.unpack(">QIII", response['value'])
                uid = format(uid, 'x')
                sid = format(sid, 'x')
                cid = format(cid, 'x')
                string = "DCP Event: vb:{}, sid:{}, what:CollectionCREATED, name:{}, id:{}, scope:{}, ttl:{}, " \
                         "manifest:{}, seqno:{}".format(response['vbucket'],
                                                        response['streamId'],
                                                        response['key'],
                                                        cid,
                                                        sid,
                                                        ttl,
                                                        uid,
                                                        response['by_seqno'])
                self.output_string.append(string)
                manifest['uid'] = uid
                for e in manifest['scopes']:
                    if e['uid'] == sid:
                        e['collections'].append({
                            'name': response['key'],
                            'uid': cid,
                            'max_ttl': 0
                        })
            else:
                self.log.info("Unknown DCP Event version: %s" %
                              response['version'])

        elif response['event'] == EVENT_DELETE_COLLECTION:
            # We can receive delete collection without a corresponding create, this
            # will happen when only the tombstone of a collection remains
            uid, sid, cid = struct.unpack(">QII", response['value'])
            uid = format(uid, 'x')
            sid = format(sid, 'x')
            cid = format(cid, 'x')
            string = "DCP Event: vb:{}, sid:{}, what:CollectionDROPPED, id:{}, scope:{},  manifest:{}, " \
                     "seqno:{}".format(response['vbucket'],
                                       response['streamId'],
                                       cid, sid, uid,
                                       response['by_seqno'])
            self.output_string.append(string)
            manifest['uid'] = uid
            collections = []
            for e in manifest['scopes']:
                update = False
                for c in e['collections']:
                    if c['uid'] != cid:
                        collections.append(c)
                        update = True
                if update:
                    e['collections'] = collections
                    break

        elif response['event'] == EVENT_CREATE_SCOPE:
            uid, sid = struct.unpack(">QI", response['value'])
            uid = format(uid, 'x')
            sid = format(sid, 'x')
            string = "DCP Event: vb:{}, sid:{}, what:ScopeCREATED, name:{}, id:{}, manifest:{}, " \
                     "seqno:{}".format(response['vbucket'],
                                       response['streamId'],
                                       response['key'],
                                       sid,
                                       uid,
                                       response['by_seqno'])
            self.output_string.append(string)

            # Record the scope
            manifest['uid'] = uid
            manifest['scopes'].append({
                'uid': sid,
                'name': response['key'],
                'collections': []
            })

        elif response['event'] == EVENT_DELETE_SCOPE:
            # We can receive delete scope without a corresponding create, this
            # will happen when only the tombstone of a scope remains
            uid, sid = struct.unpack(">QI", response['value'])
            uid = format(uid, 'x')
            sid = format(sid, 'x')
            string = "DCP Event: vb:{}, sid:{}, what:ScopeDROPPED, id:{}, manifest:{}, " \
                     "seqno:{}".format(response['vbucket'], response['streamId'],
                                       sid, uid, response['by_seqno'])
            self.output_string.append(string)
            manifest['uid'] = uid
            scopes = []
            for e in manifest['scopes']:
                if e['uid'] != sid:
                    scopes.append(e)
            manifest['scopes'] = scopes
        else:
            self.log.info("Unknown DCP Event:%s " % response['event'])
        return manifest

    def handleMutation(self, response):
        vb = response['vbucket']
        seqno = response['by_seqno']
        action = response['opcode']
        sid = response['streamId']
        output_string = ""
        if self.keys:
            output_string += "KEY:" + response[
                'key'] + " from collection:" + str(
                    response['collection_id']) + ", vb:" + str(
                        vb) + " sid:" + str(sid) + " "
        if self.docs:
            output_string += "BODY:" + response['value']
        if self.xattrs:
            if 'xattrs' in response and response['xattrs'] != None:
                output_string += " XATTRS:" + response['xattrs']
            else:
                output_string += " XATTRS: - "
        if output_string != "":
            output_string = str(
                DCP_Opcode_Dictionary[action]) + " -> " + output_string
            return seqno, output_string

    def handleMarker(self, response):
        self.log.info("Snapshot Marker vb:{}, sid:{}, " \
                      "start:{}, end:{}, flag:{}".format(response['vbucket'],
                                                         response['streamId'],
                                                         response['snap_start_seqno'],
                                                         response['snap_end_seqno'],
                                                         response['flag']))
        return int(response['snap_start_seqno']), int(
            response['snap_end_seqno'])

    def checkSnapshot(self, vb, se, current, stream):
        if se == current:
            self.log.info("Snapshot for vb:%s has completed, end:%s, " \
                          "stream.mutation_count:%s" % (vb, se, stream.mutation_count))

    def process_dcp_traffic(self, streams):
        active_streams = len(streams)
        self.log.info("no of active streams %s" % active_streams)

        while active_streams > 0:

            for vb in streams:
                stream = vb['stream']
                if not vb['complete']:
                    if stream.has_response():
                        response = stream.next_response()
                        if response == None:
                            self.log.debug("No response from vbucket %s" %
                                           vb['id'])
                            vb['complete'] = True
                            active_streams -= 1
                            self.dcp_log_data.push_sequence_no(vb['id'])
                            continue

                        opcode = response['opcode']
                        if (opcode == CMD_MUTATION or opcode == CMD_DELETION
                                or opcode == CMD_EXPIRATION):
                            seqno, output_string = self.handleMutation(
                                response)
                            self.output_string.append(output_string)
                            if self.failover_logging:
                                self.dcp_log_data.upsert_sequence_no(
                                    response['vbucket'], response['by_seqno'])

                            vb['timed-out'] = self.vb_retry

                            self.checkSnapshot(response['vbucket'],
                                               vb['snap_end'],
                                               response['by_seqno'], stream)
                        elif opcode == CMD_SNAPSHOT_MARKER:
                            vb['snap_start'], vb[
                                'snap_end'] = self.handleMarker(response)
                        elif opcode == CMD_SYSTEM_EVENT:
                            vb['manifest'] = self.handleSystemEvent(
                                response, vb['manifest'])
                            self.checkSnapshot(response['vbucket'],
                                               vb['snap_end'],
                                               response['by_seqno'], stream)
                        elif opcode == CMD_STREAM_END:
                            self.log.info("Received stream end. Stream complete with " \
                                          "reason {}.".format(response['flags']))
                            vb['complete'] = True
                            active_streams -= 1
                            self.dcp_log_data.push_sequence_no(
                                response['vbucket'])
                        else:
                            self.log.info(
                                "Unexpected and unhandled opcode:{}".format(
                                    opcode))
                    else:
                        self.log.debug('No response')

                if vb['complete']:
                    # Second-tier timeout - after the stream close to allow other vbuckets to execute
                    if vb['timed-out'] > 0:
                        vb['complete'] = False
                        active_streams += 1
                        vb['timed-out'] -= 1
                    else:
                        if vb['stream_open']:
                            # Need to close stream to vb - TODO: use a function of mc client instead of raw socket
                            header = struct.pack(RES_PKT_FMT, REQ_MAGIC_BYTE,
                                                 CMD_CLOSE_STREAM, 0, 0, 0,
                                                 vb['id'], 0, 0, 0)
                            self.select_dcp_client(vb['id']).s.sendall(header)
                            vb['stream_open'] = False
                            if self.stream_req_info:
                                self.log.info('Stream to vbucket(s) closed' %
                                              str(vb['id']))

        # Dump each VB manifest if collections were enabled
        if self.collections:
            for vb in streams:
                self.log.info("vb:{} The following manifest state was created from the " \
                              "system events".format(vb['id']))
                self.log.info(
                    json.dumps(vb['manifest'], sort_keys=True, indent=2))
                break
        return self.output_string

    def add_streams(self,
                    vbuckets,
                    start,
                    end,
                    uuid,
                    retry_limit=15,
                    filter=None):
        self.vb_list = vbuckets
        self.start_seq_no_list = start
        self.end_seq_no = end
        self.vb_uuid_list = uuid
        self.vb_retry = retry_limit
        self.filter_file = filter
        filter_json = []
        streams = []

        # Filter is a file containing JSON, it can either be a single DCP
        # stream-request value, or an array of many values. Use of many values
        # is intended to be used in conjunction with enable_stream_id and sid
        if self.collections and self.filter_file != None:
            #             filter_file = open(self.filter_file, "r")
            #             jsonData = filter_file.read()
            parsed = json.loads(self.filter_file)

            # Is this an array or singular filter?
            if 'streams' in parsed:
                for f in parsed['streams']:
                    filter_json.append(json.dumps(f))
            else:
                # Assume entire document is the filter
                filter_json.append(self.filter_file)
            self.log.info("DCP Open filter: {}".format(filter_json))
        else:
            filter_json.append('')

        for f in filter_json:
            for index in xrange(0, len(self.vb_list)):
                if self.stream_req_info:
                    self.log.info('Stream to vbucket %s on node %s with seq no %s and uuid %s' \
                                  % (self.vb_list[index], self.get_node_of_dcp_client_connection(
                        self.vb_list[index]), self.start_seq_no_list[index],
                                     self.vb_uuid_list[index]))
                vb = int(self.vb_list[index])
                stream = self.select_dcp_client(vb).stream_req(
                    vbucket=vb,
                    takeover=0,
                    start_seqno=int(self.start_seq_no_list[index]),
                    end_seqno=self.end_seq_no,
                    vb_uuid=int(self.vb_uuid_list[index]),
                    json=f)
                handle_response = self.handle_stream_create_response(stream)
                if handle_response is None:
                    vb_stream = {
                        "id": self.vb_list[index],
                        "complete": False,
                        "keys_recvd": 0,
                        "timed-out": self.
                        vb_retry,  # Counts the amount of times that vb_stream gets timed out
                        "stream_open":
                        True,  # Details whether a vb stream is open to avoid repeatedly closing
                        "stream": stream,
                        # Set the manifest so we assume _default scope and collection exist
                        # KV won't replicate explicit create events for these items
                        "manifest": {
                            'scopes': [{
                                'uid':
                                "0",
                                'name':
                                '_default',
                                'collections': [{
                                    'uid': "0",
                                    'name': '_default'
                                }]
                            }],
                            'uid':
                            0
                        },
                        "snap_start": 0,
                        "snap_end": 0
                    }
                    streams.append(vb_stream)
                else:
                    for vb_stream in handle_response:
                        streams.append(vb_stream)
        return streams

    def handle_rollback(self, dcpStream):
        updated_dcpStreams = []
        vb = dcpStream.vbucket
        requested_rollback_no = dcpStream.rollback_seqno

        # If argument to use JSON log files
        if self.failover_logging:
            log_fetch = self.dcp_log_data.get_failover_logs([vb])
            if log_fetch.get(str(vb), None) is not None:
                # If the failover log is not empty, use it
                data = log_fetch[str(vb)]
                failover_values = sorted(data,
                                         key=lambda x: x[1],
                                         reverse=True)
            else:
                failover_fetch = DcpClient.get_failover_log(
                    self.select_dcp_client(vb), vb)
                failover_values = failover_fetch.get('value')

        # Otherwise get failover log from server
        else:
            failover_fetch = DcpClient.get_failover_log(
                self.select_dcp_client(vb), vb)
            failover_values = failover_fetch.get('value')

        new_seq_no = []
        new_uuid = []
        failover_seq_num = 0  # Default values so that if they don't get set
        failover_vbucket_uuid = 0  # inside the for loop, 0 is used.

        for failover_log_entry in failover_values:
            if failover_log_entry[1] <= requested_rollback_no:
                failover_seq_num = failover_log_entry[
                    1]  # closest Seq number in log
                failover_vbucket_uuid = failover_log_entry[0]  # and its UUID
                break
        new_seq_no.append(failover_seq_num)
        new_uuid.append(failover_vbucket_uuid)

        self.log.info(
            'Retrying stream add on vb %s with seqs %s and uuids %s' %
            (vb, new_seq_no, new_uuid))

        # NOTE: This can cause continuous rollbacks making client side recursive dependent on failover logs.
        self.log.info("check for recursive loop")
        return self.add_streams([vb], new_seq_no, self.end_seq_no, new_uuid,
                                self.vb_retry, self.filter_file)

    def select_dcp_client(self, vb):
        main_node_id = self.vb_map[vb][0]

        # TODO: Adding support if not my vbucket received to use replica node.
        # if len(vb_map[vb]) > 1:
        #     replica1_node_id = vb_map[vb][1]
        # if len(vb_map[vb]) > 2:
        #     replica2_node_id = vb_map[vb][2]
        # if len(vb_map[vb]) > 3:
        #     replica3_node_id = vb_map[vb][3]

        return self.dcp_client_dict[main_node_id]['stream']

    def check_valid_host(self, host, errorOrigin):
        separate_host = host.split('.')
        if len(separate_host) == 4:
            for num in separate_host:
                if int(num) < 0 or int(num) > 255:
                    raise IndexError(
                        "The inputted host (", host,
                        ") has an ip address outside the standardised range. "
                        "Error origin:", errorOrigin)
            return host
        else:
            if host == 'localhost':
                return host
            elif host == '$HOST':
                self.log.info("using localhost")
                return 'localhost'
            else:
                raise StandardError("Invalid host input", host,
                                    "Error origin:", errorOrigin)

    def get_node_of_dcp_client_connection(self, vb):
        node_id = self.vb_map[vb][0]
        return self.dcp_client_dict[node_id]['node']

    def all_vbucket_ids(self, type_=None):
        vb_ids = []
        response = self.mcd_client.stats('vbucket')
        assert len(response) > 0

        for vb in response:
            if vb != '' and (type_ is None or response[vb] == type_):
                vb_id = int(vb.split('_')[-1])
                vb_ids.append(vb_id)

        return vb_ids

    def wait_for_persistence(self, client):
        if self.bucket_type == 'ephemeral':
            return
        while int(self.get_stat(client, 'ep_queue_size')) > 0:
            self.sleep(1)
        while int(self.get_stat(client, 'ep_commit_num')) == 0:
            self.sleep(1)

    @staticmethod
    def get_stat(client, stat, type=''):
        response = client.stats(type)
        return response[stat]

    def wait_for_warmup(self, host, port):
        if self.bucket_type == 'ephemeral': return
        while True:
            client = McdClient(host, port)
            try:
                client.bucket_select("default")
                response = client.stats()
                # check the old style or new style (as of 4.5) results
                mode = response.get('ep_degraded_mode')
                if mode is not None:
                    if mode == '0' or mode == 'false':
                        break
            except Exception as ex:
                pass
            self.sleep(1)