Exemple #1
0
 def __create_client(self):
     """Build the Thrift socket, framed transport and client for this object.

     The transport and client are stored on the instance. Nothing in this
     method opens the transport.
     """
     # Raw socket to the configured endpoint, using the configured timeout.
     endpoint = TSocket(self.__host, self.__port,
                        socket_timeout=self.__timeout)

     # The transport is kept on the instance; the protocol is layered on it.
     transport_factory = TCyFramedTransportFactory()
     self.__transport = transport_factory.get_transport(endpoint)

     protocol_factory = TCyBinaryProtocolFactory()
     wire_protocol = protocol_factory.get_protocol(self.__transport)
     self.__client = TClient(recs_thrift.RecommendationsService,
                             wire_protocol)
Exemple #2
0
    def _refresh_thrift_client(self):
        """Rebuild the Thrift socket, transport and client from current settings.

        The new transport is stored on ``self.transport`` and the new client
        on ``self.client``. The transport is not opened here.
        """
        raw_socket = TSocket(
            host=self.host,
            port=self.port,
            socket_timeout=self.timeout,
        )
        self.transport = self._transport_class(raw_socket)

        # decode_response=False is forwarded unchanged to the protocol class.
        wire_protocol = self._protocol_class(
            self.transport,
            decode_response=False,
        )
        self.client = TClient(Hbase, wire_protocol)
Exemple #3
0
    def __init__(self,
                 host,
                 port,
                 retry_interval,
                 report_status=None,
                 logging_timeout=None):
        """Create the Thrift objects and connection bookkeeping; does not connect.

        :param host: server host (converted with ``six.text_type``)
        :param port: server port (converted with ``int``)
        :param retry_interval: stored as ``self.retry_interval``
        :param report_status: status callback; ``get_default_reporter()`` is
            used when this is falsy
        :param logging_timeout: socket timeout; when ``None`` the value of
            ``config.scribe_logging_timeout`` is used instead
        """
        # An explicit logging_timeout wins over the configured default.
        if logging_timeout is None:
            timeout = config.scribe_logging_timeout
        else:
            timeout = logging_timeout

        # Thrift plumbing: socket -> framed transport -> binary protocol -> client.
        self.socket = thriftpy.transport.socket.TSocket(six.text_type(host),
                                                        int(port))
        if timeout:
            # A falsy timeout leaves the socket's own setting untouched.
            self.socket.set_timeout(timeout)

        framed_factory = TFramedTransportFactory()
        self.transport = framed_factory.get_transport(self.socket)
        binary_factory = TBinaryProtocolFactory(strict_read=False)
        protocol = binary_factory.get_protocol(self.transport)
        self.client = TClient(scribe_thrift.scribe, protocol)

        # Connection bookkeeping.
        # Whether we currently believe we are connected to the server.
        self.connected = False
        # Last time we got disconnected or failed to reconnect.
        self.last_connect_time = 0

        self.retry_interval = retry_interval
        self.report_status = report_status or get_default_reporter()
        self.__lock = threading.RLock()
        # PID of the creating process.
        self._birth_pid = os.getpid()
Exemple #4
0
    def open(self):
        """Open a freshly built Thrift transport unless one is already open.

        When ``self.transport`` exists and reports itself open, this only logs
        and returns. Otherwise a new socket, transport, protocol and client
        are created, stored on the instance, and the transport is opened.
        """
        already_open = self.transport and self.transport.is_open()
        if already_open:
            logger.debug("transport was opened.")
            return

        logger.debug("new transport...")
        raw_socket = TSocket(
            host=self.host,
            port=self.port,
            socket_timeout=self.timeout,
        )
        self.transport = self.transport_class(raw_socket)
        self.protocol = self.protocol_class(self.transport)
        self.client = TClient(hbase_thrift.Hbase, self.protocol)

        self.transport.open()
Exemple #5
0
    def _refresh_thrift_client(self):
        """Rebuild the Thrift socket, transport and client.

        The socket timeout is applied only when ``self.timeout`` is not
        ``None``. When ``self.use_kerberos`` is truthy the transport is
        wrapped in a ``TSaslClientTransport`` before the protocol is built.
        """
        raw_socket = TSocket(self.host, self.port)
        if self.timeout is not None:
            raw_socket.set_timeout(self.timeout)

        self.transport = self._transport_class(raw_socket)
        if self.use_kerberos:
            # Swap the plain transport for a SASL wrapper around it.
            plain_transport = self.transport
            self.transport = TSaslClientTransport(plain_transport,
                                                  self.host,
                                                  self.sasl_service_name)

        wire_protocol = self._protocol_class(self.transport,
                                             decode_response=False)
        self.client = TClient(Hbase, wire_protocol)
Exemple #6
0
def client_context(service, host="localhost", port=9090, unix_socket=None,
                   proto_factory=TBinaryProtocolFactory(),
                   trans_factory=TBufferedTransportFactory(),
                   timeout=None, socket_timeout=3000, connect_timeout=3000,
                   cafile=None, ssl_context=None, certfile=None, keyfile=None):
    """Yield a Thrift client for ``service`` and close its transport afterwards.

    This is a generator function with a single ``yield``; it is presumably
    wrapped with ``contextlib.contextmanager`` outside the lines shown here
    (TODO confirm).

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to (used together with ``port``)
    :param port: port to connect to
    :param unix_socket: unix socket path; takes precedence over host/port
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    :param trans_factory: factory whose ``get_transport`` wraps the socket
    :param timeout: deprecated; when truthy it overrides both
        ``socket_timeout`` and ``connect_timeout`` and a warning is issued
    :param socket_timeout: forwarded to ``TSocket`` (presumably milliseconds)
    :param connect_timeout: forwarded to ``TSocket`` (presumably milliseconds)
    :param cafile: accepted but not used by this function body
    :param ssl_context: accepted but not used by this function body
    :param certfile: accepted but not used by this function body
    :param keyfile: accepted but not used by this function body
    :raises ValueError: when neither ``unix_socket`` nor host and port are given
    """
    if timeout:
        warnings.warn("`timeout` deprecated, use `socket_timeout` and "
                      "`connect_timeout` instead.")
        socket_timeout = connect_timeout = timeout

    if unix_socket:
        socket = TSocket(unix_socket=unix_socket,
                         connect_timeout=connect_timeout,
                         socket_timeout=socket_timeout)
    elif host and port:
        socket = TSocket(host, port,
                         connect_timeout=connect_timeout,
                         socket_timeout=socket_timeout)
    else:
        raise ValueError("Either host/port or unix_socket must be provided.")

    # Build the transport before entering ``try``: if a factory fails there is
    # nothing to close yet, and the ``finally`` clause must not reference an
    # unbound ``transport`` (which would mask the real error).
    transport = trans_factory.get_transport(socket)
    protocol = proto_factory.get_protocol(transport)
    try:
        transport.open()
        yield TClient(service, protocol)
    finally:
        transport.close()
Exemple #7
0
    def _refresh_thrift_client(self):
        """Recreate the socket, transport and client used to talk Thrift.

        Results are stored on ``self.transport`` and ``self.client``; opening
        the transport is left to the caller.
        """
        fresh_socket = TSocket(host=self.host,
                               port=self.port,
                               socket_timeout=self.timeout)
        fresh_transport = self._transport_class(fresh_socket)
        self.transport = fresh_transport

        # The protocol is built on the transport that was just stored.
        fresh_protocol = self._protocol_class(self.transport,
                                              decode_response=False)
        self.client = TClient(Hbase, fresh_protocol)
Exemple #8
0
def client_context(service,
                   host="localhost",
                   port=9090,
                   unix_socket=None,
                   proto_factory=TBinaryProtocolFactory(),
                   trans_factory=TBufferedTransportFactory(),
                   timeout=None):
    """Yield a Thrift client for ``service`` and close its transport afterwards.

    This is a generator function with a single ``yield``; it is presumably
    wrapped with ``contextlib.contextmanager`` outside the lines shown here
    (TODO confirm).

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to (used together with ``port``)
    :param port: port to connect to
    :param unix_socket: unix socket path; takes precedence over host/port
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    :param trans_factory: factory whose ``get_transport`` wraps the socket
    :param timeout: when truthy, applied with ``socket.set_timeout``
    :raises ValueError: when neither ``unix_socket`` nor host and port are given
    """
    if unix_socket:
        socket = TSocket(unix_socket=unix_socket)
    elif host and port:
        socket = TSocket(host, port)
    else:
        raise ValueError("Either host/port or unix_socket must be provided.")

    if timeout:
        socket.set_timeout(timeout)

    # Build the transport before entering ``try``: if a factory fails there is
    # nothing to close yet, and the ``finally`` clause must not reference an
    # unbound ``transport`` (which would mask the real error).
    transport = trans_factory.get_transport(socket)
    protocol = proto_factory.get_protocol(transport)
    try:
        transport.open()
        yield TClient(service, protocol)
    finally:
        transport.close()
 def get_new_client(self):
     """
     Create a new Thrift client for the next provider in round-robin order.

     Runs under ``self.lock``. An ``ip:port`` entry is taken from the right
     end of ``self.load_balance_queue`` and put back on its left end, then a
     buffered-transport / binary-protocol client is opened against it. The
     socket timeout passed to ``TSocket`` is ``1000 * self.socket_timeout``
     (presumably seconds converted to milliseconds -- TODO confirm).

     :return: the new client, or ``None`` when ``self.pool_size`` has reached
         ``self.maxActive`` or the entry taken from the queue is empty
     :raises CTECThriftClientError: when the provider queue is empty
     """
     with self.lock:
         if self.pool_size >= self.maxActive:
             return None

         try:
             ip = self.load_balance_queue.pop()
         except IndexError:
             raise CTECThriftClientError('没有可用的服务提供者列表!')

         if not ip:
             # An empty entry cannot be connected to. It is not put back on
             # the queue, and the caller gets the same "no client" answer as
             # for an exhausted pool instead of an UnboundLocalError.
             return None

         self.load_balance_queue.appendleft(ip)

         # Create a new thrift client.
         address = ip.split(':')
         t_socket = TSocket(address[0], int(address[1]),
                            socket_timeout=1000 * self.socket_timeout)
         proto_factory = TBinaryProtocolFactory()
         trans_factory = TBufferedTransportFactory()
         transport = trans_factory.get_transport(t_socket)
         protocol = proto_factory.get_protocol(transport)
         transport.open()
         client = TClient(self.service, protocol)
         # Count the client only once it has been opened successfully.
         self.pool_size += 1
         return client
Exemple #10
0
def make_client(service,
                host="localhost",
                port=9090,
                unix_socket=None,
                proto_factory=TBinaryProtocolFactory(),
                trans_factory=TBufferedTransportFactory(),
                timeout=None,
                cafile=None,
                ssl_context=None,
                certfile=None,
                keyfile=None):
    """Open a transport to a Thrift server and return a client for ``service``.

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to (used together with ``port``)
    :param port: port to connect to
    :param unix_socket: unix socket path; takes precedence over host/port
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    :param trans_factory: factory whose ``get_transport`` wraps the socket
    :param timeout: socket timeout handed to the socket class
    :param cafile: with host/port, selects ``TSSLSocket`` when truthy
    :param ssl_context: with host/port, selects ``TSSLSocket`` when truthy
    :param certfile: forwarded to ``TSSLSocket``; triggers a warning when
        combined with ``unix_socket``
    :param keyfile: forwarded to ``TSSLSocket``
    :return: a ``TClient`` whose transport has already been opened
    :raises ValueError: when neither ``unix_socket`` nor host and port are given
    """
    if unix_socket:
        if timeout:
            # Honour an explicit timeout for unix sockets as well; previously
            # it was silently dropped on this branch.
            socket = TSocket(unix_socket=unix_socket, socket_timeout=timeout)
        else:
            # No timeout requested: keep the socket class's own default.
            socket = TSocket(unix_socket=unix_socket)
        if certfile:
            warnings.warn("SSL only works with host:port, not unix_socket.")
    elif host and port:
        if cafile or ssl_context:
            socket = TSSLSocket(host,
                                port,
                                socket_timeout=timeout,
                                cafile=cafile,
                                certfile=certfile,
                                keyfile=keyfile,
                                ssl_context=ssl_context)
        else:
            socket = TSocket(host, port, socket_timeout=timeout)
    else:
        raise ValueError("Either host/port or unix_socket must be provided.")

    transport = trans_factory.get_transport(socket)
    protocol = proto_factory.get_protocol(transport)
    transport.open()
    return TClient(service, protocol)
Exemple #11
0
 def __getattr__(self, item):
     """Resolve ``item`` on a newly built Thrift client and wrap the result.

     Each lookup builds its own socket, transport, protocol and client. The
     resolved attribute and the transport are handed to ``_wrapper_api``,
     whose return value is returned.
     """
     conn = TSocket(self.host, self.port)
     conn.set_timeout(self.timeout)

     transport = TRANS_FACTORY().get_transport(conn)
     wire_protocol = PROTO_FACTORY().get_protocol(transport)
     thrift_client = TClient(self.service, wire_protocol)

     return _wrapper_api(getattr(thrift_client, item), transport)
Exemple #12
0
 def get_tclient(self, service, protocol):
     """Return a client for ``service`` speaking ``protocol``.

     A ``TTrackedClient`` built with ``self.tracker_factory`` is returned
     when ``self.tracking`` is exactly ``True``; otherwise a plain
     ``TClient``. The client classes are imported lazily in each branch.
     """
     if self.tracking is not True:
         from thriftpy.thrift import TClient
         return TClient(service, protocol)

     from thriftpy.contrib.tracking import TTrackedClient
     return TTrackedClient(self.tracker_factory, service, protocol)
Exemple #13
0
def client_context(service, host, port,
                   proto_factory=TBinaryProtocolFactory()):
    """Yield a Thrift client for ``service`` and close its transport afterwards.

    This is a generator function with a single ``yield``; it is presumably
    wrapped with ``contextlib.contextmanager`` outside the lines shown here
    (TODO confirm).

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to
    :param port: port to connect to
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    """
    # Build the transport before entering ``try``: if construction fails there
    # is nothing to close yet, and the ``finally`` clause must not reference
    # an unbound ``transport`` (which would mask the real error).
    transport = TBufferedTransport(TSocket(host, port))
    protocol = proto_factory.get_protocol(transport)
    try:
        transport.open()
        yield TClient(service, protocol)
    finally:
        transport.close()
Exemple #14
0
def make_client(service, host, port,
                proto_factory=TBinaryProtocolFactory(),
                trans_factory=TBufferedTransportFactory(),
                timeout=None):
    """Connect to ``host``:``port`` and return a Thrift client for ``service``.

    A truthy ``timeout`` is applied to the socket with ``set_timeout``. The
    transport is opened before the client is returned.
    """
    conn = TSocket(host, port)
    if timeout:
        conn.set_timeout(timeout)

    transport = trans_factory.get_transport(conn)
    wire_protocol = proto_factory.get_protocol(transport)

    transport.open()
    thrift_client = TClient(service, wire_protocol)
    return thrift_client
Exemple #15
0
def make_client(service,
                host,
                port,
                proto_factory=TBinaryProtocolFactory(),
                trans_factory=TFramedTransportFactory()):
    """Return a Thrift client for ``service`` over a ``TNonBlockingSocket``.

    The socket is wrapped by ``trans_factory``, the protocol is built by
    ``proto_factory``, and the transport is opened before returning.
    """
    transport = trans_factory.get_transport(TNonBlockingSocket(host, port))
    wire_protocol = proto_factory.get_protocol(transport)

    transport.open()
    thrift_client = TClient(service, wire_protocol)
    return thrift_client
Exemple #16
0
 def __init__(self, service, socket_config, service_name=None):
     """Build (but do not open) a buffered-transport client for ``service``.

     ``socket_config`` is expanded as keyword arguments to ``TSocket``. When
     ``service_name`` is truthy the binary protocol factory is wrapped in a
     ``TMultiplexedProtocolFactory`` for that name.
     """
     trans_socket = TSocket(**socket_config)
     transport_factory = TBufferedTransportFactory()
     self.__transport = transport_factory.get_transport(trans_socket)

     protocol_factory = TBinaryProtocolFactory()
     if service_name:
         protocol_factory = TMultiplexedProtocolFactory(protocol_factory,
                                                        service_name)

     wire_protocol = protocol_factory.get_protocol(self.__transport)
     self.__client = TClient(service, wire_protocol)
     # The transport starts out closed.
     self.__is_open = False
Exemple #17
0
    def _refresh_thrift_client(self):
        """Refresh the Thrift socket, transport, and client.

        Builds a new socket from ``self.host``, ``self.port`` and
        ``self.timeout`` and wraps it in ``self._transport_class``. When
        ``self.use_kerberos`` is truthy, ``self.transport`` is replaced by a
        ``TSaslClientTransport`` before the protocol and client are created.
        The transport is not opened here.
        """
        socket = TSocket(host=self.host,
                         port=self.port,
                         socket_timeout=self.timeout)

        self.transport = self._transport_class(socket)
        if self.use_kerberos:
            # NOTE(review): the transport assigned here is overwritten below
            # without being used, and the two TSaslClientTransport calls pass
            # differently shaped arguments -- confirm which signature the
            # imported class expects and whether this first call can go.
            self.transport = TSaslClientTransport(self.transport, self.host,
                                                  self.sasl_service_name)
            sasl_auth = 'GSSAPI'

            def sasl_factory():
                # Build and initialise a SASL client configured with this
                # connection's host and service name.
                sasl_client = sasl.Client()
                sasl_client.setAttr('host', self.host)
                sasl_client.setAttr('service', self.sasl_service_name)
                sasl_client.init()
                return sasl_client

            # NOTE(review): this wraps the raw socket rather than the
            # transport built by ``self._transport_class`` -- confirm that
            # bypassing the configured transport class is intended.
            self.transport = TSaslClientTransport(sasl_factory, sasl_auth,
                                                  socket)
        protocol = self._protocol_class(self.transport, decode_response=False)
        self.client = TClient(Hbase, protocol)
Exemple #18
0
def client_context(service, host, port,
                   proto_factory=TBinaryProtocolFactory(),
                   trans_factory=TBufferedTransportFactory(),
                   timeout=None):
    """Yield a Thrift client for ``service`` and close its transport afterwards.

    This is a generator function with a single ``yield``; it is presumably
    wrapped with ``contextlib.contextmanager`` outside the lines shown here
    (TODO confirm).

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to
    :param port: port to connect to
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    :param trans_factory: factory whose ``get_transport`` wraps the socket
    :param timeout: when truthy, applied with ``socket.set_timeout``
    """
    # Build socket and transport before entering ``try``: if any of these
    # steps fails there is nothing to close yet, and the ``finally`` clause
    # must not reference an unbound ``transport`` (which would mask the real
    # error).
    socket = TSocket(host, port)
    if timeout:
        socket.set_timeout(timeout)
    transport = trans_factory.get_transport(socket)
    protocol = proto_factory.get_protocol(transport)
    try:
        transport.open()
        yield TClient(service, protocol)
    finally:
        transport.close()
Exemple #19
0
def make_client(service,
                host,
                port,
                path='',
                scheme='http',
                proto_factory=TBinaryProtocolFactory(),
                trans_factory=TBufferedTransportFactory(),
                ssl_context_factory=None,
                timeout=DEFAULT_HTTP_CLIENT_TIMEOUT_MS):
    """Return a Thrift client for ``service`` that talks over HTTP.

    The target URI is produced by formatting ``HTTP_URI`` with ``scheme``,
    ``host``, ``port`` and ``path``. ``timeout`` and ``ssl_context_factory``
    are handed to ``THttpClient``. The transport is opened before returning.
    """
    endpoint = HTTP_URI.format(scheme=scheme, host=host, port=port, path=path)
    http_client = THttpClient(endpoint, timeout, ssl_context_factory)

    transport = trans_factory.get_transport(http_client)
    wire_protocol = proto_factory.get_protocol(transport)

    transport.open()
    thrift_client = TClient(service, wire_protocol)
    return thrift_client
Exemple #20
0
def make_client(service, host="localhost", port=9090, unix_socket=None,
                proto_factory=TBinaryProtocolFactory(),
                trans_factory=TBufferedTransportFactory(),
                timeout=None):
    """Open a transport to a Thrift server and return a client for ``service``.

    :param service: Thrift service passed to ``TClient``
    :param host: host to connect to (used together with ``port``)
    :param port: port to connect to
    :param unix_socket: unix socket path; takes precedence over host/port
    :param proto_factory: factory whose ``get_protocol`` builds the protocol
    :param trans_factory: factory whose ``get_transport`` wraps the socket
    :param timeout: socket timeout handed to ``TSocket``
    :return: a ``TClient`` whose transport has already been opened
    :raises ValueError: when neither ``unix_socket`` nor host and port are given
    """
    if unix_socket:
        if timeout:
            # Honour an explicit timeout for unix sockets as well; previously
            # it was silently dropped on this branch.
            socket = TSocket(unix_socket=unix_socket, socket_timeout=timeout)
        else:
            # No timeout requested: keep TSocket's own default.
            socket = TSocket(unix_socket=unix_socket)
    elif host and port:
        socket = TSocket(host, port, socket_timeout=timeout)
    else:
        raise ValueError("Either host/port or unix_socket must be provided.")

    transport = trans_factory.get_transport(socket)
    protocol = proto_factory.get_protocol(transport)
    transport.open()
    return TClient(service, protocol)
Exemple #21
0
class ScribeLogger(object):
    """Logger that ships lines to a scribe server over Thrift.

    When errors are encountered, lines are dropped and the connection is
    retried occasionally.

    :param host: hostname of the scribe server
    :param port: port number of the scribe server
    :param retry_interval: number of seconds to wait between retries
    :param report_status: a function `report_status(is_error, msg)` used to
        print errors and status messages. Its first argument says whether
        the message is an error, its second argument is the message itself.
    :param logging_timeout: milliseconds to time out scribe logging; "0" means
        blocking (no timeout)
    """

    def __init__(self,
                 host,
                 port,
                 retry_interval,
                 report_status=None,
                 logging_timeout=None):
        # An explicit logging_timeout wins over the configured default.
        if logging_timeout is None:
            timeout = config.scribe_logging_timeout
        else:
            timeout = logging_timeout

        # Thrift plumbing: socket -> framed transport -> binary protocol -> client.
        self.socket = thriftpy.transport.socket.TSocket(six.text_type(host),
                                                        int(port))
        if timeout:
            self.socket.set_timeout(timeout)

        framed_factory = TFramedTransportFactory()
        self.transport = framed_factory.get_transport(self.socket)
        binary_factory = TBinaryProtocolFactory(strict_read=False)
        protocol = binary_factory.get_protocol(self.transport)
        self.client = TClient(scribe_thrift.scribe, protocol)

        # Connection bookkeeping.
        # Whether we currently believe we are connected to the scribe server.
        self.connected = False
        # Last time we got disconnected or failed to reconnect.
        self.last_connect_time = 0

        self.retry_interval = retry_interval
        self.report_status = report_status or get_default_reporter()
        self.__lock = threading.RLock()
        # PID of the creating process; compared against the current PID
        # before every write.
        self._birth_pid = os.getpid()

    def _maybe_reconnect(self):
        """(Re)connect to the server, unless the last attempt was too recent.

        Must only be called while disconnected.
        """
        assert not self.connected

        # Don't retry too often.
        now = time.time()
        if now - self.last_connect_time <= self.retry_interval:
            return

        try:
            self.transport.open()
        except TTransportException:
            self.last_connect_time = now
            self.report_status(
                True, 'yelp_clog failed to connect to scribe server')
        else:
            self.connected = True

    def _log_line_no_size_limit(self, stream, line):
        """Send one line to scribe without checking its size.

        The line must not contain newline characters. Since this is called
        from log_line, the line is expected to be UTF-8 bytes already and
        below MAX_LINE_SIZE_IN_BYTES. Traceback size is not limited.
        """
        with self.__lock:
            if os.getpid() != self._birth_pid:
                raise ScribeIsNotForkSafeError

            if not self.connected:
                self._maybe_reconnect()
            if not self.connected:
                # Still disconnected: the line is dropped.
                return None

            entry = scribe_thrift.LogEntry(category=scribify(stream),
                                           message=line + b'\n')
            try:
                return self.client.Log(messages=[entry])
            except Exception as exc:
                try:
                    self.report_status(
                        True,
                        'yelp_clog failed to log to scribe server with '
                        ' exception: %s(%s)' % (type(exc), six.text_type(exc)))
                finally:
                    # Always drop the connection, even if reporting failed.
                    self.close()
                    self.last_connect_time = time.time()

                # Don't reconnect if report_status raises an exception
                self._maybe_reconnect()

    def log_line(self, stream, line):
        """Log a single line, which must not include newline characters.

        If the line size is over 50 MB, an exception is raised and the line
        is dropped. If the line size is over 5 MB, the line is still logged
        and a message describing its origin is additionally recorded at
        WHO_CLOG_LARGE_LINE_STREAM (in json format).
        """
        # Unicode is logged as its UTF-8 encoded representation.
        if isinstance(line, six.text_type):
            line = line.encode('UTF-8')

        size = len(line)

        # Common case: small enough to be logged without further ceremony.
        if size <= WARNING_LINE_SIZE_IN_BYTES:
            self._log_line_no_size_limit(stream, line)
            return

        # Too large: report, then raise without logging the line.
        if size > MAX_LINE_SIZE_IN_BYTES:
            self.report_status(
                True,
                'The log line is dropped (line size larger than %r bytes)' %
                MAX_LINE_SIZE_IN_BYTES)
            raise LogLineIsTooLongError(
                'The max log line size allowed is %r bytes' %
                MAX_LINE_SIZE_IN_BYTES)

        # Large but allowed: log the line, then record where it came from
        # (with traceback) in the WHO_CLOG_LARGE_LINE_STREAM category.
        self._log_line_no_size_limit(stream, line)

        origin_info = {
            'stream': stream,
            'line_size': size,
            'traceback': ''.join(traceback.format_stack()),
        }
        origin_line = json.dumps(origin_info).encode('UTF-8')
        self._log_line_no_size_limit(WHO_CLOG_LARGE_LINE_STREAM, origin_line)
        self.report_status(
            False,
            'The log line size is larger than %r bytes (monitored in \'%s\')'
            % (WARNING_LINE_SIZE_IN_BYTES, WHO_CLOG_LARGE_LINE_STREAM))

    def close(self):
        """Close the transport and mark this logger as disconnected."""
        self.transport.close()
        self.connected = False
Exemple #22
0
class Connection(object):
    """Connection to an HBase Thrift server.

    The `host` and `port` arguments specify the host name and TCP port
    of the HBase Thrift server to connect to. If omitted or ``None``,
    a connection to the default port on ``localhost`` is made. If
    specified, the `timeout` argument specifies the socket timeout in
    milliseconds.

    If `autoconnect` is `True` (the default) the connection is made
    directly, otherwise :py:meth:`Connection.open` must be called
    explicitly before first use.

    The optional `table_prefix` and `table_prefix_separator` arguments
    specify a prefix and a separator string to be prepended to all table
    names, e.g. when :py:meth:`Connection.table` is invoked. For
    example, if `table_prefix` is ``myproject``, all tables will
    have names like ``myproject_XYZ``.

    The optional `compat` argument sets the compatibility level for
    this connection. Older HBase versions have slightly different Thrift
    interfaces, and using the wrong protocol can lead to crashes caused
    by communication errors, so make sure to use the correct one. This
    value can be either the string ``0.90``, ``0.92``, ``0.94``, or
    ``0.96`` (the default).

    The optional `transport` argument specifies the Thrift transport
    mode to use. Supported values for this argument are ``buffered``
    (the default) and ``framed``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. HBase versions before 0.94
    always use the buffered transport. Starting with HBase 0.94, the
    Thrift server optionally uses a framed transport, depending on the
    argument passed to the ``hbase-daemon.sh start thrift`` command.
    The default ``-threadpool`` mode uses the buffered transport; the
    ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes use the
    framed transport.

    The optional `protocol` argument specifies the Thrift transport
    protocol to use. Supported values for this argument are ``binary``
    (the default) and ``compact``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. ``TCompactProtocol`` is
    a more compact binary format that is typically more efficient to
    process as well. ``TBinaryProtocol`` is the default protocol that
    Happybase uses.

    .. versionadded:: 0.9
       `protocol` argument

    .. versionadded:: 0.5
       `timeout` argument

    .. versionadded:: 0.4
       `table_prefix_separator` argument

    .. versionadded:: 0.4
       support for framed Thrift transports

    :param str host: The host to connect to
    :param int port: The port to connect to
    :param int timeout: The socket timeout in milliseconds (optional)
    :param bool autoconnect: Whether the connection should be opened directly
    :param str table_prefix: Prefix used to construct table names (optional)
    :param str table_prefix_separator: Separator used for `table_prefix`
    :param str compat: Compatibility mode (optional)
    :param str transport: Thrift transport mode (optional)
    :param str protocol: Thrift protocol (optional)
    """
    def __init__(self,
                 host=DEFAULT_HOST,
                 port=DEFAULT_PORT,
                 timeout=None,
                 autoconnect=True,
                 table_prefix=None,
                 table_prefix_separator=b'_',
                 compat=DEFAULT_COMPAT,
                 transport=DEFAULT_TRANSPORT,
                 protocol=DEFAULT_PROTOCOL):
        """Validate the arguments, build the Thrift client and optionally connect.

        :raises ValueError: if `transport`, `compat` or `protocol` is not one
            of the supported values
        :raises TypeError: if `table_prefix` or `table_prefix_separator` is
            not a string
        """

        if transport not in THRIFT_TRANSPORTS:
            raise ValueError("'transport' must be one of %s" %
                             ", ".join(THRIFT_TRANSPORTS.keys()))

        if table_prefix is not None:
            if not isinstance(table_prefix, STRING_OR_BINARY):
                raise TypeError("'table_prefix' must be a string")
            table_prefix = ensure_bytes(table_prefix)

        if not isinstance(table_prefix_separator, STRING_OR_BINARY):
            raise TypeError("'table_prefix_separator' must be a string")
        table_prefix_separator = ensure_bytes(table_prefix_separator)

        if compat not in COMPAT_MODES:
            raise ValueError("'compat' must be one of %s" %
                             ", ".join(COMPAT_MODES))

        if protocol not in THRIFT_PROTOCOLS:
            raise ValueError("'protocol' must be one of %s" %
                             ", ".join(THRIFT_PROTOCOLS))

        # Allow host and port to be None, which may be easier for
        # applications wrapping a Connection instance.
        self.host = host or DEFAULT_HOST
        self.port = port or DEFAULT_PORT
        self.timeout = timeout
        self.table_prefix = table_prefix
        self.table_prefix_separator = table_prefix_separator
        self.compat = compat

        # Resolve the transport and protocol names to the classes used by
        # _refresh_thrift_client().
        self._transport_class = THRIFT_TRANSPORTS[transport]
        self._protocol_class = THRIFT_PROTOCOLS[protocol]
        self._refresh_thrift_client()

        if autoconnect:
            self.open()

        # Marker read by __del__() to tell a fully constructed instance from
        # one whose constructor failed part-way.
        self._initialized = True

    def _refresh_thrift_client(self):
        """Refresh the Thrift socket, transport, and client.

        The new transport is stored on ``self.transport`` and the new client
        on ``self.client``; the transport is not opened here.
        """
        socket = TSocket(host=self.host,
                         port=self.port,
                         socket_timeout=self.timeout)

        self.transport = self._transport_class(socket)
        protocol = self._protocol_class(self.transport, decode_response=False)
        self.client = TClient(Hbase, protocol)

    def _table_name(self, name):
        """Construct a table name by optionally adding a table name prefix."""
        name = ensure_bytes(name)
        if self.table_prefix is None:
            return name
        return self.table_prefix + self.table_prefix_separator + name

    def open(self):
        """Open the underlying transport to the HBase instance.

        This method opens the underlying Thrift transport (TCP connection).
        It does nothing if the transport is already open.
        """
        if self.transport.is_open():
            return

        logger.debug("Opening Thrift transport to %s:%d", self.host, self.port)
        self.transport.open()

    def close(self):
        """Close the underlying transport to the HBase instance.

        This method closes the underlying Thrift transport (TCP connection).
        It does nothing if the transport is not open.
        """
        if not self.transport.is_open():
            return

        if logger is not None:
            # If called from __del__(), module variables may no longer
            # exist.
            logger.debug("Closing Thrift transport to %s:%d", self.host,
                         self.port)

        self.transport.close()

    def __del__(self):
        """Close the connection on garbage collection, if it was fully built."""
        try:
            self._initialized
        except AttributeError:
            # Failure from constructor
            return
        else:
            self.close()

    def table(self, name, use_prefix=True):
        """Return a table object.

        Returns a :py:class:`happybase.Table` instance for the table
        named `name`. This does not result in a round-trip to the
        server, and the table is not checked for existence.

        The optional `use_prefix` argument specifies whether the table
        prefix (if any) is prepended to the specified `name`. Set this
        to `False` if you want to use a table that resides in another
        ‘prefix namespace’, e.g. a table from a ‘friendly’ application
        co-hosted on the same HBase instance. See the `table_prefix`
        argument to the :py:class:`Connection` constructor for more
        information.

        :param str name: the name of the table
        :param bool use_prefix: whether to use the table prefix (if any)
        :return: Table instance
        :rtype: :py:class:`Table`
        """
        name = ensure_bytes(name)
        if use_prefix:
            name = self._table_name(name)
        return Table(name, self)

    #
    # Table administration and maintenance
    #

    def tables(self):
        """Return a list of table names available in this HBase instance.

        If a `table_prefix` was set for this :py:class:`Connection`, only
        tables that have the specified prefix will be listed.

        :return: The table names
        :rtype: List of strings
        """
        names = self.client.getTableNames()

        # Filter using prefix, and strip prefix from names
        if self.table_prefix is not None:
            # Prefixing an empty name yields the prefix plus separator.
            prefix = self._table_name(b'')
            offset = len(prefix)
            names = [n[offset:] for n in names if n.startswith(prefix)]

        return names

    def create_table(self, name, families):
        """Create a table.

        :param str name: The table name
        :param dict families: The name and options for each column family

        The `families` argument is a dictionary mapping column family
        names to a dictionary containing the options for this column
        family, e.g.

        ::

            families = {
                'cf1': dict(max_versions=10),
                'cf2': dict(max_versions=1, block_cache_enabled=False),
                'cf3': dict(),  # use defaults
            }
            connection.create_table('mytable', families)

        These options correspond to the ColumnDescriptor structure in
        the Thrift API, but note that the names should be provided in
        Python style, not in camel case notation, e.g. `time_to_live`,
        not `timeToLive`. The following options are supported:

        * ``max_versions`` (`int`)
        * ``compression`` (`str`)
        * ``in_memory`` (`bool`)
        * ``bloom_filter_type`` (`str`)
        * ``bloom_filter_vector_size`` (`int`)
        * ``bloom_filter_nb_hashes`` (`int`)
        * ``block_cache_enabled`` (`bool`)
        * ``time_to_live`` (`int`)

        :raises TypeError: if `families` is not a dictionary
        :raises ValueError: if `families` is empty
        """
        name = self._table_name(name)
        if not isinstance(families, dict):
            raise TypeError("'families' arg must be a dictionary")

        if not families:
            raise ValueError(
                "Cannot create table %r (no column families specified)" % name)

        column_descriptors = []
        for cf_name, options in six.iteritems(families):
            # A family mapped to None gets no options.
            if options is None:
                options = dict()

            # Translate the Python-style option names to camel case.
            kwargs = dict()
            for option_name, value in six.iteritems(options):
                kwargs[pep8_to_camel_case(option_name)] = value

            # Column family names are passed on with a trailing colon.
            if not cf_name.endswith(':'):
                cf_name += ':'
            kwargs['name'] = cf_name

            column_descriptors.append(ColumnDescriptor(**kwargs))

        self.client.createTable(name, column_descriptors)

    def delete_table(self, name, disable=False):
        """Delete the specified table.

        .. versionadded:: 0.5
           `disable` argument

        In HBase, a table always needs to be disabled before it can be
        deleted. If the `disable` argument is `True`, this method first
        disables the table if it wasn't already and then deletes it.

        :param str name: The table name
        :param bool disable: Whether to first disable the table if needed
        """
        # is_table_enabled() and disable_table() add the prefix themselves,
        # so they are given the unprefixed name.
        if disable and self.is_table_enabled(name):
            self.disable_table(name)

        name = self._table_name(name)
        self.client.deleteTable(name)

    def enable_table(self, name):
        """Enable the specified table.

        :param str name: The table name
        """
        name = self._table_name(name)
        self.client.enableTable(name)

    def disable_table(self, name):
        """Disable the specified table.

        :param str name: The table name
        """
        name = self._table_name(name)
        self.client.disableTable(name)

    def is_table_enabled(self, name):
        """Return whether the specified table is enabled.

        :param str name: The table name

        :return: whether the table is enabled
        :rtype: bool
        """
        name = self._table_name(name)
        return self.client.isTableEnabled(name)

    def compact_table(self, name, major=False):
        """Compact the specified table.

        :param str name: The table name
        :param bool major: Whether to perform a major compaction.
        """
        name = self._table_name(name)
        if major:
            self.client.majorCompact(name)
        else:
            self.client.compact(name)
Exemple #23
0
class Connection(object):
    """Connection to an HBase Thrift server.

    The `host` and `port` arguments specify the host name and TCP port
    of the HBase Thrift server to connect to. If omitted or ``None``,
    a connection to the default port on ``localhost`` is made. If
    specified, the `timeout` argument specifies the socket timeout in
    milliseconds.

    If `autoconnect` is `True` (the default) the connection is made
    directly, otherwise :py:meth:`Connection.open` must be called
    explicitly before first use.

    The optional `table_prefix` and `table_prefix_separator` arguments
    specify a prefix and a separator string to be prepended to all table
    names, e.g. when :py:meth:`Connection.table` is invoked. For
    example, if `table_prefix` is ``myproject``, all tables will
    have names like ``myproject_XYZ``.

    The optional `compat` argument sets the compatibility level for
    this connection. Older HBase versions have slightly different Thrift
    interfaces, and using the wrong protocol can lead to crashes caused
    by communication errors, so make sure to use the correct one. This
    value can be either the string ``0.90``, ``0.92``, ``0.94``, or
    ``0.96`` (the default).

    The optional `transport` argument specifies the Thrift transport
    mode to use. Supported values for this argument are ``buffered``
    (the default) and ``framed``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. HBase versions before 0.94
    always use the buffered transport. Starting with HBase 0.94, the
    Thrift server optionally uses a framed transport, depending on the
    argument passed to the ``hbase-daemon.sh start thrift`` command.
    The default ``-threadpool`` mode uses the buffered transport; the
    ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes use the
    framed transport.

    The optional `protocol` argument specifies the Thrift transport
    protocol to use. Supported values for this argument are ``binary``
    (the default) and ``compact``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. ``TCompactProtocol`` is
    a more compact binary format that is typically more efficient to
    process as well. ``TBinaryProtocol`` is the default protocol that
    Happybase uses.

    .. versionadded:: 0.9
       `protocol` argument

    .. versionadded:: 0.5
       `timeout` argument

    .. versionadded:: 0.4
       `table_prefix_separator` argument

    .. versionadded:: 0.4
       support for framed Thrift transports

    :param str host: The host to connect to
    :param int port: The port to connect to
    :param int timeout: The socket timeout in milliseconds (optional)
    :param bool autoconnect: Whether the connection should be opened directly
    :param str table_prefix: Prefix used to construct table names (optional)
    :param str table_prefix_separator: Separator used for `table_prefix`
    :param str compat: Compatibility mode (optional)
    :param str transport: Thrift transport mode (optional)
    :param str protocol: Thrift protocol mode (optional)

    :raises ValueError: if `transport`, `compat` or `protocol` is not one
        of the supported values
    :raises TypeError: if `table_prefix` or `table_prefix_separator` is
        not a string
    """
    def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=None,
                 autoconnect=True, table_prefix=None,
                 table_prefix_separator='_', compat=DEFAULT_COMPAT,
                 transport=DEFAULT_TRANSPORT, protocol=DEFAULT_PROTOCOL):

        # `basestring` only exists on Python 2. Fall back to `str` so the
        # type checks below do not die with a NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if transport not in THRIFT_TRANSPORTS:
            raise ValueError("'transport' must be one of %s"
                             % ", ".join(THRIFT_TRANSPORTS.keys()))

        if table_prefix is not None \
                and not isinstance(table_prefix, string_types):
            raise TypeError("'table_prefix' must be a string")

        if not isinstance(table_prefix_separator, string_types):
            raise TypeError("'table_prefix_separator' must be a string")

        if compat not in COMPAT_MODES:
            raise ValueError("'compat' must be one of %s"
                             % ", ".join(COMPAT_MODES))

        if protocol not in THRIFT_PROTOCOLS:
            raise ValueError("'protocol' must be one of %s"
                             % ", ".join(THRIFT_PROTOCOLS))

        # Allow host and port to be None, which may be easier for
        # applications wrapping a Connection instance.
        self.host = host or DEFAULT_HOST
        self.port = port or DEFAULT_PORT
        self.timeout = timeout
        self.table_prefix = table_prefix
        self.table_prefix_separator = table_prefix_separator
        self.compat = compat

        self._transport_class = THRIFT_TRANSPORTS[transport]
        self._protocol_class = THRIFT_PROTOCOLS[protocol]
        self._refresh_thrift_client()

        if autoconnect:
            self.open()

        # Set last on purpose: __del__() uses the presence of this
        # attribute to tell whether the constructor ran to completion.
        self._initialized = True

    def _refresh_thrift_client(self):
        """Refresh the Thrift socket, transport, and client."""
        socket = TSocket(self.host, self.port)
        if self.timeout is not None:
            socket.set_timeout(self.timeout)

        self.transport = self._transport_class(socket)
        protocol = self._protocol_class(self.transport, decode_response=False)
        self.client = TClient(Hbase, protocol)

    def _table_name(self, name):
        """Construct a table name by optionally adding a table name prefix."""
        if self.table_prefix is None:
            return name

        return self.table_prefix + self.table_prefix_separator + name

    def open(self):
        """Open the underlying transport to the HBase instance.

        This method opens the underlying Thrift transport (TCP connection).
        Calling it while the transport is already open does nothing.
        """
        if self.transport.is_open():
            return

        logger.debug("Opening Thrift transport to %s:%d", self.host, self.port)
        self.transport.open()

    def close(self):
        """Close the underlying transport to the HBase instance.

        This method closes the underlying Thrift transport (TCP connection).
        Calling it while the transport is not open does nothing.
        """
        if not self.transport.is_open():
            return

        if logger is not None:
            # If called from __del__(), module variables may no longer
            # exist.
            logger.debug(
                "Closing Thrift transport to %s:%d",
                self.host, self.port)

        self.transport.close()

    def __del__(self):
        """Close the transport when a fully constructed instance goes away."""
        try:
            self._initialized
        except AttributeError:
            # Failure from constructor
            return
        else:
            self.close()

    def table(self, name, use_prefix=True):
        """Return a table object.

        Returns a :py:class:`happybase.Table` instance for the table
        named `name`. This does not result in a round-trip to the
        server, and the table is not checked for existence.

        The optional `use_prefix` argument specifies whether the table
        prefix (if any) is prepended to the specified `name`. Set this
        to `False` if you want to use a table that resides in another
        'prefix namespace', e.g. a table from a 'friendly' application
        co-hosted on the same HBase instance. See the `table_prefix`
        argument to the :py:class:`Connection` constructor for more
        information.

        :param str name: the name of the table
        :param bool use_prefix: whether to use the table prefix (if any)
        :return: Table instance
        :rtype: :py:class:`Table`
        """
        if use_prefix:
            name = self._table_name(name)
        return Table(name, self)

    #
    # Table administration and maintenance
    #

    def tables(self):
        """Return a list of table names available in this HBase instance.

        If a `table_prefix` was set for this :py:class:`Connection`, only
        tables that have the specified prefix will be listed, and the
        prefix (including the separator) is stripped from the returned
        names.

        :return: The table names
        :rtype: List of strings
        """
        names = self.client.getTableNames()

        # Filter using prefix, and strip prefix from names
        if self.table_prefix is not None:
            prefix = self._table_name('')
            offset = len(prefix)
            names = [n[offset:] for n in names if n.startswith(prefix)]

        return names

    def create_table(self, name, families):
        """Create a table.

        :param str name: The table name
        :param dict families: The name and options for each column family

        :raises TypeError: if `families` is not a dictionary
        :raises ValueError: if `families` is empty

        The `families` argument is a dictionary mapping column family
        names to a dictionary containing the options for this column
        family, e.g.

        ::

            families = {
                'cf1': dict(max_versions=10),
                'cf2': dict(max_versions=1, block_cache_enabled=False),
                'cf3': dict(),  # use defaults
            }
            connection.create_table('mytable', families)

        These options correspond to the ColumnDescriptor structure in
        the Thrift API, but note that the names should be provided in
        Python style, not in camel case notation, e.g. `time_to_live`,
        not `timeToLive`. The following options are supported:

        * ``max_versions`` (`int`)
        * ``compression`` (`str`)
        * ``in_memory`` (`bool`)
        * ``bloom_filter_type`` (`str`)
        * ``bloom_filter_vector_size`` (`int`)
        * ``bloom_filter_nb_hashes`` (`int`)
        * ``block_cache_enabled`` (`bool`)
        * ``time_to_live`` (`int`)
        """
        name = self._table_name(name)
        if not isinstance(families, dict):
            raise TypeError("'families' arg must be a dictionary")

        if not families:
            raise ValueError(
                "Cannot create table %r (no column families specified)"
                % name)

        column_descriptors = []
        # .items() (rather than the Python 2-only .iteritems()) keeps this
        # loop working on both Python 2 and Python 3.
        for cf_name, options in families.items():
            if options is None:
                options = dict()

            kwargs = dict()
            for option_name, value in options.items():
                kwargs[pep8_to_camel_case(option_name)] = value

            # Column family names are sent with a trailing colon.
            if not cf_name.endswith(':'):
                cf_name += ':'
            kwargs['name'] = cf_name

            column_descriptors.append(ColumnDescriptor(**kwargs))

        self.client.createTable(name, column_descriptors)

    def delete_table(self, name, disable=False):
        """Delete the specified table.

        .. versionadded:: 0.5
           `disable` argument

        In HBase, a table always needs to be disabled before it can be
        deleted. If the `disable` argument is `True`, this method first
        disables the table if it wasn't already and then deletes it.

        :param str name: The table name
        :param bool disable: Whether to first disable the table if needed
        """
        # is_table_enabled() and disable_table() add the prefix themselves,
        # so they must be given the unprefixed name.
        if disable and self.is_table_enabled(name):
            self.disable_table(name)

        name = self._table_name(name)
        self.client.deleteTable(name)

    def enable_table(self, name):
        """Enable the specified table.

        :param str name: The table name
        """
        name = self._table_name(name)
        self.client.enableTable(name)

    def disable_table(self, name):
        """Disable the specified table.

        :param str name: The table name
        """
        name = self._table_name(name)
        self.client.disableTable(name)

    def is_table_enabled(self, name):
        """Return whether the specified table is enabled.

        :param str name: The table name

        :return: whether the table is enabled
        :rtype: bool
        """
        name = self._table_name(name)
        return self.client.isTableEnabled(name)

    def compact_table(self, name, major=False):
        """Compact the specified table.

        :param str name: The table name
        :param bool major: Whether to perform a major compaction.
        """
        name = self._table_name(name)
        if major:
            self.client.majorCompact(name)
        else:
            self.client.compact(name)
Exemple #24
0
class RecommendationsClient(object):
    """Thin wrapper around a Thrift client for the recommendations service.

    The instance owns a framed transport over a ``TSocket`` and exposes
    ``open``/``close`` as well as the context-manager protocol, so it can
    be used in a ``with`` statement.
    """

    def __init__(self,
                 host='127.0.0.1',
                 port=7070,
                 timeout=3000,
                 calling_app=None,
                 **kwargs):
        """Store the connection settings and build the Thrift client.

        The transport is created here but not opened.

        :param host: host handed to ``TSocket``
        :param port: port handed to ``TSocket``
        :param timeout: value handed to ``TSocket`` as ``socket_timeout``
        :param calling_app: non-empty identifier sent with every request
        :param kwargs: accepted but not used
        :raises ValueError: if `calling_app` is empty or missing
        """
        self.__host, self.__port = host, port
        LOG.info(
            'Starting connection to RTS Recommendations on {0}:{1}'.format(
                host, port))
        self.__timeout = timeout

        if not calling_app:
            raise ValueError('Must supply a calling app string')
        self.__calling_app = calling_app

        self.__create_client()

        self.__open = False
        # Assigned last so that __del__ can tell a fully constructed
        # instance from one whose constructor raised.
        self.__initialized = True

    def __create_client(self):
        """Build the socket, framed transport, binary protocol and client."""
        sock = TSocket(self.__host,
                       self.__port,
                       socket_timeout=self.__timeout)
        transport_factory = TCyFramedTransportFactory()
        self.__transport = transport_factory.get_transport(sock)
        protocol_factory = TCyBinaryProtocolFactory()
        wire_protocol = protocol_factory.get_protocol(self.__transport)
        self.__client = TClient(
            recs_thrift.RecommendationsService, wire_protocol)

    def open(self):
        """Open the transport unless it is already open."""
        if not self.__transport.is_open():
            self.__transport.open()
            self.__open = True

    def close(self):
        """Close the transport unless it is already closed."""
        if self.__transport.is_open():
            self.__transport.close()
            self.__open = False

    def __del__(self):
        """Close the transport when a fully constructed instance is collected."""
        try:
            self.__initialized
        except AttributeError:
            # The constructor never finished; there is nothing to close.
            return
        self.close()

    def client(self):
        """Return the underlying Thrift client object."""
        return self.__client

    def is_open(self):
        """Return the open flag maintained by ``open`` and ``close``."""
        return self.__open

    def __enter__(self):
        """Open the transport and return this instance."""
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the transport; exceptions are not suppressed."""
        self.close()

    def get_recommendations(self, request):
        """Fetch recommendations for `request`.

        A request context is built from the calling app name and a random
        8-character id (uppercase letters and digits). Each item returned
        by the service is converted with ``Recommendation.from_thrift``.

        :param request: object providing ``to_thrift()``
        :return: list of converted recommendations
        """
        alphabet = string.ascii_uppercase + string.digits
        request_id = ''.join(random.choice(alphabet) for _ in range(8))
        context = recs_thrift.TRequestContext(self.__calling_app, request_id)

        thrift_recs = self.__client.get_recommendations(
            context, request.to_thrift())
        return [Recommendation.from_thrift(item) for item in thrift_recs]
Exemple #25
0
class Connection:
    """Minimal wrapper around an HBase Thrift client.

    The transport, protocol and client are created lazily by
    :meth:`open` and dropped again by :meth:`close`.
    """

    def __init__(self, host="127.0.0.1",
                 port=9090, timeout=5000, auto_connect=True, table_prefix="",
                 compat="0.98", transport_class=None, protocol_class=None):
        """Store the connection settings and optionally connect.

        :param host: host handed to ``TSocket``
        :param port: port handed to ``TSocket``
        :param timeout: value handed to ``TSocket`` as ``socket_timeout``
        :param auto_connect: call :meth:`open` right away when true
        :param table_prefix: stored, but not used by this class
        :param compat: stored as-is
        :param transport_class: transport factory; ``TBufferedTransport``
            when not given
        :param protocol_class: protocol factory; ``TBinaryProtocol`` when
            not given
        """
        if not transport_class:
            transport_class = TBufferedTransport
        if not protocol_class:
            protocol_class = TBinaryProtocol

        self.host, self.port, self.timeout = host, port, timeout
        self.table_prefix = table_prefix  # not used
        self.compat = compat
        self.transport_class = transport_class
        self.protocol_class = protocol_class

        # Populated by open(), reset to None by close().
        self.transport = self.protocol = self.client = None

        if auto_connect:
            self.open()

    def open(self):
        """Create a fresh transport/protocol/client and open the transport.

        Does nothing when a transport exists and is already open.
        """
        already_open = self.transport and self.transport.is_open()
        if already_open:
            logger.debug("transport was opened.")
            return

        logger.debug("new transport...")
        sock = TSocket(
            host=self.host, port=self.port, socket_timeout=self.timeout)
        self.transport = self.transport_class(sock)
        self.protocol = self.protocol_class(self.transport)
        self.client = TClient(hbase_thrift.Hbase, self.protocol)

        self.transport.open()

    def close(self):
        """Close an open transport and drop transport, protocol and client.

        Does nothing when there is no transport or it is not open.
        """
        if not self.transport or not self.transport.is_open():
            return

        logger.debug("transport is closing.")
        self.transport.close()
        self.transport = self.protocol = self.client = None

    # ############## TODO: split the methods below out of this class
    def table(self, name):
        """Return a ``Table`` bound to `name` and this connection."""
        return Table(name, self)

    def tables(self):
        """List all the userspace tables.

        Example result: ``['emp', 'project1_emp']``

        :return: whatever the client's ``getTableNames`` call returns
        :rtype: list
        """
        table_names = self.client.getTableNames()
        return table_names

    def create_table(self, table_name, column_families):
        """Create a table.

        See http://hbase.apache.org/0.94/book/columnfamily.html

        :param str table_name: name of the table to create
        :param list column_families: column family descriptors, passed to
            the client unchanged
        :return: whatever the client's ``createTable`` call returns
        """
        created = self.client.createTable(table_name, column_families)
        return created

    def delete_table(self, name, disable=False):
        """Delete a table, optionally disabling it first.

        :param str name: name of the table
        :param bool disable: when true and the table is enabled, disable
            it before deleting
        :return: None
        """
        if disable:
            if self.is_table_enabled(name):
                self.disable_table(name)
        self.client.deleteTable(name)

    def enable_table(self, name):
        """Bring a table on-line (enable it) unless it already is enabled.

        :param str name: name of the table
        :return: None
        """
        if self.is_table_enabled(name):
            return
        self.client.enableTable(name)

    def disable_table(self, name):
        """Disable a table (take it off-line) if it is currently enabled.

        :param str name: name of the table
        :return: the result of the client's ``disableTable`` call, or
            ``None`` when the table was not enabled
        """
        if not self.is_table_enabled(name):
            return None
        return self.client.disableTable(name)

    def is_table_enabled(self, name):
        """Ask the client whether a table is enabled.

        :param str name: name of the table to check
        :return: whatever the client's ``isTableEnabled`` call returns
        """
        enabled = self.client.isTableEnabled(name)
        return enabled

    def compact_table(self, table_name_or_region_name, major=False):
        """Compact the specified table or region.

        :param str table_name_or_region_name: name passed to the client
        :param major: use ``majorCompact`` instead of ``compact`` when true
        :return: whatever the chosen client call returns
        """
        compact = self.client.majorCompact if major else self.client.compact
        return compact(table_name_or_region_name)
Exemple #26
0
 def get_tclient(self, service, protocol):
     """Build a thriftpy ``TClient`` for `service` using `protocol`.

     :param service: service definition, passed to ``TClient`` unchanged
     :param protocol: protocol object, passed to ``TClient`` unchanged
     :return: the newly created ``TClient``
     """
     # thriftpy is imported at call time, not at module import time.
     from thriftpy import thrift as thriftpy_thrift
     return thriftpy_thrift.TClient(service, protocol)
Exemple #27
0
def make_client(service, host, port, proto_factory=TBinaryProtocolFactory()):
    """Create a client for `service` over an opened buffered socket transport.

    :param service: service definition, passed to ``TClient`` unchanged
    :param host: host handed to ``TSocket``
    :param port: port handed to ``TSocket``
    :param proto_factory: object whose ``get_protocol(transport)`` builds
        the protocol; the default instance is created once, when this
        function is defined
    :return: a ``TClient`` whose transport has already been opened
    """
    socket = TSocket(host, port)
    buffered_transport = TBufferedTransport(socket)
    client_protocol = proto_factory.get_protocol(buffered_transport)
    # The transport is opened here; the caller gets a ready-to-use client.
    buffered_transport.open()
    return TClient(service, client_protocol)