Beispiel #1
0
class TestAIOFramedBinary(_TestAIO):
    """
    ``_TestAIO`` variant configured with a framed transport factory and a
    binary protocol factory.

    NOTE(review): presumably ``_TestAIO`` reads these two class attributes
    when it builds its connection -- confirm against the base class.
    """
    TRANSPORT_FACTORY = TAsyncFramedTransportFactory()
    PROTOCOL_FACTORY = TAsyncBinaryProtocolFactory()
class Connection:
    """
    Connection to an HBase Thrift server.

    The `host` and `port` arguments specify the host name and TCP port
    of the HBase Thrift server to connect to. If omitted or ``None``,
    a connection to the default port on ``localhost`` is made. If
    specified, the `timeout` argument specifies the socket timeout in
    milliseconds.

    If `autoconnect` is `True` the connection is made directly during
    initialization. Otherwise a context manager should be used (with
    Connection...) or :py:meth:`Connection.open` must be called explicitly
    before first use. Note that due to limitations in the Python async
    framework, a RuntimeError will be raised if `autoconnect` or a plain
    (non-async) ``with`` block is used inside of a running asyncio event
    loop.

    The optional `table_prefix` and `table_prefix_separator` arguments
    specify a prefix and a separator string to be prepended to all table
    names, e.g. when :py:meth:`Connection.table` is invoked. For
    example, if `table_prefix` is ``myproject``, all tables will
    have names like ``myproject_XYZ``.

    The optional `compat` argument sets the compatibility level for
    this connection. Older HBase versions have slightly different Thrift
    interfaces, and using the wrong protocol can lead to crashes caused
    by communication errors, so make sure to use the correct one. This
    value can be either the string ``0.90``, ``0.92``, ``0.94``, or
    ``0.96`` (the default).

    The optional `transport` argument specifies the Thrift transport
    mode to use. Supported values for this argument are ``buffered``
    (the default) and ``framed``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. HBase versions before 0.94
    always use the buffered transport. Starting with HBase 0.94, the
    Thrift server optionally uses a framed transport, depending on the
    argument passed to the ``hbase-daemon.sh start thrift`` command.
    The default ``-threadpool`` mode uses the buffered transport; the
    ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes use the
    framed transport.

    The optional `protocol` argument specifies the Thrift transport
    protocol to use. Supported values for this argument are ``binary``
    (the default) and ``compact``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. ``TCompactProtocol`` is
    a more compact binary format that is typically more efficient to
    process as well. ``TBinaryProtocol`` is the default protocol that
    AIOHappyBase uses.

    The optional `client` argument specifies the type of Thrift client
    to use. Supported values for this argument are ``socket``
    (the default) and ``http``. Make sure to choose the right one,
    since otherwise you might see non-obvious connection errors or
    program hangs when making a connection. To check which client
    you should use, refer to the ``hbase.regionserver.thrift.http``
    setting. If it is ``true`` use ``http``, otherwise use ``socket``.

    .. versionadded:: v1.4.0
        `client` argument

    .. versionadded:: 0.9
       `protocol` argument

    .. versionadded:: 0.5
       `timeout` argument

    .. versionadded:: 0.4
       `table_prefix_separator` argument

    .. versionadded:: 0.4
       support for framed Thrift transports
    """
    # TODO: Auto generate these?
    THRIFT_TRANSPORTS = dict(
        buffered=TAsyncBufferedTransportFactory(),
        framed=TAsyncFramedTransportFactory(),
    )
    THRIFT_PROTOCOLS = dict(
        binary=TAsyncBinaryProtocolFactory(decode_response=False),
        compact=TAsyncCompactProtocolFactory(decode_response=False),
    )
    THRIFT_CLIENTS = dict(
        socket=make_client,
        http=make_http_client,
    )

    def __init__(self,
                 host: str = DEFAULT_HOST,
                 port: int = DEFAULT_PORT,
                 timeout: int = None,
                 autoconnect: bool = False,
                 table_prefix: AnyStr = None,
                 table_prefix_separator: AnyStr = b'_',
                 compat: str = DEFAULT_COMPAT,
                 transport: str = DEFAULT_TRANSPORT,
                 protocol: str = DEFAULT_PROTOCOL,
                 client: str = DEFAULT_CLIENT,
                 **client_kwargs: Any):
        """
        Validate the settings and store everything needed to build the
        Thrift client later. The client itself is only created right away
        when `autoconnect` is true.

        :param host: The host to connect to
        :param port: The port to connect to
        :param timeout: The socket timeout in milliseconds (optional)
        :param autoconnect: Whether the connection should be opened directly
        :param table_prefix: Prefix used to construct table names (optional)
        :param table_prefix_separator: Separator used for `table_prefix`
        :param compat: Compatibility mode (optional)
        :param transport: Thrift transport mode (optional)
        :param protocol: Thrift protocol mode (optional)
        :param client: Thrift client mode (optional)
        :param client_kwargs:
            Extra keyword arguments for `make_client()`. See the ThriftPy2
            documentation for more information. They are merged last, so
            they override the values derived from the other arguments.
        :raises TypeError:
            if `table_prefix` (when given) or `table_prefix_separator`
            is neither `str` nor `bytes`
        """
        text_types = (str, bytes)

        # The prefix is optional; the separator is always validated.
        has_prefix = table_prefix is not None
        if has_prefix and not isinstance(table_prefix, text_types):
            raise TypeError("'table_prefix' must be a string")
        if has_prefix:
            table_prefix = ensure_bytes(table_prefix)

        if not isinstance(table_prefix_separator, text_types):
            raise TypeError("'table_prefix_separator' must be a string")
        table_prefix_separator = ensure_bytes(table_prefix_separator)

        # Each mode name is checked against the collection of values
        # this class supports.
        check_invalid_items(
            compat=(compat, COMPAT_MODES),
            transport=(transport, self.THRIFT_TRANSPORTS),
            protocol=(protocol, self.THRIFT_PROTOCOLS),
            client=(client, self.THRIFT_CLIENTS),
        )

        # A falsy host or port (e.g. None) falls back to the default, which
        # may be easier for applications wrapping a Connection instance.
        self.host = host if host else DEFAULT_HOST
        self.port = port if port else DEFAULT_PORT
        self.timeout = timeout
        self.table_prefix = table_prefix
        self.table_prefix_separator = table_prefix_separator
        self.compat = compat

        # Resolve the mode names to the concrete factories.
        self._transport_factory = self.THRIFT_TRANSPORTS[transport]
        self._protocol_factory = self.THRIFT_PROTOCOLS[protocol]
        self._client_factory = self.THRIFT_CLIENTS[client]

        # Arguments handed to the client factory when the connection opens.
        factory_kwargs = dict(
            service=Hbase,
            host=self.host,
            port=self.port,
            timeout=self.timeout,
            trans_factory=self._transport_factory,
            proto_factory=self._protocol_factory,
        )
        factory_kwargs.update(client_kwargs)
        self.client_kwargs = factory_kwargs
        self.client = None

        if autoconnect:
            self._autoconnect()

    def _autoconnect(self):
        """
        Open the connection from synchronous code by handing the
        :py:meth:`open` coroutine to ``run_coro``.
        """
        opening = self.open()
        run_coro(opening, "Cannot autoconnect in a running event loop!")

    def _table_name(self, name: AnyStr) -> bytes:
        """
        Return `name` as bytes, preceded by the table prefix and the
        separator when a prefix is configured.
        """
        raw_name = ensure_bytes(name)
        prefix = self.table_prefix
        if prefix is not None:
            return prefix + self.table_prefix_separator + raw_name
        return raw_name

    async def open(self) -> None:
        """
        Create the underlying client to the HBase instance unless one
        already exists, which makes repeated calls harmless.
        """
        if self.client is None:
            logger.debug(
                f"Opening Thrift transport to {self.host}:{self.port}")
            self.client = await self._client_factory(**self.client_kwargs)

    def close(self) -> None:
        """
        Close and discard the underlying client to the HBase instance.

        Calling this when no client exists does nothing, so it is safe to
        call repeatedly. Because the client is dropped afterwards, using
        the connection again fails until :py:meth:`open` has been called
        to create a new client.
        """
        active_client = self.client
        if active_client is None:
            return

        # The module-level logger may already be gone if this runs while
        # the interpreter is tearing down (e.g. via __del__()).
        if logger is not None:
            logger.debug(
                f"Closing Thrift transport to {self.host}:{self.port}")

        active_client.close()
        self.client = None

    def table(self, name: AnyStr, use_prefix: bool = True) -> Table:
        """
        Build a :py:class:`Table` object for the table called `name`.

        Nothing is sent to the server here: the table object is simply
        constructed, and no check is made that the table exists.

        By default the connection's table prefix (if any) is prepended
        to `name`. Pass ``use_prefix=False`` to address a table outside
        this connection's prefix namespace, e.g. one that belongs to
        another application sharing the same HBase instance. See the
        `table_prefix` argument of the :py:class:`Connection`
        constructor for details.

        :param name: the name of the table
        :param use_prefix: whether to use the table prefix (if any)
        :return: Table instance
        """
        raw_name = ensure_bytes(name)
        full_name = self._table_name(raw_name) if use_prefix else raw_name
        return Table(full_name, self)

    # Table administration and maintenance

    async def tables(self) -> List[bytes]:
        """
        Fetch the table names known to this HBase instance.

        When the :py:class:`Connection` has a `table_prefix`, only the
        tables carrying that prefix are reported, and the prefix (with
        its separator) is removed from each returned name.

        :return: The table names
        """
        all_names = await self.client.getTableNames()
        if self.table_prefix is None:
            return all_names

        # Prefixing the empty name yields exactly "prefix + separator".
        full_prefix = self._table_name(b'')
        skip = len(full_prefix)
        return [
            table_name[skip:]
            for table_name in all_names
            if table_name.startswith(full_prefix)
        ]

    async def create_table(self, name: AnyStr,
                           families: Dict[str, Dict[str, Any]]) -> Table:
        """
        Create a table with the given column families.

        :param name: The table name
        :param families: The name and options for each column family
        :return: The created table instance
        :raises TypeError: if `families` is not a dictionary
        :raises ValueError: if `families` is empty

        `families` maps each column family name to a dictionary holding
        the options for that family (an empty or ``None`` value means
        "use the defaults"), e.g.

        ::

            families = {
                'cf1': dict(max_versions=10),
                'cf2': dict(max_versions=1, block_cache_enabled=False),
                'cf3': dict(),  # use defaults
            }
            connection.create_table('mytable', families)

        The options map onto the ColumnDescriptor structure of the
        Thrift API. Spell them Python style (`time_to_live`), not in
        camel case (`timeToLive`); the conversion happens here. The
        following options are supported:

        * ``max_versions`` (`int`)
        * ``compression`` (`str`)
        * ``in_memory`` (`bool`)
        * ``bloom_filter_type`` (`str`)
        * ``bloom_filter_vector_size`` (`int`)
        * ``bloom_filter_nb_hashes`` (`int`)
        * ``block_cache_enabled`` (`bool`)
        * ``time_to_live`` (`int`)
        """
        name = self._table_name(name)

        if not isinstance(families, dict):
            raise TypeError("'families' arg must be a dictionary")
        if not families:
            raise ValueError(f"No column families given for table: {name!r}")

        column_descriptors = []
        for family, settings in families.items():
            descriptor_args = {
                snake_to_camel_case(key): setting
                for key, setting in (settings or {}).items()
            }
            # The family name is sent with a trailing colon; add one
            # unless the caller already supplied it.
            descriptor_args['name'] = (
                family if family.endswith(':') else family + ':')
            column_descriptors.append(ColumnDescriptor(**descriptor_args))

        await self.client.createTable(name, column_descriptors)
        # `name` already carries the prefix, so it must not be added again.
        return self.table(name, use_prefix=False)

    async def delete_table(self, name: AnyStr, disable: bool = False) -> None:
        """
        Delete the specified table.

        .. versionadded:: 0.5
           `disable` argument

        HBase only allows a disabled table to be deleted. With
        ``disable=True`` this method checks whether the table is still
        enabled and, if so, disables it before issuing the delete.

        :param name: The table name
        :param disable: Whether to first disable the table if needed
        """
        if disable:
            currently_enabled = await self.is_table_enabled(name)
            if currently_enabled:
                await self.disable_table(name)

        drop = self.client.deleteTable
        await drop(self._table_name(name))

    async def enable_table(self, name: AnyStr) -> None:
        """
        Ask the server to enable the table called `name` (the table
        prefix, if any, is applied first).

        :param name: The table name
        """
        enable = self.client.enableTable
        await enable(self._table_name(name))

    async def disable_table(self, name: AnyStr) -> None:
        """
        Ask the server to disable the table called `name` (the table
        prefix, if any, is applied first).

        :param name: The table name
        """
        disable = self.client.disableTable
        await disable(self._table_name(name))

    async def is_table_enabled(self, name: AnyStr) -> bool:
        """
        Return whether the specified table is enabled.

        The table prefix (if any) is applied to `name` before the server
        is queried.

        :param name: The table name
        :return: whether the table is enabled
        """
        # The return annotation used to be ``None`` although the server's
        # answer is returned and used as a boolean by delete_table().
        return await self.client.isTableEnabled(self._table_name(name))

    async def compact_table(self, name: AnyStr, major: bool = False) -> None:
        """
        Request a compaction of the specified table.

        :param name: The table name
        :param major: Whether to perform a major compaction.
        """
        name = self._table_name(name)
        client = self.client
        # Pick the Thrift call matching the requested compaction kind.
        compaction = client.majorCompact if major else client.compact
        await compaction(name)

    # Support async context usage
    async def __aenter__(self) -> 'Connection':
        """Open the connection on entering ``async with`` and return it."""
        await self.open()
        return self

    async def __aexit__(self, *_exc) -> None:
        """
        Close the connection on leaving ``async with``. The exception
        details are ignored and nothing is returned, so an exception
        raised in the block is not suppressed.
        """
        self.close()

    # Support context usage
    def __enter__(self) -> 'Connection':
        """
        Open the connection on entering a plain ``with`` block by handing
        the :py:meth:`open` coroutine to ``run_coro``, then return the
        connection itself.
        """
        opening = self.open()
        run_coro(opening, error="Use 'async with' in a running event loop!")
        return self

    def __exit__(self, *_exc) -> None:
        """
        Close the connection on leaving a plain ``with`` block. The
        exception details are ignored and nothing is returned, so an
        exception raised in the block is not suppressed.
        """
        self.close()

    def __del__(self) -> None:
        """
        Log a warning if the connection is garbage collected while its
        transport still reports being open.

        This is a best-effort diagnostic: it never closes anything, and
        any ordinary error raised while checking is ignored.
        """
        try:
            if self.client._iprot.trans.is_open():  # noqa
                logger.warning(f"{self} was not closed!")
        except Exception:
            # Expected when there is nothing to inspect: `client` is None
            # or was never assigned, or module globals such as `logger`
            # are already gone during interpreter shutdown. Only
            # `Exception` is caught so that KeyboardInterrupt and
            # SystemExit are not silently swallowed here.
            pass