コード例 #1
0
    def import_table(self, data: pyarrow.Table):
        """Upload Arrow data to the server and wrap the result in a Table.

        Every field of the incoming schema is rebuilt with the metadata that
        ``_map_arrow_type`` returns for its type. The data is then streamed
        through a Flight ``do_put`` call addressed by a newly obtained session
        ticket, and a ``Table`` bound to that ticket is returned.

        Args:
            data: the Arrow data to upload; must be a ``pa.Table`` or a
                ``pa.RecordBatch``

        Returns:
            a ``Table`` created from the session, the flight ticket, the row
            count of ``data`` and the rebuilt schema

        Raises:
            DHError: if ``data`` has an unsupported type or any step of the
                upload fails
        """
        try:
            call_options = paflight.FlightCallOptions(
                headers=self.session.grpc_metadata)
            if not isinstance(data, (pa.Table, pa.RecordBatch)):
                raise DHError(
                    "source data must be either a pa table or RecordBatch.")

            export_ticket = self.session.get_ticket()

            # Same names and types as the source schema, with the metadata
            # supplied by _map_arrow_type attached to each field.
            dh_schema = pa.schema([
                pa.field(name=field.name,
                         type=field.type,
                         metadata=_map_arrow_type(field.type))
                for field in data.schema
            ])

            descriptor = pa.flight.FlightDescriptor.for_path(
                "export", str(export_ticket))
            put_writer, put_reader = self._flight_client.do_put(
                descriptor, dh_schema, options=call_options)
            put_writer.write_table(data)
            put_writer.close()
            # The value read back is not used; only the read itself matters.
            put_reader.read()

            return Table(self.session,
                         ticket=self.session.make_ticket(export_ticket),
                         size=data.num_rows,
                         schema=dh_schema)
        except Exception as e:
            raise DHError(
                "failed to create a Deephaven table from Arrow data.") from e
コード例 #2
0
 def snapshot_table(self, table: Table):
     """Fetch the current contents of a table through a Flight ``do_get`` call.

     Args:
         table: the table to read; its ``ticket.ticket`` value is used to
             build the Flight ticket

     Returns:
         whatever ``read_all()`` on the Flight stream reader returns

     Raises:
         DHError: if any step of the download fails
     """
     try:
         call_options = paflight.FlightCallOptions(
             headers=self.session.grpc_metadata)
         stream = self._flight_client.do_get(
             paflight.Ticket(table.ticket.ticket), options=call_options)
         return stream.read_all()
     except Exception as e:
         raise DHError("failed to take a snapshot of the table.") from e
コード例 #3
0
def test_timeout_fires():
    """Check that a call with a 0.2 s timeout against SlowFlightServer times out."""
    with flight_server(SlowFlightServer) as location:
        slow_client = flight.FlightClient.connect(location)
        noop_action = flight.Action("", b"")
        short_timeout = flight.FlightCallOptions(timeout=0.2)
        # Only the exception type is checked: gRPC error messages differ
        # between versions, so the message text is not inspected.
        with pytest.raises(flight.FlightTimedOutError):
            results = slow_client.do_action(noop_action, options=short_timeout)
            # Consume the results so the call is actually driven.
            list(results)
コード例 #4
0
    def __init__(self, user, password, location=(), tls=False, verifyTls=True):
        """Prepare basic-auth call options and create the Flight client.

        Args:
            user: user name placed in the ``authorization`` header
            password: password placed in the ``authorization`` header
            location: optional ``(host, port)`` sequence; missing entries fall
                back to ``_DEFAULT_HOST`` / ``_DEFAULT_PORT``
            tls: build a TLS location instead of a plain TCP one
            verifyTls: when false, the client is created with
                ``disable_server_verification=True``
        """
        credentials = f'{user}:{password}'.encode('utf8')
        auth_header = (b'authorization', b'Basic ' + base64.b64encode(credentials))
        self._options = flight.FlightCallOptions(headers=[auth_header])

        # Use the caller's host/port when given, the module defaults otherwise.
        host = location[0] if len(location) > 0 else _DEFAULT_HOST
        port = location[1] if len(location) > 1 else _DEFAULT_PORT

        make_location = (flight.Location.for_grpc_tls
                         if tls else flight.Location.for_grpc_tcp)
        self._location = make_location(host, port)
        self._client = flight.FlightClient(
            self._location, disable_server_verification=(not verifyTls))
コード例 #5
0
ファイル: __init__.py プロジェクト: rymurr/dremio_client
    def connect(
        hostname="localhost", port=32010, username="******", password="******", tls_root_certs_filename=None
    ):
        """
        Create an Arrow Flight client for Dremio together with its call options.

        No authentication round-trip is performed here. When both username and
        password are given, the returned call options carry an HTTP Basic
        ``authorization`` header; otherwise the options carry no headers.

        :param hostname: Dremio coordinator hostname
        :param port: Dremio coordinator port
        :param username: Username on Dremio; auth header is skipped if empty or None
        :param password: Password on Dremio; auth header is skipped if empty or None
        :param tls_root_certs_filename: use TLS to connect, with root certs read from this file
        :return: tuple of (flight call options, arrow flight client)
        """
        # Default to an unencrypted TCP connection unless root certs are given.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls_root_certs_filename:
            # The certificates are handed to the client as bytes, so read the
            # file in binary mode rather than decoding it as text.
            with open(tls_root_certs_filename, "rb") as root_certs:
                connection_args["tls_root_certs"] = root_certs.read()
            scheme = "grpc+tls"

        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient("{}://{}:{}".format(scheme, hostname, port),
                                     middleware=[client_auth_middleware], **connection_args)

        if username and password:
            encoded_credentials = base64.b64encode(username.encode() + b':' + password.encode())
            initial_options = flight.FlightCallOptions(headers=[
                (b'authorization', b'Basic ' + encoded_credentials)
            ])
        else:
            # Without credentials there is nothing to put in the header; still
            # return usable options so the final return cannot hit an unbound name.
            initial_options = flight.FlightCallOptions()
        return initial_options, client
コード例 #6
0
def connect_to_dremio_flight_server_endpoint(hostname, flightport, username,
                                             password, sqlquery, tls, certs):
    """
    Authenticate against a Dremio Flight endpoint and optionally run a query.

    Progress is reported with print(). When ``sqlquery`` is truthy, the result
    schema, the first endpoint's ticket and the result set (as a pandas object)
    are printed. When ``tls`` is set without ``certs``, an error is printed and
    ``sys.exit()`` is called. Any other exception is printed and re-raised.
    """

    try:
        # Plain TCP unless TLS is requested below.
        protocol = "grpc+tcp"
        client_kwargs = {}

        if tls:
            print('[INFO] Enabling TLS connection')
            protocol = "grpc+tls"
            if not certs:
                print(
                    '[ERROR] Trusted certificates must be provided to establish a TLS connection'
                )
                sys.exit()
            print('[INFO] Trusted certificates provided')
            # The root certificates are passed to the client as a keyword argument.
            with open(certs, "rb") as cert_file:
                client_kwargs["tls_root_certs"] = cert_file.read()

        # Two WLM settings are sent along with the initial authentication
        # request: the routing tag and the routing queue.
        wlm_headers = [
            (b'routing-tag', b'test-routing-tag'),
            (b'routing-queue', b'Low Cost User Queries'),
        ]
        auth_call_options = flight.FlightCallOptions(headers=wlm_headers)
        auth_middleware = DremioClientAuthMiddlewareFactory()
        endpoint = "{}://{}:{}".format(protocol, hostname, flightport)
        client = flight.FlightClient(endpoint,
                                     middleware=[auth_middleware],
                                     **client_kwargs)

        # Basic authentication; the returned pair is reused as a header below.
        token_header = client.authenticate_basic_token(username, password,
                                                       auth_call_options)
        print('[INFO] Authentication was successful')

        if not sqlquery:
            return

        descriptor = flight.FlightDescriptor.for_command(sqlquery)
        print('[INFO] Query: ', sqlquery)

        # Further headers (for example a schema context) could be sent next
        # to the bearer token; only the token is sent here.
        query_options = flight.FlightCallOptions(headers=[token_header])

        result_schema = client.get_schema(descriptor, query_options)
        print('[INFO] GetSchema was successful')
        print('[INFO] Schema: ', result_schema)

        # The FlightInfo carries the ticket that identifies the result set.
        info = client.get_flight_info(
            flight.FlightDescriptor.for_command(sqlquery), query_options)
        print('[INFO] GetFlightInfo was successful')
        print('[INFO] Ticket: ', info.endpoints[0].ticket)

        # Stream the result set and print it as pandas data.
        stream = client.do_get(info.endpoints[0].ticket, query_options)
        print('[INFO] Reading query results from Dremio')
        print(stream.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise
コード例 #7
0
def test_timeout_passes():
    """Check that a 5 s timeout does not fire when reading from ConstantFlightServer."""
    with flight_server(ConstantFlightServer) as location:
        fast_client = flight.FlightClient.connect(location)
        generous_timeout = flight.FlightCallOptions(timeout=5.0)
        reader = fast_client.do_get(flight.Ticket(b'ints'),
                                    options=generous_timeout)
        # Reading the whole stream must finish without raising.
        reader.read_all()
コード例 #8
0
def test_timeout_passes():
    """Check that a 5 s timeout does not fire when reading from ConstantFlightServer."""
    with ConstantFlightServer() as running_server:
        fast_client = FlightClient(('localhost', running_server.port))
        generous_timeout = flight.FlightCallOptions(timeout=5.0)
        reader = fast_client.do_get(flight.Ticket(b'ints'),
                                    options=generous_timeout)
        # Reading the whole stream must finish without raising.
        reader.read_all()
コード例 #9
0
        put_writer, put_meta_reader = self.con.do_put(desc, table.schema,
                                                      options=self.options)
        put_writer.write(table)
        put_writer.close()

    # Request a pyarrow.Table by name
    def get_table(self, name):
        """Fetch the table stored under ``name`` and return the fully read result."""
        ticket = flight.Ticket(name.encode('utf8'))
        stream = self.con.do_get(ticket, options=self.options)
        return stream.read_all()

    def list_actions(self):
        """Return whatever the underlying connection's ``list_actions()`` returns."""
        available_actions = self.con.list_actions()
        return available_actions

# Build call options that request zstd compression for IPC writes and create
# the demo client with them.
ipc_options = pa.ipc.IpcWriteOptions(compression='zstd')
options = flight.FlightCallOptions(write_options=ipc_options)
client = DemoClient(location, options=options)

# NOTE(review): get_s3_dataset is defined elsewhere; presumably it returns the
# dataset plus the list of its file names - confirm against its definition.
dataset, files_list = get_s3_dataset("s3://molbeam/tested")

# Send each batch (restricted to three columns) to the server under the file
# name found at the same index in files_list.
# NOTE(review): this assumes batches and files line up one-to-one - confirm.
for count, table in tenumerate(dataset.to_batches(columns=["canonical_ID", "enumerated_smiles", "achiral_fp"]), total=len(files_list)):

    client.cache_table_in_server(files_list[count], table)

@stopwatch
def get_single_table_from_flight_server(target):
    """Fetch the table named ``target`` through the module-level client."""
    return client.get_table(target)

# Fetch the table stored under the first file name and show it. The helper is
# named get_single_table_from_flight_server; no get_table_from_flight_server
# is defined in the visible script.
recieved_table = get_single_table_from_flight_server(files_list[0])
print(recieved_table)
コード例 #10
0
    def run(
        self,
        username: str,
        password: str,
        hostname: str,
        flightport: int,
        tls: bool,
        certs: str,
        query: str,
    ) -> Dict:
        """
        Task run method. Executes a query against Dremio and fetches results.

        Args:
            - username (str): user name used to authenticate
            - password (str): password used to authenticate
            - hostname (str): Dremio host address
            - flightport (int): port used to connect to FlightClient
            - tls (bool): connect to the server endpoint with an encrypted TLS connection
            - certs (str): path to a certificate; required when `tls` is set
            - query (str): query to execute against query engine

        Returns:
            - dict: a dictionary of data returned by Dremio, mapping column names
                to lists of values

        Raises:
            - ValueError: if `query` is `None` or empty
            - SystemExit: if `tls` is set but no `certs` path is provided
        """
        if not query:
            raise ValueError("A query string must be provided")

        # Default to an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            self.logger.debug("Enabling TLS connection")
            scheme = "grpc+tls"
            if certs:
                self.logger.debug("Trusted certificates provided")
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            else:
                self.logger.error(
                    "Trusted certificates must be provided to establish a TLS connection"
                )
                sys.exit()
        else:
            self.logger.info(
                "You are not using a secure connection. Consider setting tls=True"
            )

        # Two WLM settings can be provided upon initial authentication
        # with the Dremio Server Flight Endpoint:
        # - routing-tag
        # - routing queue
        initial_options = flight.FlightCallOptions(headers=[
            (b"routing-tag", b"test-routing-tag"),
            (b"routing-queue", b"Low Cost User Queries"),
        ])
        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient(
            "{}://{}:{}".format(scheme, hostname, flightport),
            middleware=[client_auth_middleware],
            **connection_args,
        )

        # Authenticate with the server endpoint.
        bearer_token = client.authenticate_basic_token(username, password,
                                                       initial_options)
        self.logger.debug(
            "Authentication was successful. Token is valid for 30 hours.")

        # Every subsequent call carries the bearer token as a header.
        options = flight.FlightCallOptions(headers=[bearer_token])

        # Get the FlightInfo message to retrieve the Ticket corresponding
        # to the query result set.
        flight_info = client.get_flight_info(
            flight.FlightDescriptor.for_command(query), options)
        self.logger.debug("GetFlightInfo was successful")

        # Retrieve the result set as a stream of Arrow record batches.
        reader = client.do_get(flight_info.endpoints[0].ticket, options)
        self.logger.debug("Reading query results from Dremio")

        # read_all() drains the stream into one table using the stream's own
        # schema, so a result set with zero record batches yields an empty
        # table instead of failing in Table.from_batches for lack of a schema.
        data = reader.read_all()

        return data.to_pydict()
コード例 #11
0
def connect_to_dremio_flight_server_endpoint(host, port, username, password, query,
                                             tls, certs, disable_server_verification, pat_or_auth_token,
                                             engine, session_properties):
    """
    Connects to Dremio Flight server endpoint with the provided credentials.
    It also runs the query and retrieves the result set.

    Authentication uses ``pat_or_auth_token`` as a bearer token when given;
    otherwise ``username``/``password`` are exchanged for a bearer token.
    ``session_properties`` is an optional iterable of ``(bytes, bytes)`` header
    pairs; it is copied, never modified. Progress is reported with print().
    Calls ``sys.exit()`` when TLS is requested without certificates (and
    without disabling verification) or when no credentials are supplied. Any
    other exception is printed and re-raised.
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            elif disable_server_verification:
                # Connect to the server endpoint with server verification disabled.
                print('[INFO] Disable TLS server verification.')
                connection_args['disable_server_verification'] = disable_server_verification
            else:
                print('[ERROR] Trusted certificates must be provided to establish a TLS connection')
                sys.exit()

        # Build the headers on a private copy: routing and authorization
        # entries are appended below and must not leak into the caller's
        # session_properties object.
        headers = list(session_properties) if session_properties else []

        if engine:
            headers.append((b'routing_engine', engine.encode('utf-8')))

        # Two WLM settings can be provided upon initial authentication with the Dremio Server Flight Endpoint:
        # routing_tag
        # routing_queue
        headers.append((b'routing_tag', b'test-routing-tag'))
        headers.append((b'routing_queue', b'Low Cost User Queries'))

        # Fail before building any client when there is nothing to authenticate with.
        if not pat_or_auth_token and not (username and password):
            print('[ERROR] Username/password or PAT/Auth token must be supplied.')
            sys.exit()

        client_cookie_middleware = CookieMiddlewareFactory()
        location = "{}://{}:{}".format(scheme, host, port)

        if pat_or_auth_token:
            client = flight.FlightClient(location,
                                         middleware=[client_cookie_middleware], **connection_args)

            # The token is only sent as a header; no call is made here.
            headers.append((b'authorization', "Bearer {}".format(pat_or_auth_token).encode('utf-8')))
            print('[INFO] Authentication skipped until first request')
        else:
            client_auth_middleware = DremioClientAuthMiddlewareFactory()
            client = flight.FlightClient(location,
                                         middleware=[client_auth_middleware, client_cookie_middleware],
                                         **connection_args)

            # Authenticate with the server endpoint.
            bearer_token = client.authenticate_basic_token(username, password,
                                                           flight.FlightCallOptions(headers=headers))
            print('[INFO] Authentication was successful')
            headers.append(bearer_token)

        if query:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(query)
            print('[INFO] Query: ', query)

            # In addition to the bearer token, a query context (for example a
            # (b'schema', ...) pair) can be supplied through session_properties.
            options = flight.FlightCallOptions(headers=headers)

            # Retrieve the schema of the result set.
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise