def import_table(self, data: pyarrow.Table):
    """Upload Arrow data to the server over Flight and return a Table for it.

    Args:
        data: the Arrow data to upload. The annotation names only a Table,
            but the runtime check below accepts a pa.Table or a
            pa.RecordBatch.

    Returns:
        a Table created from the session, the export ticket, the number of
        rows in ``data`` and the schema that was sent to the server.

    Raises:
        DHError: on any failure, including an unsupported ``data`` type; the
            underlying error is chained as the cause.
    """
    try:
        options = paflight.FlightCallOptions(
            headers=self.session.grpc_metadata)
        if not isinstance(data, (pa.Table, pa.RecordBatch)):
            raise DHError(
                "source data must be either a pa table or RecordBatch.")

        ticket = self.session.get_ticket()
        # Rebuild every field with the metadata produced by _map_arrow_type
        # for its Arrow type, keeping the original name and type.
        dh_fields = [
            pa.field(name=f.name, type=f.type,
                     metadata=_map_arrow_type(f.type))
            for f in data.schema
        ]
        dh_schema = pa.schema(dh_fields)

        writer, reader = self._flight_client.do_put(
            pa.flight.FlightDescriptor.for_path("export", str(ticket)),
            dh_schema,
            options=options)
        # write() accepts both a Table and a RecordBatch; write_table() only
        # accepts a Table, which made the RecordBatch case admitted by the
        # type check above fail.
        writer.write(data)
        writer.close()
        # NOTE(review): the value read here is discarded; presumably this
        # waits for the server's response to the upload - confirm.
        _ = reader.read()

        flight_ticket = self.session.make_ticket(ticket)
        return Table(self.session, ticket=flight_ticket,
                     size=data.num_rows, schema=dh_schema)
    except Exception as e:
        raise DHError(
            "failed to create a Deephaven table from Arrow data.") from e
def snapshot_table(self, table: Table):
    """Download the contents of a table through a Flight do_get call.

    Args:
        table: the table to read; its ``ticket.ticket`` value is used to
            build the Flight ticket.

    Returns:
        whatever ``read_all()`` on the Flight stream returns.

    Raises:
        DHError: if any step fails; the underlying error is chained.
    """
    try:
        call_options = paflight.FlightCallOptions(
            headers=self.session.grpc_metadata)
        ticket = paflight.Ticket(table.ticket.ticket)
        stream = self._flight_client.do_get(ticket, options=call_options)
        return stream.read_all()
    except Exception as e:
        raise DHError("failed to take a snapshot of the table.") from e
def test_timeout_fires():
    """Make sure timeouts fire on slow requests."""
    with flight_server(SlowFlightServer) as server_location:
        client = flight.FlightClient.connect(server_location)
        empty_action = flight.Action("", b"")
        short_timeout = flight.FlightCallOptions(timeout=0.2)
        # Only the exception type is checked: gRPC error messages change
        # between versions, so the message text is not matched.
        with pytest.raises(flight.FlightTimedOutError):
            results = client.do_action(empty_action, options=short_timeout)
            list(results)
def __init__(self, user, password, location=(), tls=False, verifyTls=True):
    """Store basic-auth call options and create the Flight client.

    :param user: user name placed in the basic-auth header
    :param password: password placed in the basic-auth header
    :param location: optional sequence ``(host[, port])``; missing entries
        fall back to ``_DEFAULT_HOST`` / ``_DEFAULT_PORT``
    :param tls: build a TLS gRPC location instead of a plain TCP one
    :param verifyTls: when false, server verification is disabled on the
        client
    """
    credentials = f'{user}:{password}'.encode('utf8')
    token = base64.b64encode(credentials)
    auth_header = (b'authorization', b'Basic ' + token)
    self._options = flight.FlightCallOptions(headers=[auth_header])

    # Positional fallbacks: entry 0 is the host, entry 1 the port.
    host = location[0] if len(location) > 0 else _DEFAULT_HOST
    port = location[1] if len(location) > 1 else _DEFAULT_PORT

    if tls:
        self._location = flight.Location.for_grpc_tls(host, port)
    else:
        self._location = flight.Location.for_grpc_tcp(host, port)

    skip_verification = not verifyTls
    self._client = flight.FlightClient(
        self._location, disable_server_verification=skip_verification)
def connect(
    hostname="localhost", port=32010, username="******", password="******", tls_root_certs_filename=None
):
    """
    Create a client for Dremio's arrow flight server and build the call
    options carrying the basic-auth header.

    No authorization header is added if username or password is empty/None;
    in that case the returned options are an empty ``FlightCallOptions``.

    :param hostname: Dremio coordinator hostname
    :param port: Dremio coordinator port
    :param username: Username on Dremio
    :param password: Password on Dremio
    :param tls_root_certs_filename: use TLS to connect, with root certs read
        from this file
    :return: tuple ``(call_options, client)``
    """
    scheme = "grpc+tcp"
    connection_args = {}
    if tls_root_certs_filename:
        # Read the certificates as raw bytes so no text decoding is involved.
        with open(tls_root_certs_filename, "rb") as root_certs:
            connection_args["tls_root_certs"] = root_certs.read()
        scheme = "grpc+tls"

    client_auth_middleware = DremioClientAuthMiddlewareFactory()
    client = flight.FlightClient(
        "{}://{}:{}".format(scheme, hostname, port),
        middleware=[client_auth_middleware],
        **connection_args)

    # Always bind the options: previously the name was only assigned inside
    # the credentials branch, so the no-credentials path raised
    # UnboundLocalError at the return statement.
    initial_options = flight.FlightCallOptions()
    if username and password:
        encoded_credentials = base64.b64encode(
            username.encode() + b':' + password.encode())
        initial_options = flight.FlightCallOptions(headers=[
            (b'authorization', b'Basic ' + encoded_credentials)
        ])
    return initial_options, client
def connect_to_dremio_flight_server_endpoint(hostname, flightport, username,
                                             password, sqlquery, tls, certs):
    """
    Connects to Dremio Flight server endpoint with the provided credentials.
    It also runs the query and prints the result set.

    :param hostname: server host name
    :param flightport: server Flight port
    :param username: user name passed to ``authenticate_basic_token``
    :param password: password passed to ``authenticate_basic_token``
    :param sqlquery: SQL text to run; when empty, only authentication happens
    :param tls: use an encrypted TLS connection
    :param certs: path to the trusted certificates file; required when
        ``tls`` is set
    :raises SystemExit: with status 1 when ``tls`` is set without ``certs``
    :raises Exception: any other failure is printed and re-raised unchanged
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            else:
                print(
                    '[ERROR] Trusted certificates must be provided to establish a TLS connection'
                )
                # Exit with a non-zero status: a bare sys.exit() reports
                # success (status 0) to the calling process.
                sys.exit(1)

        # Two WLM settings can be provided upon initial authentication
        # with the Dremio Server Flight Endpoint:
        # - routing-tag
        # - routing queue
        initial_options = flight.FlightCallOptions(headers=[
            (b'routing-tag', b'test-routing-tag'),
            (b'routing-queue', b'Low Cost User Queries'),
        ])
        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient(
            "{}://{}:{}".format(scheme, hostname, flightport),
            middleware=[client_auth_middleware],
            **connection_args)

        # Authenticate with the server endpoint.
        bearer_token = client.authenticate_basic_token(username, password,
                                                       initial_options)
        print('[INFO] Authentication was successful')

        if sqlquery:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(sqlquery)
            print('[INFO] Query: ', sqlquery)

            # In addition to the bearer token, a query context can also be
            # provided as an entry of FlightCallOptions, for example an
            # extra (b'schema', b'test.schema') header.
            options = flight.FlightCallOptions(headers=[bearer_token])

            # Retrieve the schema of the result set.
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise
def test_timeout_passes():
    """Make sure timeouts do not fire on fast requests."""
    with flight_server(ConstantFlightServer) as server_location:
        client = flight.FlightClient.connect(server_location)
        generous_timeout = flight.FlightCallOptions(timeout=5.0)
        ticket = flight.Ticket(b'ints')
        # Reading the whole stream must complete without a timeout error.
        stream = client.do_get(ticket, options=generous_timeout)
        stream.read_all()
def test_timeout_passes():
    """Make sure timeouts do not fire on fast requests."""
    with ConstantFlightServer() as server:
        endpoint = ('localhost', server.port)
        client = FlightClient(endpoint)
        generous_timeout = flight.FlightCallOptions(timeout=5.0)
        ticket = flight.Ticket(b'ints')
        # Reading the whole stream must complete without a timeout error.
        stream = client.do_get(ticket, options=generous_timeout)
        stream.read_all()
put_writer, put_meta_reader = self.con.do_put(desc, table.schema, options=self.options) put_writer.write(table) put_writer.close() # Request a pyarrow.Table by name def get_table(self, name): reader = self.con.do_get(flight.Ticket(name.encode('utf8')), options=self.options) return reader.read_all() def list_actions(self): return self.con.list_actions() ipc_options = pa.ipc.IpcWriteOptions(compression='zstd') options = flight.FlightCallOptions(write_options=ipc_options) client = DemoClient(location, options=options) dataset, files_list = get_s3_dataset("s3://molbeam/tested") for count, table in tenumerate(dataset.to_batches(columns=["canonical_ID", "enumerated_smiles", "achiral_fp"]), total=len(files_list)): client.cache_table_in_server(files_list[count], table) @stopwatch def get_single_table_from_flight_server(target): table_received = client.get_table(target) return table_received recieved_table = get_table_from_flight_server(files_list[0]) print(recieved_table)
def run(
    self,
    username: str,
    password: str,
    hostname: str,
    flightport: int,
    tls: bool,
    certs: str,
    query: str,
) -> Dict:
    """
    Task run method. Executes a query against Dremio and fetches results.

    Args:
        - username (str): user name used to authenticate
        - password (str): password used to authenticate
        - hostname (str): Dremio host address
        - flightport (int): port used to connect to FlightClient
        - tls (bool): connect to the server endpoint with an encrypted TLS connection
        - certs (str): path to a certificate; required when `tls` is set
        - query (str): query to execute against query engine

    Returns:
        - dict: a dictionary of data returned by Dremio

    Raises:
        - ValueError: if `query` is `None` or empty
        - SystemExit: with status 1 if `tls` is set and no `certs` is given
    """
    if not query:
        raise ValueError("A query string must be provided")

    scheme = "grpc+tcp"
    connection_args = {}

    if tls:
        # Connect to the server endpoint with an encrypted TLS connection.
        self.logger.debug("Enabling TLS connection")
        scheme = "grpc+tls"
        if certs:
            self.logger.debug("Trusted certificates provided")
            # TLS certificates are provided in a list of connection arguments.
            with open(certs, "rb") as root_certs:
                connection_args["tls_root_certs"] = root_certs.read()
        else:
            self.logger.error(
                "Trusted certificates must be provided to establish a TLS connection"
            )
            # Exit with a non-zero status: a bare sys.exit() reports
            # success (status 0) to the calling process.
            sys.exit(1)
    else:
        self.logger.info(
            "You are not using a secure connection. Consider setting tls=True"
        )

    # Two WLM settings can be provided upon initial authentication
    # with the Dremio Server Flight Endpoint:
    # - routing-tag
    # - routing queue
    initial_options = flight.FlightCallOptions(headers=[
        (b"routing-tag", b"test-routing-tag"),
        (b"routing-queue", b"Low Cost User Queries"),
    ])
    client_auth_middleware = DremioClientAuthMiddlewareFactory()
    client = flight.FlightClient(
        "{}://{}:{}".format(scheme, hostname, flightport),
        middleware=[client_auth_middleware],
        **connection_args,
    )

    # Authenticate with the server endpoint.
    bearer_token = client.authenticate_basic_token(username, password,
                                                   initial_options)
    self.logger.debug(
        "Authentication was successful. Token is valid for 30 hours.")

    # The value returned by authentication is sent as a header on every
    # subsequent call.
    options = flight.FlightCallOptions(headers=[bearer_token])

    # Get the FlightInfo message to retrieve the Ticket corresponding
    # to the query result set.
    flight_info = client.get_flight_info(
        flight.FlightDescriptor.for_command(query), options)
    self.logger.debug("GetFlightInfo was successful")

    # Retrieve the result set as a stream of Arrow record batches.
    reader = client.do_get(flight_info.endpoints[0].ticket, options)
    self.logger.debug("Reading query results from Dremio")

    # read_all() drains the stream into a single table. The previous manual
    # read_chunk() loop fed Table.from_batches(), which raises when the
    # stream delivers no batches at all.
    data = reader.read_all()
    return data.to_pydict()
def connect_to_dremio_flight_server_endpoint(host, port, username, password, query,
                                             tls, certs, disable_server_verification, pat_or_auth_token,
                                             engine, session_properties):
    """
    Connects to Dremio Flight server endpoint with the provided credentials.
    It also runs the query and prints the result set.

    :param host: server host name
    :param port: server Flight port
    :param username: user name for basic authentication
    :param password: password for basic authentication
    :param query: SQL text to run; when empty, no query is executed
    :param tls: use an encrypted TLS connection
    :param certs: path to the trusted certificates file
    :param disable_server_verification: with ``tls`` and no ``certs``, connect
        with server verification disabled
    :param pat_or_auth_token: token sent as a ``Bearer`` authorization header;
        takes precedence over username/password
    :param engine: optional value for the ``routing_engine`` header
    :param session_properties: optional list of extra header pairs; it is
        copied, never modified
    :raises SystemExit: with status 1 when TLS is requested without ``certs``
        or ``disable_server_verification``, or when neither a token nor a
        username/password pair is supplied
    :raises Exception: any other failure is printed and re-raised unchanged
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            elif disable_server_verification:
                # Connect to the server endpoint with server verification disabled.
                print('[INFO] Disable TLS server verification.')
                connection_args['disable_server_verification'] = disable_server_verification
            else:
                print('[ERROR] Trusted certificates must be provided to establish a TLS connection')
                # Exit with a non-zero status: a bare sys.exit() reports
                # success (status 0) to the calling process.
                sys.exit(1)

        # Work on a copy: appending to the caller's list would leak routing
        # and authorization headers back into the caller's data.
        headers = list(session_properties) if session_properties else []
        if engine:
            headers.append((b'routing_engine', engine.encode('utf-8')))

        # Two WLM settings can be provided upon initial authentication with
        # the Dremio Server Flight Endpoint: routing_tag and routing_queue.
        headers.append((b'routing_tag', b'test-routing-tag'))
        headers.append((b'routing_queue', b'Low Cost User Queries'))

        # Fail before building any client when no credentials were supplied.
        if not pat_or_auth_token and not (username and password):
            print('[ERROR] Username/password or PAT/Auth token must be supplied.')
            sys.exit(1)

        location = "{}://{}:{}".format(scheme, host, port)
        client_cookie_middleware = CookieMiddlewareFactory()

        if pat_or_auth_token:
            client = flight.FlightClient(location,
                                         middleware=[client_cookie_middleware],
                                         **connection_args)
            headers.append((b'authorization', "Bearer {}".format(pat_or_auth_token).encode('utf-8')))
            print('[INFO] Authentication skipped until first request')
        else:
            client_auth_middleware = DremioClientAuthMiddlewareFactory()
            client = flight.FlightClient(location,
                                         middleware=[client_auth_middleware, client_cookie_middleware],
                                         **connection_args)
            # Authenticate with the server endpoint.
            bearer_token = client.authenticate_basic_token(username, password,
                                                           flight.FlightCallOptions(headers=headers))
            print('[INFO] Authentication was successful')
            headers.append(bearer_token)

        if query:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(query)
            print('[INFO] Query: ', query)

            # In addition to the authorization header, a query context can
            # also be provided as an entry of FlightCallOptions, for example
            # an extra (b'schema', b'test.schema') header.
            options = flight.FlightCallOptions(headers=headers)

            # Retrieve the schema of the result set.
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise