def connect(hostname='localhost', port=47470, username='******', password='******', tls_root_certs_filename=None):
    """
    Open an Arrow Flight client against a Dremio coordinator.

    When ``tls_root_certs_filename`` is truthy the file is read and a
    ``grpc+tls`` location is used with its contents as root certificates;
    otherwise a plain ``grpc+tcp`` location is used. Authentication is only
    attempted when ``username`` is truthy.

    :param hostname: Dremio coordinator hostname
    :param port: Dremio coordinator port
    :param username: Username on Dremio; a falsy value skips authentication
    :param password: Password on Dremio; a falsy value is sent as ''
    :param tls_root_certs_filename: path of a root-certificate file; enables TLS
    :return: arrow flight client
    """
    if not tls_root_certs_filename:
        client = flight.FlightClient(
            'grpc+tcp://{}:{}'.format(hostname, port))
    else:
        with open(tls_root_certs_filename) as cert_file:
            root_certs = cert_file.read()
        client = flight.FlightClient(
            'grpc+tls://{}:{}'.format(hostname, port),
            tls_root_certs=root_certs)

    if username:
        handler = HttpDremioClientAuthHandler(username, password or '')
        client.authenticate(handler)
    return client
def _get_client(self):
    """
    Return the Flight client for ``self._location``, creating it on first use.

    A new client is authenticated with
    ``ClientAuthenticationHandler(self._api_key)`` unless the API key is the
    empty pair ``("", "")``.

    The client is stored on ``self._client`` only after authentication has
    succeeded. If ``authenticate`` raises, nothing is cached and the next
    call starts over instead of handing out an unauthenticated client.

    :return: the cached (or newly created) Flight client
    """
    if self._client is None:
        client = fl.FlightClient(self._location)
        if self._api_key != ("", ""):
            client.authenticate(ClientAuthenticationHandler(self._api_key))
        # Publish the client only once it is fully set up.
        self._client = client
    return self._client
def list_flights(self, location: str):
    """
    List the flights offered by the server at ``location``.

    A new client is opened for ``<self._scheme>://<location>`` using
    ``self._connection_args`` as keyword arguments.

    :param location: address part of the server URI (appended after the scheme)
    :return: whatever the client's ``list_flights()`` returns
    """
    uri = f"{self._scheme}://{location}"
    client = paf.FlightClient(uri, **self._connection_args)
    return client.list_flights()
def _load(self):
    """
    Execute ``self._sql_expr`` over Arrow Flight and keep the result.

    Connects to ``<self._protocol>://<self._hostname>``, authenticates with
    ``self._user`` / ``self._password``, fetches the ticket of the first
    endpoint and stores the stream, read as a pandas DataFrame, in
    ``self._dataframe``.
    """
    location = f'{self._protocol}://{self._hostname}'
    conn = flight.FlightClient(location)
    conn.authenticate(
        HttpDremioClientAuthHandler(self._user, self._password))

    descriptor = flight.FlightDescriptor.for_command(self._sql_expr)
    flight_info = conn.get_flight_info(descriptor)
    # Only the first endpoint's ticket is consumed.
    ticket = flight_info.endpoints[0].ticket
    self._dataframe = conn.do_get(ticket).read_pandas()
def __init__(self, connection_string):
    """
    Open an authenticated Flight client from a connection string.

    The string is split on ``;`` and its first four ``key=value`` segments
    are read by position; the key names themselves are ignored. Segments 0
    and 1 are passed to ``HttpDremioClientAuthHandler`` (presumably user and
    password), and segments 2 and 3 are used as host and port of the
    ``grpc+tcp`` location.

    Each segment is split on the first ``=`` only, so a value that itself
    contains ``=`` (for example a password) is kept intact.

    :param connection_string: ``k=v;k=v;k=v;k=v`` in the order described above
    :raises IndexError: if fewer than four segments are present or a segment
        has no ``=``
    """
    # TODO: Find a better way to extend to additional flight parameters
    segments = connection_string.split(";")

    def _value(index):
        # maxsplit=1 keeps any further "=" as part of the value.
        return segments[index].split("=", 1)[1]

    client = flight.FlightClient(
        'grpc+tcp://{0}:{1}'.format(_value(2), _value(3)))
    client.authenticate(
        HttpDremioClientAuthHandler(_value(0), _value(1)))

    self.flightclient = client
    self.closed = False
    self.cursors = []
def get_preview_data(self, params: Dict, optionalHeaders: Dict = None) -> Response:
    """
    Build a preview response for a Dremio table.

    Requests whose ``database`` is not ``'DREMIO'`` get an empty preview
    with status OK. Otherwise the query template
    ``DremioPreviewClient.SQL_STATEMENT`` is filled with the quoted schema
    and the table name, executed over Arrow Flight, and the result is
    serialized through ``PreviewDataSchema``. Any failure yields an empty
    preview with status INTERNAL_SERVER_ERROR.

    :param params: request parameters; reads ``database``, ``schema`` and
        ``tableName``
    :param optionalHeaders: not used by this implementation
    :return: a response whose JSON body has a ``preview_data`` key
    """
    if params.get('database') != 'DREMIO':
        logging.info('Skipping table preview for non-Dremio table')
        return make_response(jsonify({'preview_data': {}}), HTTPStatus.OK)

    try:
        # Each dot-separated part of the schema is wrapped in double quotes.
        quoted_schema = '"{}"'.format(params['schema'].replace('.', '"."'))
        table_name = params['tableName']
        query = DremioPreviewClient.SQL_STATEMENT.format(schema=quoted_schema,
                                                         table=table_name)

        conn = flight.FlightClient(self.url, **self.connection_args)
        conn.authenticate(_DremioAuthHandler(self.username, self.password))
        info = conn.get_flight_info(flight.FlightDescriptor.for_command(query))
        table_data = conn.do_get(info.endpoints[0].ticket).read_all()

        col_names = table_data.schema.names
        col_types = table_data.schema.types
        # Transpose the column-wise values into one dict per row.
        col_values = [column.to_pylist() for column in table_data.columns]
        records = [dict(zip(col_names, values)) for values in zip(*col_values)]
        items = [ColumnItem(name, kind)
                 for name, kind in zip(col_names, col_types)]
        preview = PreviewData(items, records)

        try:
            serialized = PreviewDataSchema().dump(preview)
            PreviewDataSchema().load(serialized)  # for validation only
            return make_response(jsonify({'preview_data': serialized}),
                                 HTTPStatus.OK)
        except ValidationError as err:
            logging.error(
                f'Error(s) occurred while building preview data: {err.messages}'
            )
            return make_response(jsonify({'preview_data': {}}),
                                 HTTPStatus.INTERNAL_SERVER_ERROR)

    except Exception as e:
        logging.error(f'Encountered exception: {e}')
        return make_response(jsonify({'preview_data': {}}),
                             HTTPStatus.INTERNAL_SERVER_ERROR)
def __init__(self, user, password, location=(), tls=False, verifyTls=True):
    """
    Prepare a Flight client and HTTP Basic call options.

    :param user: user name placed in the Basic authorization header
    :param password: password placed in the Basic authorization header
    :param location: sequence of ``(host[, port])``; missing items fall back
        to ``_DEFAULT_HOST`` / ``_DEFAULT_PORT``
    :param tls: use a gRPC TLS location instead of plain TCP
    :param verifyTls: when false, the client is created with
        ``disable_server_verification=True``
    """
    credentials = f'{user}:{password}'.encode('utf8')
    basic_header = (b'authorization', b'Basic ' + base64.b64encode(credentials))
    self._options = flight.FlightCallOptions(headers=[basic_header])

    host = _DEFAULT_HOST
    port = _DEFAULT_PORT
    if len(location) > 1:
        host, port = location[0], location[1]
    elif len(location) > 0:
        host = location[0]

    make_location = (flight.Location.for_grpc_tls
                     if tls else flight.Location.for_grpc_tcp)
    self._location = make_location(host, port)

    self._client = flight.FlightClient(
        self._location, disable_server_verification=(not verifyTls))
def get_flight(self, object_id: plasma.ObjectID, location: str) -> paf.FlightStreamReader:
    """
    Retrieve a flight object stream.

    The descriptor path is the UTF-8 encoded hex form of the object id's
    binary value. The stream of the first endpoint reported for that
    descriptor is returned.

    :param object_id: id of the object to fetch
    :param location: address part of the server URI (appended after the scheme)
    :return: the ``do_get`` stream for the first endpoint, or ``None`` when
        the flight info lists no endpoints
    """
    path = object_id.binary().hex().encode("utf-8")
    descriptor = paf.FlightDescriptor.for_path(path)
    logger.debug(
        f"connecting to {self._scheme}://{location} with descriptor {descriptor}"
    )

    client = paf.FlightClient(f"{self._scheme}://{location}",
                              **self._connection_args)
    info = client.get_flight_info(descriptor)

    first = next(iter(info.endpoints), None)
    if first is None:
        # No endpoint advertised: nothing to stream.
        return None
    logger.debug("using endpoint locations %s", first.locations)
    return client.do_get(first.ticket)
def connect(
    hostname="localhost", port=32010, username="******", password="******", tls_root_certs_filename=None
):
    """
    Create an Arrow Flight client for Dremio plus the call options to use with it.

    No handshake is performed here. When both ``username`` and ``password``
    are truthy, the returned call options carry an HTTP Basic
    ``authorization`` header; otherwise empty call options are returned, so
    the function always yields a usable pair.

    :param hostname: Dremio coordinator hostname
    :param port: Dremio coordinator port
    :param username: Username on Dremio
    :param password: Password on Dremio
    :param tls_root_certs_filename: path of a root-certificate file; when
        given, its contents are used as TLS root certs and ``grpc+tls`` is used
    :return: tuple ``(initial_options, client)`` of the call options and the
        arrow flight client
    """
    # Default to an unencrypted TCP connection unless root certs are supplied.
    scheme = "grpc+tcp"
    connection_args = {}
    if tls_root_certs_filename:
        with open(tls_root_certs_filename) as root_certs:
            connection_args["tls_root_certs"] = root_certs.read()
        scheme = "grpc+tls"

    client_auth_middleware = DremioClientAuthMiddlewareFactory()
    client = flight.FlightClient(
        "{}://{}:{}".format(scheme, hostname, port),
        middleware=[client_auth_middleware],
        **connection_args)

    # Always bind initial_options: previously it was only assigned inside the
    # credentials branch, so the return below failed without credentials.
    initial_options = flight.FlightCallOptions()
    if username and password:
        encoded_credentials = base64.b64encode(
            username.encode() + b':' + password.encode())
        initial_options = flight.FlightCallOptions(headers=[
            (b'authorization', b'Basic ' + encoded_credentials)
        ])
    return initial_options, client
def exists(self, object_id: plasma.ObjectID, owner: Optional[str] = None) -> bool:
    """
    Tell whether the plasma object is available locally or at ``owner``.

    The local plasma client is consulted first. If it does not contain the
    object and an ``owner`` address is given, the owner's Flight server is
    asked for the flight info of the object; a ``FlightError`` from that
    call is treated as "not present".

    :param object_id: id of the object to look for
    :param owner: address part of the remote server URI, or ``None`` to
        check only locally
    :return: ``True`` if the object was found, ``False`` otherwise
    """
    # Local store first.
    if self.plasma_client.contains(object_id):
        return True

    # Without an owner there is no remote to ask.
    if owner is None:
        return False

    remote = paf.FlightClient(f"{self._scheme}://{owner}",
                              **self._connection_args)
    try:
        remote.get_flight_info(
            paf.FlightDescriptor.for_path(
                object_id.binary().hex().encode("utf-8")))
    except paf.FlightError:
        return False
    return True
def run(self, user: str, password: str, host: str, port: int, query: str) -> Dict:
    """
    Task run method. Executes a query against Dremio and fetches results.

    Args:
        - user (str): user name used to authenticate
        - password (str): password used to authenticate
        - host (str): Dremio host address
        - port (int): port used to connect to FlightClient
        - query (str): query to execute against query engine

    Returns:
        - dict: a dictionary of data returned by Dremio, mapping column
            names to lists of values

    Raises:
        - ValueError: if `query` is `None` or empty
    """
    if not query:
        raise ValueError("A query string must be provided")

    client = flight.FlightClient(f"grpc+tcp://{host}:{port}")
    client.authenticate(HttpDremioClientAuthHandler(user, password))

    # NOTE(review): the suffix is appended without a separator; presumably it
    # is meant as a trailing SQL comment -- confirm it is still required.
    info = client.get_flight_info(
        flight.FlightDescriptor.for_command(query + "--arrow flight"))
    reader = client.do_get(info.endpoints[0].ticket)

    # read_all() drains the stream into one Table and uses the stream's
    # schema, so a result with no record batches yields an empty table.
    # Collecting chunks by hand and calling Table.from_batches([]) raises
    # ValueError in that case.
    return reader.read_all().to_pydict()
def connect_to_dremio_flight_server_endpoint(hostname, flightport, username, password, sqlquery, tls, certs):
    """
    Connects to Dremio Flight server endpoint with the provided credentials.
    It also runs the query and retrieves the result set.

    Progress is reported with ``print``. When ``sqlquery`` is truthy the
    schema, the first endpoint's ticket and the result (as a pandas
    DataFrame) are printed.

    :param hostname: Dremio host name
    :param flightport: Flight endpoint port
    :param username: user name for basic authentication
    :param password: password for basic authentication
    :param sqlquery: SQL text to run; a falsy value only authenticates
    :param tls: connect with ``grpc+tls`` instead of ``grpc+tcp``
    :param certs: path of a trusted-certificates file; required when ``tls``
        is truthy
    :raises SystemExit: with status 1 when ``tls`` is requested without
        ``certs``
    :raises Exception: any other error is printed and re-raised
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            else:
                print(
                    '[ERROR] Trusted certificates must be provided to establish a TLS connection'
                )
                # Non-zero status: a bare sys.exit() would report success.
                sys.exit(1)

        # Two WLM settings can be provided upon initial authentication
        # with the Dremio Server Flight Endpoint:
        # - routing-tag
        # - routing queue
        initial_options = flight.FlightCallOptions(headers=[
            (b'routing-tag', b'test-routing-tag'),
            (b'routing-queue', b'Low Cost User Queries'),
        ])
        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient(
            "{}://{}:{}".format(scheme, hostname, flightport),
            middleware=[client_auth_middleware],
            **connection_args)

        # Authenticate with the server endpoint.
        bearer_token = client.authenticate_basic_token(username, password,
                                                       initial_options)
        print('[INFO] Authentication was successful')

        if sqlquery:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(sqlquery)
            print('[INFO] Query: ', sqlquery)

            # In addition to the bearer token, a query context can also
            # be provided as an entry of FlightCallOptions, e.g.
            # headers=[bearer_token, (b'schema', b'test.schema')].

            # Retrieve the schema of the result set.
            options = flight.FlightCallOptions(headers=[bearer_token])
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise
def main():
    """Fetch the 'cities' ticket from the Flight server at 127.0.0.1:9092 and print it as a DataFrame."""
    conn = fl.FlightClient(location="grpc://127.0.0.1:9092")
    stream = conn.do_get(fl.Ticket('cities'))
    frame = stream.read_all().to_pandas()
    print(frame)
def run(
    self,
    username: str,
    password: str,
    hostname: str,
    flightport: int,
    tls: bool,
    certs: str,
    query: str,
) -> Dict:
    """
    Task run method. Executes a query against Dremio and fetches results.

    Args:
        - username (str): user name used to authenticate
        - password (str): password used to authenticate
        - hostname (str): Dremio host address
        - flightport (int): port used to connect to FlightClient
        - tls (bool): connect to the server endpoint with an encrypted TLS connection
        - certs (str): path to a certificate; required when `tls` is true
        - query (str): query to execute against query engine

    Returns:
        - dict: a dictionary of data returned by Dremio, mapping column
            names to lists of values

    Raises:
        - ValueError: if `query` is `None` or empty
        - SystemExit: with status 1 if `tls` is requested without `certs`
    """
    if not query:
        raise ValueError("A query string must be provided")

    scheme = "grpc+tcp"
    connection_args = {}

    if tls:
        # Connect to the server endpoint with an encrypted TLS connection.
        self.logger.debug("Enabling TLS connection")
        scheme = "grpc+tls"
        if certs:
            self.logger.debug("Trusted certificates provided")
            # TLS certificates are provided in a list of connection arguments.
            with open(certs, "rb") as root_certs:
                connection_args["tls_root_certs"] = root_certs.read()
        else:
            self.logger.error(
                "Trusted certificates must be provided to establish a TLS connection"
            )
            # Non-zero status: a bare sys.exit() would report success.
            sys.exit(1)
    else:
        self.logger.info(
            "You are not using a secure connection. Consider setting tls=True"
        )

    # Two WLM settings can be provided upon initial authentication
    # with the Dremio Server Flight Endpoint:
    # - routing-tag
    # - routing queue
    initial_options = flight.FlightCallOptions(headers=[
        (b"routing-tag", b"test-routing-tag"),
        (b"routing-queue", b"Low Cost User Queries"),
    ])
    client_auth_middleware = DremioClientAuthMiddlewareFactory()
    client = flight.FlightClient(
        "{}://{}:{}".format(scheme, hostname, flightport),
        middleware=[client_auth_middleware],
        **connection_args,
    )

    # Authenticate with the server endpoint.
    bearer_token = client.authenticate_basic_token(username, password,
                                                   initial_options)
    self.logger.debug(
        "Authentication was successful. Token is valid for 30 hours.")

    # Every subsequent call carries the bearer token returned above.
    options = flight.FlightCallOptions(headers=[bearer_token])

    # Get the FlightInfo message to retrieve the Ticket corresponding
    # to the query result set.
    flight_info = client.get_flight_info(
        flight.FlightDescriptor.for_command(query), options)
    self.logger.debug("GetFlightInfo was successful")

    # Retrieve the result set as a stream of Arrow record batches.
    reader = client.do_get(flight_info.endpoints[0].ticket, options)
    self.logger.debug("Reading query results from Dremio")

    # read_all() drains the stream into one Table and uses the stream's
    # schema, so a result with no record batches yields an empty table.
    # Collecting chunks by hand and calling Table.from_batches([]) raises
    # ValueError in that case.
    return reader.read_all().to_pydict()
def connect_to_dremio_flight_server_endpoint(host, port, username, password, query, tls, certs,
                                             disable_server_verification, pat_or_auth_token, engine,
                                             session_properties):
    """
    Connects to Dremio Flight server endpoint with the provided credentials.
    It also runs the query and retrieves the result set.

    Progress is reported with ``print``. When ``query`` is truthy the schema,
    the first endpoint's ticket and the result (as a pandas DataFrame) are
    printed.

    :param host: Dremio host name
    :param port: Flight endpoint port
    :param username: user name for basic authentication
    :param password: password for basic authentication
    :param query: SQL text to run; a falsy value only sets up the client
    :param tls: connect with ``grpc+tls`` instead of ``grpc+tcp``
    :param certs: path of a trusted-certificates file
    :param disable_server_verification: with ``tls`` and no ``certs``, passed
        to the client to skip server verification
    :param pat_or_auth_token: token sent as a ``Bearer`` authorization header;
        takes precedence over ``username``/``password``
    :param engine: optional value for the ``routing_engine`` header
    :param session_properties: optional list of ``(name, value)`` header
        tuples; the list is copied and never modified
    :raises SystemExit: with status 1 when TLS is requested without certs or
        disabled verification, or when no credentials are supplied
    :raises Exception: any other error is printed and re-raised
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            elif disable_server_verification:
                # Connect to the server endpoint with server verification disabled.
                print('[INFO] Disable TLS server verification.')
                connection_args['disable_server_verification'] = disable_server_verification
            else:
                print('[ERROR] Trusted certificates must be provided to establish a TLS connection')
                # Non-zero status: a bare sys.exit() would report success.
                sys.exit(1)

        # Work on a copy so the caller's session_properties list is not
        # extended with routing and authorization headers.
        headers = list(session_properties) if session_properties else []

        if engine:
            headers.append((b'routing_engine', engine.encode('utf-8')))

        # Two WLM settings can be provided upon initial authentication with the Dremio Server Flight Endpoint:
        # routing_tag
        # routing_queue
        headers.append((b'routing_tag', b'test-routing-tag'))
        headers.append((b'routing_queue', b'Low Cost User Queries'))

        client_cookie_middleware = CookieMiddlewareFactory()

        if pat_or_auth_token:
            client = flight.FlightClient("{}://{}:{}".format(scheme, host, port),
                                         middleware=[client_cookie_middleware],
                                         **connection_args)
            headers.append((b'authorization', "Bearer {}".format(pat_or_auth_token).encode('utf-8')))
            print('[INFO] Authentication skipped until first request')
        elif username and password:
            client_auth_middleware = DremioClientAuthMiddlewareFactory()
            client = flight.FlightClient("{}://{}:{}".format(scheme, host, port),
                                         middleware=[client_auth_middleware, client_cookie_middleware],
                                         **connection_args)
            # Authenticate with the server endpoint.
            bearer_token = client.authenticate_basic_token(username, password,
                                                           flight.FlightCallOptions(headers=headers))
            print('[INFO] Authentication was successful')
            headers.append(bearer_token)
        else:
            print('[ERROR] Username/password or PAT/Auth token must be supplied.')
            sys.exit(1)

        if query:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(query)
            print('[INFO] Query: ', query)

            # In addition to the bearer token, a query context can also
            # be provided as an entry of FlightCallOptions, e.g.
            # headers=[bearer_token, (b'schema', b'test.schema')].

            # Retrieve the schema of the result set.
            options = flight.FlightCallOptions(headers=headers)
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise