예제 #1
0
    def connect(hostname='localhost',
                port=47470,
                username='******',
                password='******',
                tls_root_certs_filename=None):
        """
        Open an Arrow Flight client to a Dremio coordinator and optionally authenticate.

        Authentication is skipped when ``username`` is falsy (``None`` or empty).

        :param hostname: Dremio coordinator hostname
        :param port: Dremio coordinator port
        :param username: Username on Dremio; a falsy value disables authentication
        :param password: Password on Dremio; a falsy value is sent as an empty string
        :param tls_root_certs_filename: path of a root-certificate file; when given,
            the connection is made over TLS using its contents
        :return: arrow flight client
        """
        location_template = 'grpc+tcp://{}:{}'
        client_kwargs = {}
        if tls_root_certs_filename:
            # The certificate file is read before the client is created.
            with open(tls_root_certs_filename) as cert_file:
                client_kwargs['tls_root_certs'] = cert_file.read()
            location_template = 'grpc+tls://{}:{}'

        client = flight.FlightClient(location_template.format(hostname, port),
                                     **client_kwargs)
        if not username:
            return client

        client.authenticate(
            HttpDremioClientAuthHandler(username, password or ''))
        return client
예제 #2
0
 def _get_client(self):
     """
     Return the Flight client for ``self._location``, creating it on first use.

     The client is cached on ``self._client``. When it is first created and
     ``self._api_key`` differs from ``("", "")``, the client is authenticated
     with a ``ClientAuthenticationHandler`` built from that key.
     """
     if self._client is not None:
         return self._client

     # Cache the client before authenticating, as later calls reuse it.
     self._client = fl.FlightClient(self._location)
     has_api_key = self._api_key != ("", "")
     if has_api_key:
         handler = ClientAuthenticationHandler(self._api_key)
         self._client.authenticate(handler)
     return self._client
예제 #3
0
 def list_flights(self, location: str):
     """
     List the flights offered by the Flight server at ``location``.

     A new client is created for each call from ``self._scheme`` and
     ``self._connection_args``; the result of its ``list_flights()`` call is
     returned as-is.
     """
     uri = f"{self._scheme}://{location}"
     client = paf.FlightClient(uri, **self._connection_args)
     return client.list_flights()
예제 #4
0
 def _load(self):
     """
     Run ``self._sql_expr`` over Arrow Flight and store the result as a DataFrame.

     Connects to ``self._protocol://self._hostname``, authenticates with
     ``self._user`` / ``self._password`` and assigns the pandas result to
     ``self._dataframe``.
     """
     location = f'{self._protocol}://{self._hostname}'
     client = flight.FlightClient(location)
     auth_handler = HttpDremioClientAuthHandler(self._user, self._password)
     client.authenticate(auth_handler)

     descriptor = flight.FlightDescriptor.for_command(self._sql_expr)
     # Only the ticket of the first endpoint is fetched.
     first_endpoint = client.get_flight_info(descriptor).endpoints[0]
     self._dataframe = client.do_get(first_endpoint.ticket).read_pandas()
예제 #5
0
    def __init__(self, connection_string):
        """
        Open an authenticated Flight client from a ``;``-separated connection string.

        The string is read positionally: the first four ``key=value`` segments
        supply, in order, the user name, the password, the host and the port.
        The key names themselves are not inspected.

        :param connection_string: four or more ``key=value`` segments joined by ``;``
        :raises IndexError: if there are fewer than four segments or a segment
            has no ``=``
        """
        # TODO: Find a better way to extend to additional flight parameters

        segments = connection_string.split(";")
        # Split on the first "=" only, so a value that itself contains "="
        # (for example a password) is kept whole instead of being truncated.
        user, password, host, port = (
            segments[index].split("=", 1)[1] for index in range(4))

        client = flight.FlightClient('grpc+tcp://{0}:{1}'.format(host, port))
        client.authenticate(HttpDremioClientAuthHandler(user, password))

        self.flightclient = client

        self.closed = False
        self.cursors = []
    def get_preview_data(self,
                         params: Dict,
                         optionalHeaders: Dict = None) -> Response:
        """Build a preview-data response for a Dremio table.

        Requests whose ``database`` is not ``'DREMIO'`` get an empty preview
        with status OK. Otherwise ``DremioPreviewClient.SQL_STATEMENT`` is
        formatted with the requested schema and table, run over Arrow Flight,
        and the result is dumped through ``PreviewDataSchema``. Any failure is
        logged and answered with an empty preview and INTERNAL_SERVER_ERROR.

        ``optionalHeaders`` is accepted but not used by this method.
        """
        if params.get('database') != 'DREMIO':
            logging.info('Skipping table preview for non-Dremio table')
            return make_response(jsonify({'preview_data': {}}), HTTPStatus.OK)

        try:
            # Quote each dot-separated part of the schema path on its own.
            quoted_schema = '"{}"'.format(params['schema'].replace('.', '"."'))
            table_name = params['tableName']
            sql = DremioPreviewClient.SQL_STATEMENT.format(schema=quoted_schema,
                                                           table=table_name)

            client = flight.FlightClient(self.url, **self.connection_args)
            auth_handler = _DremioAuthHandler(self.username, self.password)
            client.authenticate(auth_handler)
            info = client.get_flight_info(
                flight.FlightDescriptor.for_command(sql))
            # Only the first endpoint's ticket is read.
            table_data = client.do_get(info.endpoints[0].ticket).read_all()

            names = table_data.schema.names
            types = table_data.schema.types

            # Turn the column-oriented result into one dict per row.
            column_values = [column.to_pylist() for column in table_data.columns]
            rows = [dict(zip(names, values)) for values in zip(*column_values)]
            column_items = [
                ColumnItem(name, dtype) for name, dtype in zip(names, types)
            ]

            preview_data = PreviewData(column_items, rows)
            try:
                data = PreviewDataSchema().dump(preview_data)
                PreviewDataSchema().load(data)  # for validation only
                return make_response(jsonify({'preview_data': data}),
                                     HTTPStatus.OK)
            except ValidationError as err:
                logging.error(
                    f'Error(s) occurred while building preview data: {err.messages}'
                )
                return make_response(jsonify({'preview_data': {}}),
                                     HTTPStatus.INTERNAL_SERVER_ERROR)

        except Exception as e:
            logging.error(f'Encountered exception: {e}')
            return make_response(jsonify({'preview_data': {}}),
                                 HTTPStatus.INTERNAL_SERVER_ERROR)
예제 #7
0
    def __init__(self, user, password, location=(), tls=False, verifyTls=True):
        """
        Prepare Basic-auth call options and a Flight client for one server.

        :param user: user name placed in the ``authorization`` header
        :param password: password placed in the ``authorization`` header
        :param location: optional ``(host, port)`` sequence; missing items fall
            back to ``_DEFAULT_HOST`` / ``_DEFAULT_PORT``
        :param tls: build a TLS gRPC location instead of a plain TCP one
        :param verifyTls: when false, the client is created with server
            verification disabled
        """
        credentials = f'{user}:{password}'.encode('utf8')
        basic_header = (b'authorization',
                        b'Basic ' + base64.b64encode(credentials))
        self._options = flight.FlightCallOptions(headers=[basic_header])

        host = location[0] if len(location) > 0 else _DEFAULT_HOST
        port = location[1] if len(location) > 1 else _DEFAULT_PORT

        if tls:
            make_location = flight.Location.for_grpc_tls
        else:
            make_location = flight.Location.for_grpc_tcp
        self._location = make_location(host, port)

        skip_verification = not verifyTls
        self._client = flight.FlightClient(
            self._location, disable_server_verification=skip_verification)
예제 #8
0
    def get_flight(self, object_id: plasma.ObjectID,
                   location: str) -> paf.FlightStreamReader:
        """
        Retrieve a flight object stream for ``object_id`` from ``location``.

        The descriptor path is the hex form of the object id. The stream for
        the first endpoint listed in the flight info is returned; when the info
        lists no endpoints, the method returns ``None``.
        """
        hex_id = object_id.binary().hex().encode("utf-8")
        descriptor = paf.FlightDescriptor.for_path(hex_id)

        logger.debug(
            f"connecting to {self._scheme}://{location} with descriptor {descriptor}"
        )
        flight_client = paf.FlightClient(f"{self._scheme}://{location}",
                                         **self._connection_args)
        info = flight_client.get_flight_info(descriptor)

        first_endpoint = next(iter(info.endpoints), None)
        if first_endpoint is None:
            return None
        logger.debug("using endpoint locations %s", first_endpoint.locations)
        return flight_client.do_get(first_endpoint.ticket)
예제 #9
0
    def connect(
        hostname="localhost", port=32010, username="******", password="******", tls_root_certs_filename=None
    ):
        """
        Create an Arrow Flight client for Dremio and build Basic-auth call options.

        No authentication round trip is made here. The returned options carry
        an ``authorization: Basic ...`` header to be passed along with later
        calls. When ``username`` or ``password`` is empty, the returned options
        carry no headers.

        :param hostname: Dremio coordinator hostname
        :param port: Dremio coordinator port
        :param username: Username on Dremio
        :param password: Password on Dremio
        :param tls_root_certs_filename: use ssl to connect with root certs from filename
        :return: tuple ``(initial_options, client)`` of the call options and
            the arrow flight client
        """
        scheme = "grpc+tcp"  # default: unencrypted TCP connection
        connection_args = {}

        if tls_root_certs_filename:
            # Read the certificates as bytes; no text decoding is involved.
            with open(tls_root_certs_filename, "rb") as root_certs:
                connection_args["tls_root_certs"] = root_certs.read()
            scheme = "grpc+tls"

        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient("{}://{}:{}".format(scheme, hostname, port),
                                     middleware=[client_auth_middleware], **connection_args)

        # Two WLM settings (routing-tag, routing queue) can be provided upon
        # initial authentication with the Dremio Server Flight Endpoint; none
        # are added here.
        headers = []
        if username and password:
            encoded_credentials = base64.b64encode(username.encode() + b':' + password.encode())
            headers.append((b'authorization', b'Basic ' + encoded_credentials))

        # Always build the options, so the return below cannot hit an unbound
        # name when no credentials were supplied.
        initial_options = flight.FlightCallOptions(headers=headers)
        return initial_options, client
예제 #10
0
 def exists(self,
            object_id: plasma.ObjectID,
            owner: Optional[str] = None) -> bool:
     """
     Report whether the plasma object is available locally or on ``owner``.

     The local plasma client is consulted first. If the object is not there
     and an ``owner`` address is given, the owner's Flight server is asked for
     the flight info of the object's hex id; a ``FlightError`` from that call
     is treated as "not present".
     """
     # Local cache hit
     if self.plasma_client.contains(object_id):
         return True
     # Nothing else to ask without an owner address
     if owner is None:
         return False

     remote = paf.FlightClient(f"{self._scheme}://{owner}",
                               **self._connection_args)
     try:
         hex_id = object_id.binary().hex().encode("utf-8")
         remote.get_flight_info(paf.FlightDescriptor.for_path(hex_id))
     except paf.FlightError:
         return False
     return True
예제 #11
0
파일: dremio.py 프로젝트: zviri/prefect
    def run(self, user: str, password: str, host: str, port: int,
            query: str) -> Dict:
        """
        Task run method. Executes a query against Dremio and fetches results.

        Args:
            - user (str): user name used to authenticate
            - password (str): password used to authenticate
            - host (str): Dremio host address
            - port (int): port used to connect to FlightClient
            - query (str): query to execute against query engine

        Returns:
            - dict: a dictionary of data returned by Dremio, as produced by
                `Table.to_pydict()`

        Raises:
            - ValueError: if `query` is `None` or empty
        """
        if not query:
            raise ValueError("A query string must be provided")

        client = flight.FlightClient(f"grpc+tcp://{host}:{port}")
        client.authenticate(HttpDremioClientAuthHandler(user, password))

        info = client.get_flight_info(
            flight.FlightDescriptor.for_command(query + "--arrow flight"))
        # Only the first endpoint's ticket is read.
        reader = client.do_get(info.endpoints[0].ticket)

        # read_all() drains the stream into a Table using the stream's own
        # schema, so a stream that yields no record batches still produces an
        # empty table instead of failing in Table.from_batches([]).
        return reader.read_all().to_pydict()
예제 #12
0
def connect_to_dremio_flight_server_endpoint(hostname, flightport, username,
                                             password, sqlquery, tls, certs):
    """
    Connect to a Dremio Flight server endpoint, authenticate, and optionally run a query.

    Progress is reported with ``print``. When ``sqlquery`` is truthy, its
    schema, the first endpoint's ticket and the result set (as a pandas
    DataFrame) are printed.

    :param hostname: Dremio coordinator hostname
    :param flightport: Dremio Flight port
    :param username: user name for basic authentication
    :param password: password for basic authentication
    :param sqlquery: SQL text to run; a falsy value skips the query step
    :param tls: truthy to connect over ``grpc+tls`` instead of ``grpc+tcp``
    :param certs: path to the trusted root certificates; required when ``tls`` is set
    :raises SystemExit: with status 1 when ``tls`` is set but ``certs`` is not
    :raises Exception: any other failure is printed and re-raised unchanged
    """

    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            else:
                print(
                    '[ERROR] Trusted certificates must be provided to establish a TLS connection'
                )
                # Exit with a non-zero status so callers and shells see the failure.
                sys.exit(1)

        # Two WLM settings can be provided upon initial authentication
        # with the Dremio Server Flight Endpoint:
        # - routing-tag
        # - routing queue
        initial_options = flight.FlightCallOptions(headers=[(
            b'routing-tag',
            b'test-routing-tag'), (b'routing-queue',
                                   b'Low Cost User Queries')])
        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient("{}://{}:{}".format(
            scheme, hostname, flightport),
                                     middleware=[client_auth_middleware],
                                     **connection_args)

        # Authenticate with the server endpoint.
        bearer_token = client.authenticate_basic_token(username, password,
                                                       initial_options)
        print('[INFO] Authentication was successful')

        if sqlquery:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(sqlquery)
            print('[INFO] Query: ', sqlquery)

            # In addition to the bearer token, a query context can also
            # be provided as an entry of FlightCallOptions, e.g.
            #     headers=[bearer_token, (b'schema', b'test.schema')]
            options = flight.FlightCallOptions(headers=[bearer_token])

            # Retrieve the schema of the result set.
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set; the descriptor built above is reused.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise
예제 #13
0
def main():
    """Fetch the 'cities' ticket from the Flight server at 127.0.0.1:9092 and print it as a DataFrame."""
    client = fl.FlightClient(location="grpc://127.0.0.1:9092")
    reader = client.do_get(fl.Ticket('cities'))
    print(reader.read_all().to_pandas())
예제 #14
0
    def run(
        self,
        username: str,
        password: str,
        hostname: str,
        flightport: int,
        tls: bool,
        certs: str,
        query: str,
    ) -> Dict:
        """
        Task run method. Executes a query against Dremio and fetches results.

        Args:
            - username (str): user name used to authenticate
            - password (str): password used to authenticate
            - hostname (str): Dremio host address
            - flightport (int): port used to connect to FlightClient
            - tls (bool): connect to the server endpoint with an encrypted TLS connection
            - certs (str): path to a certificate; required when `tls` is set
            - query (str): query to execute against query engine

        Returns:
            - dict: a dictionary of data returned by Dremio, as produced by
                `Table.to_pydict()`

        Raises:
            - ValueError: if `query` is `None` or empty
            - SystemExit: with status 1 if `tls` is set but `certs` is not
        """
        if not query:
            raise ValueError("A query string must be provided")
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            self.logger.debug("Enabling TLS connection")
            scheme = "grpc+tls"
            if certs:
                self.logger.debug("Trusted certificates provided")
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            else:
                self.logger.error(
                    "Trusted certificates must be provided to establish a TLS connection"
                )
                # Exit with a non-zero status so the failure is not reported as success.
                sys.exit(1)
        else:
            self.logger.info(
                "You are not using a secure connection. Consider setting tls=True"
            )

        # Two WLM settings can be provided upon initial authentication
        # with the Dremio Server Flight Endpoint:
        # - routing-tag
        # - routing queue
        initial_options = flight.FlightCallOptions(headers=[
            (b"routing-tag", b"test-routing-tag"),
            (b"routing-queue", b"Low Cost User Queries"),
        ])
        client_auth_middleware = DremioClientAuthMiddlewareFactory()
        client = flight.FlightClient(
            "{}://{}:{}".format(scheme, hostname, flightport),
            middleware=[client_auth_middleware],
            **connection_args,
        )

        # Authenticate with the server endpoint.
        bearer_token = client.authenticate_basic_token(username, password,
                                                       initial_options)
        self.logger.debug(
            "Authentication was successful. Token is valid for 30 hours.")

        # Every following call carries the bearer token returned above.
        options = flight.FlightCallOptions(headers=[bearer_token])

        # Get the FlightInfo message to retrieve the Ticket corresponding
        # to the query result set.
        flight_info = client.get_flight_info(
            flight.FlightDescriptor.for_command(query), options)
        self.logger.debug("GetFlightInfo was successful")

        # Retrieve the result set as a stream of Arrow record batches.
        reader = client.do_get(flight_info.endpoints[0].ticket, options)
        self.logger.debug("Reading query results from Dremio")

        # read_all() drains the stream into a Table using the stream's own
        # schema, so a stream that yields no record batches still produces an
        # empty table instead of failing in Table.from_batches([]).
        return reader.read_all().to_pydict()
def connect_to_dremio_flight_server_endpoint(host, port, username, password, query,
                                             tls, certs, disable_server_verification, pat_or_auth_token,
                                             engine, session_properties):
    """
    Connect to a Dremio Flight server endpoint, authenticate, and optionally run a query.

    Progress is reported with ``print``. When ``query`` is truthy, its schema,
    the first endpoint's ticket and the result set (as a pandas DataFrame) are
    printed.

    :param host: Dremio coordinator hostname
    :param port: Dremio Flight port
    :param username: user name for basic authentication
    :param password: password for basic authentication
    :param query: SQL text to run; a falsy value skips the query step
    :param tls: truthy to connect over ``grpc+tls`` instead of ``grpc+tcp``
    :param certs: path to the trusted root certificates (used when ``tls`` is set)
    :param disable_server_verification: with ``tls`` and no ``certs``, connect
        with server verification disabled
    :param pat_or_auth_token: token sent as ``authorization: Bearer ...``; takes
        precedence over ``username`` / ``password``
    :param engine: optional value for the ``routing_engine`` header
    :param session_properties: optional header tuples sent with every call;
        the passed object is copied, not modified
    :raises SystemExit: with status 1 when TLS is requested without certificates
        or disabled verification, or when no credentials are supplied
    :raises Exception: any other failure is printed and re-raised unchanged
    """
    try:
        # Default to use an unencrypted TCP connection.
        scheme = "grpc+tcp"
        connection_args = {}

        if tls:
            # Connect to the server endpoint with an encrypted TLS connection.
            print('[INFO] Enabling TLS connection')
            scheme = "grpc+tls"
            if certs:
                print('[INFO] Trusted certificates provided')
                # TLS certificates are provided in a list of connection arguments.
                with open(certs, "rb") as root_certs:
                    connection_args["tls_root_certs"] = root_certs.read()
            elif disable_server_verification:
                # Connect to the server endpoint with server verification disabled.
                print('[INFO] Disable TLS server verification.')
                connection_args['disable_server_verification'] = disable_server_verification
            else:
                print('[ERROR] Trusted certificates must be provided to establish a TLS connection')
                # Exit with a non-zero status so callers and shells see the failure.
                sys.exit(1)

        # Work on a copy: the headers are extended below and the caller's
        # session_properties must not be changed as a side effect.
        headers = list(session_properties) if session_properties else []

        if engine:
            headers.append((b'routing_engine', engine.encode('utf-8')))

        # Two WLM settings can be provided upon initial authentication with the Dremio Server Flight Endpoint:
        # routing_tag
        # routing_queue
        headers.append((b'routing_tag', b'test-routing-tag'))
        headers.append((b'routing_queue', b'Low Cost User Queries'))

        client_cookie_middleware = CookieMiddlewareFactory()

        if pat_or_auth_token:
            client = flight.FlightClient("{}://{}:{}".format(scheme, host, port),
                                         middleware=[client_cookie_middleware], **connection_args)

            headers.append((b'authorization', "Bearer {}".format(pat_or_auth_token).encode('utf-8')))
            print('[INFO] Authentication skipped until first request')

        elif username and password:
            client_auth_middleware = DremioClientAuthMiddlewareFactory()
            client = flight.FlightClient("{}://{}:{}".format(scheme, host, port),
                                         middleware=[client_auth_middleware, client_cookie_middleware],
                                         **connection_args)

            # Authenticate with the server endpoint.
            bearer_token = client.authenticate_basic_token(username, password,
                                                           flight.FlightCallOptions(headers=headers))
            print('[INFO] Authentication was successful')
            headers.append(bearer_token)
        else:
            print('[ERROR] Username/password or PAT/Auth token must be supplied.')
            # Exit with a non-zero status so callers and shells see the failure.
            sys.exit(1)

        if query:
            # Construct FlightDescriptor for the query result set.
            flight_desc = flight.FlightDescriptor.for_command(query)
            print('[INFO] Query: ', query)

            # In addition to the bearer token, a query context can also
            # be provided as an entry of FlightCallOptions, e.g.
            #     headers=[bearer_token, (b'schema', b'test.schema')]
            options = flight.FlightCallOptions(headers=headers)

            # Retrieve the schema of the result set.
            schema = client.get_schema(flight_desc, options)
            print('[INFO] GetSchema was successful')
            print('[INFO] Schema: ', schema)

            # Get the FlightInfo message to retrieve the Ticket corresponding
            # to the query result set; the descriptor built above is reused.
            flight_info = client.get_flight_info(flight_desc, options)
            print('[INFO] GetFlightInfo was successful')
            print('[INFO] Ticket: ', flight_info.endpoints[0].ticket)

            # Retrieve the result set as a stream of Arrow record batches.
            reader = client.do_get(flight_info.endpoints[0].ticket, options)
            print('[INFO] Reading query results from Dremio')
            print(reader.read_pandas())

    except Exception as exception:
        print("[ERROR] Exception: {}".format(repr(exception)))
        raise