def test_location_invalid():
    """Check that an unparsable URI is rejected by client and server alike."""
    bad_uri = "%"
    expected_message = ".*Cannot parse URI:.*"

    # Client side: connecting to the malformed location must raise.
    with pytest.raises(pa.ArrowInvalid, match=expected_message):
        flight.connect(bad_uri)

    # Server side: constructing a server on the same location must raise too.
    with pytest.raises(pa.ArrowInvalid, match=expected_message):
        ConstantFlightServer(bad_uri)
def read_from_endpoint(endpoint):
    """Fetch the stream behind ``endpoint`` and print it as a pandas frame.

    The first location listed on the endpoint is used when there is one;
    otherwise the client connects to localhost on ``args.port``. When
    ``args.username`` or ``args.password`` is set, the client authenticates
    with HTTP basic credentials before issuing the ``do_get``.
    """
    if not endpoint.locations:
        location = "grpc://localhost:{}".format(args.port)
    else:
        location = endpoint.locations[0]
    client = fl.connect(location)

    if args.username or args.password:
        handler = HttpBasicClientAuthHandler(args.username, args.password)
        client.authenticate(handler)

    reader: fl.FlightStreamReader = client.do_get(endpoint.ticket)
    table = reader.read_all()
    print(table.to_pandas())
def client():
    """Stream the 'molbeam' ticket from the remote server and discard it.

    Every item of the stream is consumed but nothing is kept; tqdm reports
    progress and is given ``total=191``.
    """
    # A local endpoint (grpc://0.0.0.0:8815) was used here previously.
    connection = fl.connect("grpc://35.168.111.94:8815")
    ticket = fl.Ticket('molbeam')
    batches = connection.do_get(ticket)
    for _ in tqdm(batches, total=191):
        pass
def __init__(self, delegate, **kwargs):
    """Initialise the base class and, optionally, a client for a delegate.

    ``kwargs`` are forwarded unchanged to the parent constructor. When
    ``delegate`` is truthy, ``self.delegate`` becomes a Flight client
    connected to it with the tracing client middleware installed;
    otherwise ``self.delegate`` is ``None``.
    """
    super().__init__(**kwargs)
    self.delegate = (
        flight.connect(
            delegate, middleware=(TracingClientMiddlewareFactory(), ))
        if delegate
        else None
    )
def test_flight_list_flights():
    """Exercise list_flights with and without criteria."""
    with ConstantFlightServer() as server:
        location = ('localhost', server.port)
        client = flight.connect(location)

        # Without criteria the listing is expected to be empty.
        unfiltered = list(client.list_flights())
        assert unfiltered == []

        # With the server's CRITERIA exactly one flight is expected.
        matching = list(client.list_flights(ConstantFlightServer.CRITERIA))
        assert len(matching) == 1
def test_flight_do_get_dicts():
    """Fetch the b'dicts' ticket and compare it with simple_dicts_table()."""
    expected = simple_dicts_table()

    with ConstantFlightServer() as server:
        location = ('localhost', server.port)
        client = flight.connect(location)
        reader = client.do_get(flight.Ticket(b'dicts'))
        received = reader.read_all()
        assert received.equals(expected)
def main():
    """Parse the sub-command from the command line and run it.

    Both sub-commands accept ``-n/--name``. If no sub-command was given the
    help text is printed and the process exits with status 1. Otherwise the
    matching handler is called with the parsed arguments and a Flight client
    connected to ``grpc://0.0.0.0:8815``.
    """
    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers()

    # Both sub-commands share the same single option.
    for action_name in ('get_by_ticket', 'get_by_ticket_pandas'):
        sub = subcommands.add_parser(action_name)
        sub.set_defaults(action=action_name)
        sub.add_argument('-n', '--name', type=str,
                         help="Name of the ticket to fetch.")

    args = parser.parse_args()
    if not hasattr(args, 'action'):
        # No sub-command on the command line: ``action`` was never set.
        parser.print_help()
        sys.exit(1)

    commands = {
        'get_by_ticket': get_by_ticket,
        'get_by_ticket_pandas': get_by_ticket_pandas,
    }
    client = fl.connect("grpc://0.0.0.0:8815")
    handler = commands[args.action]
    handler(args, client)
def test_flight_do_get_ticket():
    """Make sure the ticket sent by the client reaches the server."""
    values = pa.array([-10, -5, 0, 5, 10], type=pa.int32())
    expected = pa.Table.from_arrays([values], names=['a'])

    with CheckTicketFlightServer(expected_ticket=b'the-ticket') as server:
        location = ('localhost', server.port)
        client = flight.connect(location)
        reader = client.do_get(flight.Ticket(b'the-ticket'))
        received = reader.read_all()
        assert received.equals(expected)
def test_do_get_ints_pandas():
    """Fetch the b'ints' ticket through read_pandas and check its values.

    The 'some_ints' column of the resulting frame must match the first
    column of simple_ints_table().
    """
    expected = simple_ints_table()

    with ConstantFlightServer() as server:
        location = ('localhost', server.port)
        client = flight.connect(location)
        reader = client.do_get(flight.Ticket(b'ints'))
        frame = reader.read_pandas()
        assert list(frame['some_ints']) == expected.column(0).to_pylist()
def test_tls_override_hostname():
    """Check that overriding the hostname with a wrong value fails."""
    certs = example_tls_certs()

    with ConstantFlightServer(tls_certificates=certs["certificates"]) as s:
        location = ('localhost', s.port)
        connect_options = {
            "tls_root_certs": certs["root_cert"],
            "override_hostname": "fakehostname",
        }
        client = flight.connect(location, **connect_options)

        # The call itself, not the connect, is where the failure must surface.
        with pytest.raises(flight.FlightUnavailableError):
            client.do_get(flight.Ticket(b'ints'))
def pclient():
    """Stream the 'molbeam' ticket and submit one Ray task per stream item.

    Each task receives the item's ``data`` and returns the constant 1; the
    collected results are printed once all tasks have finished.
    """
    import ray

    ray.init()

    @ray.remote
    def f(batch):
        return 1

    connection = fl.connect("grpc://35.168.111.94:8815")
    ticket = fl.Ticket('molbeam')
    stream = connection.do_get(ticket)

    # Submit while streaming; tqdm is given total=191 for the progress bar.
    futures = []
    for chunk in tqdm(stream, total=191):
        futures.append(f.remote(chunk.data))

    print(ray.get(futures))
def main(port, num_repeat):
    """Connect to the local Flight server and time repeated dataset reads.

    Sets the module-level ``client`` and ``info`` globals, then times
    ``read_dataset()`` (imported from ``__main__`` by the timing setup)
    ``num_repeat`` times, one execution per run, and prints the timings.
    """
    global client, info

    location = "grpc://localhost:{}".format(port)
    client = fl.connect(location)

    descriptor = fl.FlightDescriptor.for_command(json.dumps(request))
    info = client.get_flight_info(descriptor)

    timings = repeat(
        stmt="read_dataset()",
        setup="from __main__ import read_dataset",
        repeat=num_repeat,
        number=1,
    )
    message = ("Timing " + str(num_repeat)
               + " runs of retrieving the dataset:" + str(timings))
    print(message)
def main(port, num_repeat, username, password):
    """Connect (optionally authenticated) and time repeated dataset reads.

    Sets the module-level ``client`` and ``info`` globals. HTTP basic
    authentication is performed when ``username`` or ``password`` is
    truthy. ``read_dataset()`` (imported from ``__main__`` by the timing
    setup) is then timed ``num_repeat`` times, one execution per run, and
    the timings are printed.
    """
    global client, info

    location = "grpc://localhost:{}".format(port)
    client = fl.connect(location)

    if username or password:
        handler = HttpBasicClientAuthHandler(username, password)
        client.authenticate(handler)

    descriptor = fl.FlightDescriptor.for_command(json.dumps(request))
    info = client.get_flight_info(descriptor)

    timings = repeat(
        stmt="read_dataset()",
        setup="from __main__ import read_dataset",
        repeat=num_repeat,
        number=1,
    )
    message = ("Timing " + str(num_repeat)
               + " runs of retrieving the dataset:" + str(timings))
    print(message)
def main():
    """Command-line entry point: run the tracing server or the client.

    ``server`` requires ``--listen`` and optionally takes ``--delegate``;
    it starts a FlightServer with the tracing server middleware and serves.
    ``client`` takes the server location and an optional ``--request-id``;
    it sets the trace id (falling back to "client-chosen-id"), calls the
    "get-trace-id" action and prints each result body.

    Returns 1 after printing the help text when no sub-command was given;
    otherwise returns None.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command")

    client_parser = subparsers.add_parser("client", help="Run the client.")
    client_parser.add_argument("server")
    client_parser.add_argument("--request-id", default=None)

    server_parser = subparsers.add_parser("server", help="Run the server.")
    server_parser.add_argument(
        "--listen",
        required=True,
        help="The location to listen on (example: grpc://localhost:5050)",
    )
    server_parser.add_argument(
        "--delegate",
        required=False,
        default=None,
        help=("A location to delegate to. That is, this server will "
              "simply call the given server for the response. Demonstrates "
              "propagation of the trace ID between servers."),
    )

    args = parser.parse_args()
    if not getattr(args, "command"):
        parser.print_help()
        return 1

    if args.command == "server":
        flight_server = FlightServer(
            args.delegate,
            location=args.listen,
            middleware={"trace": TracingServerMiddlewareFactory()})
        flight_server.serve()
    elif args.command == "client":
        flight_client = flight.connect(
            args.server,
            middleware=(TracingClientMiddlewareFactory(),))
        # Use the caller-supplied id when present, else a fixed default.
        TraceContext.set_trace_id(args.request_id or "client-chosen-id")
        action = flight.Action("get-trace-id", b"")
        for result in flight_client.do_action(action):
            print(result.body.to_pybytes())
def main(port, username, password):
    """Upload a sample table to the local Flight server and read it back.

    Authenticates with HTTP basic credentials when ``username`` or
    ``password`` is truthy. A single-column table ('a', values 0..10239) is
    written through ``do_put`` with a chunk size of 1024; the first endpoint
    returned by ``get_flight_info`` is then fetched and printed as a pandas
    DataFrame.
    """

    def _request_descriptor():
        # A new descriptor is built from the module-level ``request`` on
        # every call, as the upload and the lookup each need one.
        return fl.FlightDescriptor.for_command(json.dumps(request))

    client = fl.connect("grpc://localhost:{}".format(port))
    if username or password:
        handler = HttpBasicClientAuthHandler(username, password)
        client.authenticate(handler)

    # write the new dataset
    column = pa.array(range(10 * 1024))
    data = pa.Table.from_arrays([column], names=['a'])
    writer, _ = client.do_put(_request_descriptor(), data.schema)
    writer.write_table(data, 1024)
    writer.close()

    # now that the dataset is in place, let's try to read it
    info = client.get_flight_info(_request_descriptor())
    first_endpoint = info.endpoints[0]
    reader: fl.FlightStreamReader = client.do_get(first_endpoint.ticket)
    table = reader.read_all()
    print(table.to_pandas())
def main():
    """Push a Spark DataFrame to a Flight service and read it back.

    Steps, in order: build a ten-row Spark DataFrame and put it to the
    Flight service; fetch every endpoint of the flight with a pyarrow
    client and print the combined pandas DataFrame; if tensorflow and
    tensorflow_io can be imported, iterate the same data through an
    ArrowFlightDataset; finally stop the Spark session.
    """
    # Location of the Flight Service
    host = '127.0.0.1'
    port = '8888'

    # Unique identifier for flight data
    flight_desc = 'spark-flight-descriptor'

    # --- Run Spark to put Arrow data to the Flight Service ---
    spark = SparkSession.builder.appName('spark-flight').getOrCreate()

    ids = spark.range(10)
    label = (col('id') % 2).alias('label')
    df = ids.select(label).withColumn('data', rand())
    df.show(10)

    # Put the Spark DataFrame to the Flight Service
    SparkFlightConnector.put(df, host, port, flight_desc)

    # --- Create a pandas DataFrame from a pyarrow Flight client reader ---
    # Connect to the Flight service and look up the flight's endpoints
    client = pa_flight.connect((host, int(port)))
    desc = pa_flight.FlightDescriptor.for_path(flight_desc)
    info = client.get_flight_info(desc)

    # Read every endpoint into its own pyarrow Table
    tables = [client.do_get(e.ticket).read_all() for e in info.endpoints]

    # Convert the Tables to a single pandas DataFrame
    combined = pa.concat_tables(tables)
    pdf = combined.to_pandas()
    print(f"DataFrame from Flight streams:\n{pdf}")

    # --- Create a tf.data.Dataset to iterate over Arrow data from Flight ---
    try:
        # Imported only to find out whether TensorFlow is installed.
        import tensorflow
        import tensorflow_io
    except ImportError:
        pass
    else:
        from tensorflow_flight_dataset import ArrowFlightDataset
        dataset = ArrowFlightDataset.from_schema(
            host, port, flight_desc, to_arrow_schema(df.schema))
        for row in dataset:
            print(row)
        dataset.proc.terminate()

    spark.stop()
def __init__(self, endpoint, port, flight_command, auth_handler):
    """Connect to the Flight server and remember the command to issue.

    The client connects to ``grpc://<endpoint>:<port>`` and authenticates
    with ``auth_handler`` when that handler is truthy. ``flight_command``
    is stored unchanged on the instance.
    """
    location = "grpc://{}:{}".format(endpoint, port)
    self.flight_client = fl.connect(location)

    if auth_handler:
        self.flight_client.authenticate(auth_handler)

    self.flight_command = flight_command
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # This file is called from a test in test_flight.py. import time import pyarrow as pa import pyarrow.flight as flight class Server(flight.FlightServerBase): def do_put(self, context, descriptor, reader, writer): time.sleep(1) raise flight.FlightCancelledError("") if __name__ == "__main__": server = Server("grpc://localhost:0") client = flight.connect(f"grpc://localhost:{server.port}") schema = pa.schema([]) writer, reader = client.do_put( flight.FlightDescriptor.for_command(b""), schema) writer.done_writing()
def __init__(self, location, options=None):
    """Connect to ``location`` and block until the server is available.

    ``options`` is stored unchanged on the instance as ``self.options``;
    the Flight client is kept as ``self.con``.
    """
    self.options = options

    self.con = flight.connect(location)
    # Do not hand back an instance whose server cannot be reached yet.
    self.con.wait_for_available()
def __init__(self, session):
    """Keep ``session`` and open a Flight client to its host and port."""
    self.session = session

    location = (session.host, session.port)
    self._flight_client = paflight.connect(location)
import json
import pyarrow.flight as fl
import pandas as pd

# Flight client for the sample's Arrow Flight endpoint
client = fl.connect('grpc://my-notebook-fybrik-notebook-sample-arrow-flight-aef23.fybrik-blueprints:80')

# Request body: the asset to read and, optionally, a "columns" key holding
# the list of column names to restrict the result to
request = {
    "asset": "fybrik-notebook-sample/paysim-csv",
    "columns": ["amount", "oldbalanceOrg"]
}

# Look the request up on the server, then read the first endpoint's stream
# into a pandas DataFrame
descriptor = fl.FlightDescriptor.for_command(json.dumps(request))
info = client.get_flight_info(descriptor)
first_endpoint = info.endpoints[0]
reader: fl.FlightStreamReader = client.do_get(first_endpoint.ticket)
df: pd.DataFrame = reader.read_pandas()
print(df)
def main():
    """Parse the sub-command from the command line and run it.

    Each sub-command stores its own name as ``args.action``. If no
    sub-command was given the help text is printed and the process exits
    with status 1. Otherwise the matching handler is called with the parsed
    arguments and a Flight client connected to ``grpc://0.0.0.0:8815``.
    """
    # (sub-command name, ((short flag, long flag, help text), ...)) in the
    # order the sub-commands are registered; every option is a string.
    subcommand_options = (
        ('get_by_ticket',
         (('-n', '--name', "Name of the ticket to fetch."),)),
        ('get_by_ticket_pandas',
         (('-n', '--name', "Name of the ticket to fetch."),)),
        ('get_schema',
         (('-p', '--path', "Descriptor path."),)),
        ('get_endpoints',
         (('-p', '--path', "Descriptor path."),)),
        ('do_put',
         (('-p', '--path', "Descriptor path."),
          ('-v', '--values', "Values to put on server."))),
        ('list_actions', ()),
        ('do_action',
         (('-t', '--type', "Type of action."),)),
        ('list_flights', ()),
    )

    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers()
    for action_name, options in subcommand_options:
        sub = subcommands.add_parser(action_name)
        sub.set_defaults(action=action_name)
        for short_flag, long_flag, help_text in options:
            sub.add_argument(short_flag, long_flag, type=str, help=help_text)

    args = parser.parse_args()
    if not hasattr(args, 'action'):
        # No sub-command on the command line: ``action`` was never set.
        parser.print_help()
        sys.exit(1)

    commands = {
        'get_by_ticket': get_by_ticket,
        'get_by_ticket_pandas': get_by_ticket_pandas,
        'get_schema': get_schema,
        'get_endpoints': get_endpoints,
        'list_flights': list_flights,
        'do_put': do_put,
        'list_actions': list_actions,
        'do_action': do_action,
    }
    client = fl.connect("grpc://0.0.0.0:8815")
    handler = commands[args.action]
    handler(args, client)