def test_function_adding():
    """Check how ``Context.register_function`` records registered functions.

    Three registrations of the name ``"f"`` are exercised in sequence:

    1. a first registration, after which ``function_lists`` holds two
       entries named ``"F"`` and ``"f"``;
    2. a registration with ``replace=False``, after which two further
       entries follow the first two;
    3. a registration with ``replace=True``, after which only the two
       entries of the newest registration remain.
    """
    c = Context()

    def schema():
        # Looked up afresh on every use, just like an inline
        # ``c.schema[c.schema_name]`` expression would be.
        return c.schema[c.schema_name]

    def check_registered(func, expected_count):
        # The callable stored under "f" and the total number of list entries.
        assert "f" in schema().functions
        assert schema().functions["f"].func == func
        assert len(schema().function_lists) == expected_count

    def check_entry(position, expected_name, expected_parameters, expected_return_type):
        # One entry of ``function_lists``; none of them is an aggregation.
        assert schema().function_lists[position].name == expected_name
        assert schema().function_lists[position].parameters == expected_parameters
        assert schema().function_lists[position].return_type == expected_return_type
        assert not schema().function_lists[position].aggregation

    # A fresh context has no functions at all
    assert not schema().function_lists
    assert not schema().functions

    original_func = lambda x: x
    c.register_function(original_func, "f", [("x", int)], float)
    check_registered(original_func, 2)
    check_entry(0, "F", [("x", int)], float)
    check_entry(1, "f", [("x", int)], float)

    # Without replacement
    c.register_function(original_func, "f", [("x", float)], int, replace=False)
    check_registered(original_func, 4)
    check_entry(2, "F", [("x", float)], int)
    check_entry(3, "f", [("x", float)], int)

    # With replacement
    replacement_func = lambda x: x + 1
    c.register_function(replacement_func, "f", [("x", str)], str, replace=True)
    check_registered(replacement_func, 2)
    check_entry(0, "F", [("x", str)], str)
    check_entry(1, "f", [("x", str)], str)
# Our custom function for tip-prediction
# using the already loaded xgboost model
def predict_price(total_amount, trip_distance, passenger_count):
    """Run the loaded xgboost model on three input columns.

    The three columns are concatenated column-wise into one dataframe, cast
    to ``float64`` and passed to ``dask_xgboost.predict`` together with the
    ``client`` and ``bst`` objects, which this script expects to have been
    created earlier.

    Returns whatever ``dask_xgboost.predict`` returns for that dataframe.
    """
    # Create a dataframe out of the three columns
    # and pass it to dask-xgboost, to predict
    # distributed
    X = dd.concat(
        [total_amount, trip_distance, passenger_count], axis=1
    ).astype("float64")
    return dask_xgboost.predict(client, bst, X)


# Create a context
from dask_sql import Context, run_server

c = Context()
c.register_function(
    predict_price,
    "predict_price",
    [
        ("total_amount", np.float64),
        ("trip_distance", np.float64),
        ("passenger_count", np.float64),
    ],
    np.float64,
)

# Load the data from S3
df = dd.read_csv(
    "s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv",
    dtype={
        "payment_type": "UInt8",
        "VendorID": "UInt8",
        "passenger_count": "UInt8",
        # The dtype only applies when the key matches the CSV header exactly;
        # the rate-code column of the yellow-taxi data is "RatecodeID".
        "RatecodeID": "UInt8",
    },
    storage_options={"anon": True},
).persist()

wait(df)