Ejemplo n.º 1
0
def test_function_adding():
    """Exercise ``Context.register_function`` for add, append and replace.

    A function named "f" is registered three times: initially, again with
    ``replace=False`` and finally with ``replace=True``.  After each step
    the schema must map "f" to the registered callable, and the function
    list must hold an upper-case "F" entry followed by a lower-case "f"
    entry carrying the parameters and return type of that registration.
    """
    c = Context()

    def current_schema():
        # Resolve the schema anew on every access instead of caching it.
        return c.schema[c.schema_name]

    def check_registration(func, total_entries, first_index, parameters,
                           return_type):
        # "f" must resolve to the callable that was registered last.
        assert "f" in current_schema().functions
        assert current_schema().functions["f"].func == func
        assert len(current_schema().function_lists) == total_entries
        # Each registration contributes an "F" entry followed by an "f" entry.
        for offset, expected_name in enumerate(("F", "f")):
            entry = current_schema().function_lists[first_index + offset]
            assert entry.name == expected_name
            assert entry.parameters == parameters
            assert entry.return_type == return_type
            assert not entry.aggregation

    # A fresh context starts without any registered functions.
    assert not current_schema().function_lists
    assert not current_schema().functions

    identity = lambda x: x
    c.register_function(identity, "f", [("x", int)], float)
    check_registration(identity, 2, 0, [("x", int)], float)

    # Without replacement: the new entries are appended after the old ones
    c.register_function(identity, "f", [("x", float)], int, replace=False)
    check_registration(identity, 4, 2, [("x", float)], int)

    # With replacement: only the entries of the new registration remain
    increment = lambda x: x + 1
    c.register_function(increment, "f", [("x", str)], str, replace=True)
    check_registration(increment, 2, 0, [("x", str)], str)
Ejemplo n.º 2
0
    def predict_price(total_amount, trip_distance, passenger_count):
        """Custom prediction function using the already loaded xgboost model.

        The three input columns are concatenated column-wise into a single
        float64 dataframe, which is handed to dask-xgboost so that the
        prediction runs distributed.
        """
        columns = [total_amount, trip_distance, passenger_count]
        features = dd.concat(columns, axis=1)
        features = features.astype("float64")
        return dask_xgboost.predict(client, bst, features)

    # Set up the dask-sql context
    from dask_sql import Context, run_server
    c = Context()

    # Register the prediction function: all three inputs and the return
    # value are declared as np.float64.
    c.register_function(
        predict_price,
        "predict_price",
        [(column, np.float64)
         for column in ("total_amount", "trip_distance", "passenger_count")],
        np.float64,
    )

    # Load the data from S3 and keep it persisted.
    # The dtype keys must match the CSV column headers exactly, otherwise the
    # override is not applied to the intended column (the rate-code column is
    # named "RatecodeID"; it was previously misspelled as "RatecodeIDq").
    df = dd.read_csv("s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv",
                     dtype={
                         "payment_type": "UInt8",
                         "VendorID": "UInt8",
                         "passenger_count": "UInt8",
                         "RatecodeID": "UInt8",
                     },
                     storage_options={
                         "anon": True
                     }).persist()

    # Wait for the persisted dataframe before continuing
    # (NOTE(review): `wait` is presumably dask.distributed.wait -- confirm
    # against the imports of this file).
    wait(df)