Example #1
0
    create_add_one_struct_udf(
        result_formatter=lambda v1, v2: [np.array(v1),
                                         np.array(v2)]),  # list of np.array,
    create_add_one_struct_udf(result_formatter=lambda v1, v2: np.array(
        [np.array(v1), np.array(v2)])),  # np.array of np.array,
    create_add_one_struct_udf(
        result_formatter=lambda v1, v2: pd.DataFrame({
            'col1': v1,
            'col2': v2
        })),  # pd.DataFrame,
]


@elementwise(
    input_type=[dt.double],
    output_type=dt.Struct(
        ['double_col', 'col2'],
        [dt.double, dt.double],
    ),
)
def overwrite_struct_elementwise(v):
    """Return ``(v + 1, v + 2)`` as the two fields of the struct output.

    The declared output struct names its fields 'double_col' and 'col2'.
    The input is asserted to be a pandas Series.
    """
    assert isinstance(v, pd.Series)
    first_field = v + 1
    second_field = v + 2
    return first_field, second_field


@elementwise(
    input_type=[dt.double],
    output_type=dt.Struct(
        ['double_col', 'col2', 'float_col'],
        [dt.double, dt.double, dt.double],
    ),
)
def multiple_overwrite_struct_elementwise(v):
    """Return ``(v + 1, v + 2, v + 3)`` as the three struct fields.

    The declared output struct names its fields 'double_col', 'col2' and
    'float_col'. The input is asserted to be a pandas Series.
    """
    assert isinstance(v, pd.Series)
    first_field = v + 1
    second_field = v + 2
    third_field = v + 3
    return first_field, second_field, third_field
Example #2
0
def create_demean_struct_udf(result_formatter):
    """Build an analytic struct UDF around ``demean_struct``.

    ``demean_struct`` and ``result_formatter`` are handed to
    ``_format_struct_udf_return_type``, and the resulting callable is
    registered through ``analytic`` with two double inputs and a
    ('demean', 'demean_weight') struct-of-doubles output.
    """
    register = analytic(
        input_type=[dt.double, dt.double],
        output_type=dt.Struct(
            ['demean', 'demean_weight'],
            [dt.double, dt.double],
        ),
    )
    formatted_udf = _format_struct_udf_return_type(
        demean_struct, result_formatter
    )
    return register(formatted_udf)
Example #3
0
    return s + 1


@analytic(
    input_type=[dt.double],
    output_type=dt.double,
)
def calc_zscore(s):
    """Return ``s`` minus its mean, divided by ``s.std()``."""
    centered = s - s.mean()
    return centered / s.std()


@reduction(
    input_type=[dt.double],
    output_type=dt.double,
)
def calc_mean(s):
    """Return the result of ``s.mean()``."""
    average = s.mean()
    return average


@elementwise(
    input_type=[dt.double],
    output_type=dt.Struct(
        ['col1', 'col2'],
        [dt.double, dt.double],
    ),
)
def add_one_struct(v):
    """Return ``(v + 1, v + 2)`` for the 'col1' and 'col2' struct fields."""
    col1 = v + 1
    col2 = v + 2
    return col1, col2


@analytic(
    input_type=[dt.double, dt.double],
    output_type=dt.Struct(
        ['demean', 'demean_weight'],
        [dt.double, dt.double],
    ),
)
def demean_struct(v, w):
    """Return ``v`` and ``w`` each with its own mean subtracted.

    The pair fills the 'demean' and 'demean_weight' fields of the
    declared struct output, in that order.
    """
    demeaned_value = v - v.mean()
    demeaned_weight = w - w.mean()
    return demeaned_value, demeaned_weight


@reduction(
    input_type=[dt.double, dt.double],
Example #4
0
def test_struct_from_dict():
    """Check ``dt.Struct.from_dict`` against an explicitly built Struct.

    The input dict mixes a type given as a string ('int64') with one given
    as a dtype object (``dt.float64``), and lists key 'b' before 'a'. The
    expected struct is built with names and types in that same order.
    """
    built = dt.Struct.from_dict({'b': 'int64', 'a': dt.float64})
    expected = dt.Struct(
        names=['b', 'a'],
        types=[dt.int64, dt.float64],
    )

    assert built == expected
Example #5
0
def create_add_one_struct_udf(result_formatter):
    """Build an elementwise struct UDF around ``add_one_struct``.

    ``add_one_struct`` and ``result_formatter`` are handed to
    ``_format_struct_udf_return_type``, and the resulting callable is
    registered through ``elementwise`` with one double input and a
    ('col1', 'col2') struct-of-doubles output.
    """
    register = elementwise(
        input_type=[dt.double],
        output_type=dt.Struct(
            ['col1', 'col2'],
            [dt.double, dt.double],
        ),
    )
    formatted_udf = _format_struct_udf_return_type(
        add_one_struct, result_formatter
    )
    return register(formatted_udf)
Example #6
0
def create_mean_struct_udf(result_formatter):
    """Build a reduction struct UDF around ``mean_struct``.

    ``mean_struct`` and ``result_formatter`` are handed to
    ``_format_struct_udf_return_type``, and the resulting callable is
    registered through ``reduction`` with two double inputs and a
    ('mean', 'mean_weight') struct-of-doubles output.
    """
    register = reduction(
        input_type=[dt.double, dt.double],
        output_type=dt.Struct(
            ['mean', 'mean_weight'],
            [dt.double, dt.double],
        ),
    )
    formatted_udf = _format_struct_udf_return_type(
        mean_struct, result_formatter
    )
    return register(formatted_udf)
Example #7
0
def spark_struct_dtype_to_ibis_dtype(spark_dtype_obj, nullable=True):
    """Build a ``dt.Struct`` from a Spark struct dtype object.

    Field names are taken from ``spark_dtype_obj.names``. Each entry of
    ``spark_dtype_obj.fields`` is converted with ``dt.dtype`` using that
    field's own ``nullable`` flag. The ``nullable`` argument is passed to
    the resulting struct itself.
    """
    field_names = spark_dtype_obj.names
    field_types = []
    for field in spark_dtype_obj.fields:
        field_types.append(dt.dtype(field.dataType, nullable=field.nullable))
    return dt.Struct(field_names, field_types, nullable=nullable)