# # Example 1
# ### Select the rows where the value of column "filter" is an integer

from optimus.engines.spark.audf import filter_row_by_data_type as fbdt
df.rows.select(fbdt("filter", "integer")).table()
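
# A minimal variant of the same call, assuming "string" is also a type name recognized
# by filter_row_by_data_type: select the rows whose "filter" value parses as a string.
df.rows.select(fbdt("filter", "string")).table()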

# ### Create an abstract Spark UDF to filter the rows where the value of column "num" is greater than 1

# +
from optimus.engines.spark.audf import abstract_udf as audf


def func(val, attr):
    # Boolean condition: True for rows whose column value is greater than 1
    return val > 1


df.rows.select(audf("num", func, "boolean")).table()
# -
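
# The same abstract UDF can also be used to drop rather than select rows. This is a
# sketch that assumes rows.drop accepts an abstract UDF condition, as exercised with
# the test creator in Example 2 below.
df.rows.drop(audf("num", func, "boolean")).table()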

# ### Create an abstract Spark UDF (Pandas UDF) that passes two extra arguments to a function and applies a sum operation

# +
from optimus.engines.spark.audf import abstract_udf as audf


def func(val, attr):
    # Add the two extra attributes to the column value
    return val + attr[0] + attr[1]


df.withColumn("num_sum", audf("num", func, "int", [10, 20])).table()

# -
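
# A shorter sketch of the same mechanism, passing a single extra argument through the
# attrs list. The function add_offset and the output column name "num_plus" are
# illustrative names, not part of the original example.

# +
def add_offset(val, attr):
    # Add the first (and only) attribute to the column value
    return val + attr[0]


df.withColumn("num_plus", audf("num", add_offset, "int", [5])).table()
# -
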
# # Example 2
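
# NOTE (assumption): "t" is an Optimus test-creator instance and "df"/"source_df" are
# Spark dataframes prepared earlier in the original script; each t.create(...) call
# below generates a unit test for the named rows method, forwarding the trailing
# arguments to that method.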
t.create(None, "rows.select_by_dtypes", None, "df", None, "filter", "integer")

fil = (source_df["num"] == 2) | (source_df["second"] == 5)
print(str(fil))
# type(fil)

t.create(None, "rows.drop", None, "df", None, fil)

t.create(None, "rows.drop_by_dtypes", None, "df", None, "filter", "integer")


def func_data_type(value, attr):
    # Boolean condition: True for rows whose column value is greater than 1
    return value > 1


from optimus.engines.spark.audf import abstract_udf as audf

a = audf("num", func_data_type, "boolean")

t.create(None, "rows.drop", "audf", "df", None, a)

t.create(None, "rows.sort", None, "df", None, "num", "desc")

t.create(None, "rows.is_in", None, "df", None, "num", 2)

t.create(None, "rows.between", None, "df", None, "second", 6, 8)

t.create(None, "rows.between", "equal", "df", None, "second", 6, 8, equal=True)

t.create(None, "rows.between", "invert_equal", "df", None, "second", 6, 8, invert=True, equal=True)

t.create(None, "rows.between", "bounds", "df", None, "second", bounds=[(6, 7), (7, 8)], invert=True, equal=True)