def test_next_reset_combined():
    """Emit two rows, rewind with ``next(reset=True)``, then emit them again.

    The first pass emits ``t`` unchanged and the second pass emits ``t + 1``,
    so the first two input rows show up twice in the output.
    """
    def udf_wrapper():
        def run(ctx):
            for _ in range(2):
                ctx.emit(ctx.t)
                if not ctx.next():
                    break
            ctx.next(reset=True)
            for _ in range(2):
                ctx.emit(ctx.t + 1)
                if not ctx.next():
                    break

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    input_rows = [(1,), (5,), (6,)]
    result = executor.run([Group(input_rows)], exa)

    # Rows 1 and 5 from the first pass, then 1 + 1 and 5 + 1 after the reset.
    expected = [Group([(1,), (5,), (2,), (6,)])]
    assert result == expected
def test_different_udf_wrapper_function_names():
    """Run the same no-op UDF through wrappers with three different names.

    Each wrapper is executed with the same executor and must produce an
    empty output group.
    """
    def udf_wrapper():
        def run(ctx):
            pass

    def udf_wrapper2():
        def run(ctx):
            pass

    def my_wrapper():
        def run(ctx):
            pass

    executor = UDFMockExecutor()
    for wrapper in (udf_wrapper, udf_wrapper2, my_wrapper):
        meta = MockMetaData(
            script_code_wrapper_function=wrapper,
            input_type="SET",
            input_columns=[Column("t", int, "INTEGER")],
            output_type="EMITS",
            output_columns=[Column("t", int, "INTEGER")],
        )
        exa = MockExaEnvironment(meta)
        result = executor.run([Group([(1,)])], exa)
        assert result == [Group([])]
def test_exception_udf_wrapper_with_parameter():
    """Expect an exception when the wrapper function declares a parameter."""
    def udf_wrapper(param):
        def run(ctx):
            pass

    executor = UDFMockExecutor()
    # Metadata creation, environment creation and the run are all inside the
    # ``raises`` scope: the test only requires that one of them fails.
    with pytest.raises(Exception):
        meta = MockMetaData(
            script_code_wrapper_function=udf_wrapper,
            input_type="SET",
            input_columns=[Column("t", int, "INTEGER")],
            output_type="EMITS",
            output_columns=[Column("t", int, "INTEGER")],
        )
        exa = MockExaEnvironment(meta)
        executor.run([Group([(1,)])], exa)
def test_emit_single_column_none():
    """Emit a single ``None`` and expect exactly one ``(None,)`` output row."""
    def udf_wrapper():
        def run(ctx):
            ctx.emit(None)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    input_group = Group([(1,), (5,), (6,)])
    result = executor.run([input_group], exa)

    expected = [Group([(None,)])]
    assert result == expected
def test_udf_wrapper_with_white_spaces_in_function_definition():
    """Run a no-op UDF whose wrapper header contains extra white space.

    The wrapper's ``def`` line is deliberately written with irregular
    spacing around the parentheses and the colon, so that this test covers
    what its name describes. The UDF emits nothing, so the output group
    must be empty.
    """
    # NOTE: the odd spacing in the header below is the point of this test;
    # do not let a formatter normalise it.
    def udf_wrapper   (   )   :
        def run(ctx):
            pass

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)
    result = executor.run([Group([(1,)])], exa)
    assert result == [Group([])]
def test_get_dataframe_start_col_negative():
    """Expect ``RuntimeError`` when ``get_dataframe`` gets ``start_col=-1``."""
    def udf_wrapper():
        def run(ctx):
            df = ctx.get_dataframe(num_rows=10, start_col=-1)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    with pytest.raises(RuntimeError):
        executor.run(
            [Group([(1,), (2,), (3,), (4,), (5,), (6,)])],
            exa,
        )
def test_get_dataframe_all():
    """Fetch all rows with ``num_rows='all'`` and emit the frame unchanged."""
    def udf_wrapper():
        def run(ctx):
            df = ctx.get_dataframe(num_rows='all')
            ctx.emit(df)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    result = executor.run([Group([(1,), (5,), (6,)])], exa)

    # The output must contain the same rows as the input group.
    expected = [Group([(1,), (5,), (6,)])]
    assert result == expected
def test_emit_tuple_exception():
    """Expect ``TypeError`` when a tuple is emitted for an INTEGER column.

    The UDF emits inside an endless loop with no exit condition, so the
    test only terminates because the run raises.
    """
    def udf_wrapper():
        def run(ctx):
            while True:
                ctx.emit((1,))

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    with pytest.raises(TypeError):
        executor.run(
            [Group([(1,), (2,), (3,), (4,), (5,), (6,)])],
            exa,
        )
def test_exa_meta_in_run():
    """Emit ``exa.meta.script_code`` from inside ``run`` and compare it.

    The emitted value must equal the ``script_code`` exposed by the
    environment that was passed to the executor.
    """
    def udf_wrapper():
        def run(ctx):
            ctx.emit(exa.meta.script_code)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", str, "VARCHAR(2000)")],
    )
    exa = MockExaEnvironment(meta)

    result = executor.run([Group([(1,)])], exa)

    expected_row = (exa.meta.script_code,)
    assert result == [Group([expected_row])]
def test_emit_not_allowed():
    """Expect ``RuntimeError`` when a SCALAR/RETURNS UDF calls ``emit``."""
    def udf_wrapper():
        def run(ctx):
            ctx.emit(ctx.t)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="RETURNS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    with pytest.raises(RuntimeError):
        executor.run([Group([(1,), (5,), (6,)])], exa)
def test_simple_return():
    """Return ``t + 1`` from a SCALAR/RETURNS UDF for every input row."""
    def udf_wrapper():
        def run(ctx):
            return ctx.t + 1

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="RETURNS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    result = executor.run([Group([(1,), (5,), (6,)])], exa)

    # One output row per input row, each incremented by one.
    expected = [Group([(2,), (6,), (7,)])]
    assert result == expected
def test_next_and_emit():
    """Walk the group with ``next()`` and emit ``t`` for every row."""
    def udf_wrapper():
        def run(ctx):
            # Emit the current row first, then keep emitting for as long as
            # next() reports another row.
            ctx.emit(ctx.t)
            while ctx.next():
                ctx.emit(ctx.t)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    result = executor.run([Group([(1,), (5,), (6,)])], exa)

    expected = [Group([(1,), (5,), (6,)])]
    assert result == expected
def test_return_multi_column_none():
    """Return ``(None, None)`` for every row of a two-column RETURNS UDF."""
    def udf_wrapper():
        def run(ctx):
            return None, None

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="RETURNS",
        output_columns=[
            Column("t1", int, "INTEGER"),
            Column("t2", int, "INTEGER"),
        ],
    )
    exa = MockExaEnvironment(meta)

    result = executor.run([Group([(1,), (5,), (6,)])], exa)

    # Three input rows, so three all-None output rows.
    expected = [Group([(None, None), (None, None), (None, None)])]
    assert result == expected
def test_get_connection_in_init():
    """Look up a connection at wrapper level and emit its address in ``run``.

    ``exa.get_connection`` is called in the wrapper body, outside ``run``;
    ``run`` then emits the ``address`` of the connection it obtained.
    """
    def udf_wrapper():
        con = exa.get_connection("TEST_CON")

        def run(ctx):
            ctx.emit(con.address)

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", str, "VARCHAR(2000)")],
    )
    connections = {"TEST_CON": Connection(address="https://test.de")}
    exa = MockExaEnvironment(meta, connections=connections)

    result = executor.run([Group([(1,)])], exa)

    expected = [Group([("https://test.de",)])]
    assert result == expected
def test_udf_wrapper_with_docstring_after_empty_lines():
    """Run a no-op UDF whose wrapper has a docstring after empty lines.

    The wrapper body starts with empty lines followed by a docstring, which
    is the layout this test is named after. The UDF emits nothing, so the
    output group must be empty.

    NOTE(review): the wrapper docstring used to say that a wrapper with a
    docstring should raise an Exception because docstrings cannot easily be
    stripped from the function source. This test asserts a normal, empty
    result instead - confirm that this is the intended behaviour.
    """
    # NOTE: the empty lines before the wrapper docstring are the point of
    # this test; do not let a formatter remove them.
    def udf_wrapper():


        """
        wrapper docstring that only starts after empty lines; the test
        expects the wrapped UDF to run normally and to emit nothing
        """

        def run(ctx):
            pass

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)
    result = executor.run([Group([(1,)])], exa)
    assert result == [Group([])]
def test_multi_column_type():
    """Return an int, a float and a str column from a SCALAR/RETURNS UDF.

    Each value is modified according to its type: ``+ 1`` for the integer,
    ``+ 1.1`` for the float and ``+ "1"`` (concatenation) for the string.
    """
    def udf_wrapper():
        def run(ctx):
            return ctx.t1 + 1, ctx.t2 + 1.1, ctx.t3 + "1"

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=[
            Column("t1", int, "INTEGER"),
            Column("t2", float, "FLOAT"),
            Column("t3", str, "VARCHAR(20000)"),
        ],
        output_type="RETURNS",
        output_columns=[
            Column("t1", int, "INTEGER"),
            Column("t2", float, "FLOAT"),
            Column("t3", str, "VARCHAR(20000)"),
        ],
    )
    exa = MockExaEnvironment(meta)

    input_rows = [(1, 1.0, "1"), (5, 5.0, "5"), (6, 6.0, "6")]
    result = executor.run([Group(input_rows)], exa)

    expected_rows = [(2, 2.1, "11"), (6, 6.1, "51"), (7, 7.1, "61")]
    assert result == [Group(expected_rows)]
def test_get_dataframe_iter_next():
    """Alternate ``get_dataframe(num_rows=2)`` with an extra ``next()`` call.

    Every fetched frame is emitted and then ``next()`` is called once, so
    the expected output contains rows 1, 2, 4 and 5 of the six input rows.
    """
    def udf_wrapper():
        def run(ctx):
            while True:
                df = ctx.get_dataframe(num_rows=2)
                if df is None:
                    return
                ctx.emit(df)
                ctx.next()

    executor = UDFMockExecutor()
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=[Column("t", int, "INTEGER")],
        output_type="EMITS",
        output_columns=[Column("t", int, "INTEGER")],
    )
    exa = MockExaEnvironment(meta)

    input_rows = [(1,), (2,), (3,), (4,), (5,), (6,)]
    result = executor.run([Group(input_rows)], exa)

    expected = [Group([(1,), (2,), (4,), (5,)])]
    assert result == expected