def test_next_reset_combined():
    """Check that ``ctx.next(reset=True)`` restarts iteration inside a group.

    The UDF emits ``t`` for up to two rows, resets the context and then emits
    ``t + 1`` for up to two rows, so the first two input rows are seen twice.
    """

    def udf_wrapper():
        def run(ctx):
            for i in range(2):
                ctx.emit(ctx.t)
                if not ctx.next():
                    break
            ctx.next(reset=True)
            for i in range(2):
                ctx.emit(ctx.t+1)
                if not ctx.next():
                    break

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1,), (5,), (6,)])]

    emitted = executor.run(input_groups, environment)

    # First pass emits 1 and 5 unchanged, second pass emits 1 + 1 and 5 + 1.
    expected = [Group([(1,), (5,), (2,), (6,)])]
    assert emitted == expected
def test_emit_single_column_none():
    """Emitting ``None`` for a single output column yields one ``(None,)`` row."""

    def udf_wrapper():
        def run(ctx):
            ctx.emit(None)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, ), (5, ), (6, )])]

    emitted = executor.run(input_groups, environment)

    expected = [Group([(None, )])]
    assert emitted == expected
def test_udf_wrapper_with_no_empty_line_after_function_name():
    """A wrapper whose ``run`` directly follows its ``def`` line runs cleanly.

    The UDF does nothing, so the single input group maps to an empty output
    group.
    """
    # Keep ``def run`` on the line right after ``def udf_wrapper():`` - that
    # layout is what the test name refers to.
    def udf_wrapper():
        def run(ctx):
            pass

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, )])]

    emitted = executor.run(input_groups, environment)

    expected = [Group([])]
    assert emitted == expected
def test_get_dataframe_all():
    """``get_dataframe(num_rows='all')`` followed by ``emit`` echoes every row."""

    def udf_wrapper():
        def run(ctx):
            df = ctx.get_dataframe(num_rows='all')
            ctx.emit(df)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, ), (5, ), (6, )])]

    emitted = executor.run(input_groups, environment)

    expected = [Group([(1, ), (5, ), (6, )])]
    assert emitted == expected
def _next_group(self):
    """Make the next input group current and prepare a fresh output group.

    Returns:
        True when another non-empty input group was taken from
        ``self._input_groups``; False when that iterator is exhausted.

    Raises:
        RuntimeError: If the next input group has length zero.
    """

    def clear_group_state():
        # Both failure paths drop every per-group reference in the same way.
        self._data = None
        self._output_group_list = None
        self._output_group = None
        self._input_group = None
        self._iter = None
        self._len = None

    try:
        self._input_group = next(self._input_groups)
    except StopIteration:
        clear_group_state()
        return False

    self._len = len(self._input_group)
    if self._len == 0:
        clear_group_state()
        raise RuntimeError("Empty input groups are not allowed")

    # The Group wraps the very list object kept in _output_group_list.
    self._output_group_list = []
    self._output_group = Group(self._output_group_list)
    self._output_groups.append(self._output_group)
    self._iter = iter(self._input_group)
    # Advance once right after creating the iterator - presumably this loads
    # the first row of the group; confirm against next().
    self.next()
    return True
def test_exa_meta_in_run():
    """``exa.meta.script_code`` is readable inside ``run`` and can be emitted."""

    def udf_wrapper():
        def run(ctx):
            ctx.emit(exa.meta.script_code)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", str, "VARCHAR(2000)")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    # The local name stays ``exa`` because the wrapper refers to ``exa`` too.
    exa = MockExaEnvironment(meta)
    input_groups = [Group([(1,)])]

    emitted = executor.run(input_groups, exa)

    expected = [Group([(exa.meta.script_code,)])]
    assert emitted == expected
def test_next_and_emit():
    """Emit each row and advance with ``ctx.next()`` until it returns falsy."""

    def udf_wrapper():
        def run(ctx):
            while True:
                ctx.emit(ctx.t)
                if not ctx.next():
                    return

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, ), (5, ), (6, )])]

    emitted = executor.run(input_groups, environment)

    expected = [Group([(1, ), (5, ), (6, )])]
    assert emitted == expected
def test_simple_return():
    """A SCALAR/RETURNS UDF returning ``t + 1`` yields one output row per input row."""

    def udf_wrapper():
        def run(ctx):
            return ctx.t+1

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=input_columns,
        output_type="RETURNS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1,), (5,), (6,)])]

    returned = executor.run(input_groups, environment)

    expected = [Group([(2,), (6,), (7,)])]
    assert returned == expected
def test_get_connection_in_init():
    """A connection fetched at wrapper level (outside ``run``) is usable in ``run``."""

    def udf_wrapper():
        con = exa.get_connection("TEST_CON")

        def run(ctx):
            ctx.emit(con.address)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", str, "VARCHAR(2000)")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    connections = {"TEST_CON": Connection(address="https://test.de")}
    # The local name stays ``exa`` because the wrapper refers to ``exa`` too.
    exa = MockExaEnvironment(meta, connections=connections)
    input_groups = [Group([(1,)])]

    emitted = executor.run(input_groups, exa)

    expected = [Group([("https://test.de",)])]
    assert emitted == expected
def test_return_multi_column_none():
    """Returning ``None, None`` from a two-column RETURNS UDF yields ``(None, None)`` rows."""

    def udf_wrapper():
        def run(ctx):
            return None,None

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [
        Column("t1", int, "INTEGER"),
        Column("t2", int, "INTEGER"),
    ]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=input_columns,
        output_type="RETURNS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1,), (5,), (6,)])]

    returned = executor.run(input_groups, environment)

    # One all-None row is expected per input row.
    expected = [Group([(None, None), (None, None), (None, None)])]
    assert returned == expected
def test_udf_wrapper_with_docstring_after_empty_lines():
    """Run a UDF whose wrapper begins with blank lines followed by a docstring.

    The assertion expects the run to succeed and produce an empty output group.
    """
    # NOTE(review): the wrapper's own docstring says an Exception should be
    # raised, but the assertion below expects a normal, empty result - confirm
    # which one is intended. The blank lines in front of the docstring are
    # reconstructed from the test name; verify the exact layout.
    def udf_wrapper():


        """
        wrapper with docstring should raise Exception, because their is no easy way to remove docstrings to get only the source witin the function
        """

        def run(ctx):
            pass

    executor = UDFMockExecutor()
    meta = MockMetaData(script_code_wrapper_function=udf_wrapper,
                        input_type="SET",
                        input_columns=[Column("t", int, "INTEGER")],
                        output_type="EMITS",
                        output_columns=[Column("t", int, "INTEGER")])
    exa = MockExaEnvironment(meta)
    result = executor.run([Group([(1, )])], exa)
    # ``run`` emits nothing, so the output group is expected to be empty.
    assert result == [Group([])]
def test_group_with_iterable_with_size_len():
    """``len(Group(...))`` uses ``__len__`` of an ``IterableWithSize`` input.

    The helper's ``__iter__`` raises, so the test fails if the group iterates
    over the rows to determine its length.
    """

    class SizedButNotIterable(IterableWithSize):
        def __len__(self):
            return 3

        def __iter__(self):
            raise Exception(
                "The group should use __len__ instead of __iter__ to determine the length"
            )

    rows = SizedButNotIterable()
    group = Group(rows)

    assert len(group) == 3
def test_get_dataframe_iter_next():
    """Alternate ``get_dataframe(num_rows=2)`` with an extra ``ctx.next()`` call.

    Of the six input rows, only 1, 2, 4 and 5 are expected in the output.
    """

    def udf_wrapper():
        def run(ctx):
            while True:
                df = ctx.get_dataframe(num_rows=2)
                if df is None:
                    return
                else:
                    ctx.emit(df)
                    ctx.next()

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, ), (2, ), (3, ), (4, ), (5, ), (6, )])]

    emitted = executor.run(input_groups, environment)

    expected = [Group([(1, ), (2, ), (4, ), (5, )])]
    assert emitted == expected
def test_multi_column_type():
    """Int, float and str columns pass through a SCALAR/RETURNS UDF together."""

    def udf_wrapper():
        def run(ctx):
            return ctx.t1+1, ctx.t2+1.1, ctx.t3+"1"

    executor = UDFMockExecutor()
    input_columns = [
        Column("t1", int, "INTEGER"),
        Column("t2", float, "FLOAT"),
        Column("t3", str, "VARCHAR(20000)"),
    ]
    output_columns = [
        Column("t1", int, "INTEGER"),
        Column("t2", float, "FLOAT"),
        Column("t3", str, "VARCHAR(20000)"),
    ]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=input_columns,
        output_type="RETURNS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)
    input_groups = [Group([(1, 1.0, "1"), (5, 5.0, "5"), (6, 6.0, "6")])]

    returned = executor.run(input_groups, environment)

    expected = [Group([(2, 2.1, "11"), (6, 6.1, "51"), (7, 7.1, "61")])]
    assert returned == expected
def test_exception_udf_wrapper_with_parameter():
    """A wrapper function that declares a parameter must lead to an exception.

    Metadata creation, environment creation and the run all sit inside the
    ``pytest.raises`` block, so an exception from any of them satisfies the test.
    """

    def udf_wrapper(param):
        def run(ctx):
            pass

    executor = UDFMockExecutor()
    with pytest.raises(Exception):
        input_columns = [Column("t", int, "INTEGER")]
        output_columns = [Column("t", int, "INTEGER")]
        meta = MockMetaData(
            script_code_wrapper_function=udf_wrapper,
            input_type="SET",
            input_columns=input_columns,
            output_type="EMITS",
            output_columns=output_columns,
        )
        environment = MockExaEnvironment(meta)
        executor.run([Group([(1, )])], environment)
def test_get_dataframe_start_col_negative():
    """``get_dataframe`` called with ``start_col=-1`` makes the run raise ``RuntimeError``."""

    def udf_wrapper():
        def run(ctx):
            df = ctx.get_dataframe(num_rows=10, start_col=-1)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)

    with pytest.raises(RuntimeError):
        executor.run(
            [Group([(1, ), (2, ), (3, ), (4, ), (5, ), (6, )])],
            environment,
        )
def test_emit_tuple_exception():
    """Passing a tuple to ``ctx.emit`` for an INTEGER column raises ``TypeError``."""

    def udf_wrapper():
        def run(ctx):
            while True:
                ctx.emit((1, ))

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SET",
        input_columns=input_columns,
        output_type="EMITS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)

    with pytest.raises(TypeError):
        executor.run(
            [Group([(1, ), (2, ), (3, ), (4, ), (5, ), (6, )])],
            environment,
        )
def test_emit_not_allowed():
    """Calling ``ctx.emit`` in a SCALAR/RETURNS UDF makes the run raise ``RuntimeError``."""

    def udf_wrapper():
        def run(ctx):
            ctx.emit(ctx.t)

    executor = UDFMockExecutor()
    input_columns = [Column("t", int, "INTEGER")]
    output_columns = [Column("t", int, "INTEGER")]
    meta = MockMetaData(
        script_code_wrapper_function=udf_wrapper,
        input_type="SCALAR",
        input_columns=input_columns,
        output_type="RETURNS",
        output_columns=output_columns,
    )
    environment = MockExaEnvironment(meta)

    with pytest.raises(RuntimeError):
        executor.run([Group([(1,), (5,), (6,)])], environment)
def test_group_prefix_equal_but_second_group_is_longer():
    """Groups differ when the second one has an extra row after a shared prefix."""
    shorter = Group([(1, ), (2, ), (3, )])
    longer = Group([(1, ), (2, ), (3, ), (4, )])

    assert shorter != longer
def test_group_same_length_difference_in_the_middle():
    """Groups of equal length differ when a single middle row differs."""
    with_five = Group([(1, ), (2, ), (5, ), (4, )])
    with_three = Group([(1, ), (2, ), (3, ), (4, )])

    assert with_five != with_three
def test_group_has_tuple_as_iterable_but_rows_is_list():
    """A group built from a tuple of rows exposes ``rows`` equal to a list."""
    rows_as_tuple = ((1, ), (2, ), (5, ), (4, ))
    group = Group(rows_as_tuple)

    expected_rows = [(1, ), (2, ), (5, ), (4, )]
    assert group.rows == expected_rows
def test_group_len():
    """``len`` of a group built from four rows is 4."""
    rows = ((1, ), (2, ), (5, ), (4, ))
    group = Group(rows)

    assert len(group) == 4
def test_group_iter():
    """Iterating a group yields its rows in the order they were given."""
    rows = ((1, ), (2, ), (5, ), (4, ))
    group = Group(rows)

    iterated = list(iter(group))

    assert iterated == [(1, ), (2, ), (5, ), (4, )]
def test_group_with_custom_iterable_rows():
    """A group built from ``MyIterable`` exposes the rows 1, 2 and 3."""
    source = MyIterable()
    group = Group(source)

    expected_rows = [(1, ), (2, ), (3, )]
    assert group.rows == expected_rows
def test_groups_are_equal():
    """Two groups built from equal row lists compare equal."""
    first = Group([(1, ), (2, ), (3, )])
    second = Group([(1, ), (2, ), (3, )])

    assert first == second
def test_group_with_custom_iterable_len():
    """``len`` of a group built from ``MyIterable`` is 3."""
    source = MyIterable()
    group = Group(source)

    assert len(group) == 3