def test_allow_different_columns(self):
    """Concatenating tabs with disjoint columns keeps every column."""
    second_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'B': RenderColumn('B', 'number', '{}')},
        pd.DataFrame({'B': [3, 4]}),
    )
    params = {
        'tabs': [second_tab],
        'add_source_column': False,
        'source_column_name': '',
    }

    result = render(
        pd.DataFrame({'A': [1, 2]}),
        params=params,
        tab_name='Tab 1',
        input_columns={'A': RenderColumn('A', 'number', '{}')},
    )

    # The expected frame pins the column ordering as well: 'A' (from the
    # input tab) comes before 'B' (from the appended tab).
    expected = pd.DataFrame({
        'A': [1, 2, np.nan, np.nan],
        'B': [np.nan, np.nan, 3, 4],
    })
    assert_frame_equal(result, expected)
def test_prevent_overwrite(self):
    """Adding a right-tab column whose name already exists is an error."""
    left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
    right = pd.DataFrame({'A': ['1', '2'], 'B': ['X', 'Y']})

    right_columns = {
        'A': RenderColumn('A', 'number', '{}'),
        'B': RenderColumn('B', 'text', None),
    }
    params = {
        'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
        'join_columns': {
            'on': ['A'],
            'right': ['B'],
        },
        'type': 'left',
    }
    input_columns = {
        'A': RenderColumn('A', 'number', '{}'),
        'B': RenderColumn('B', 'text', None),
    }

    result = render(left, params, input_columns=input_columns)

    # render() is expected to return the error text itself.
    expected_message = (
        'You tried to add "B" from Tab 2, but your table already has that '
        'column. Please rename the column in one of the tabs, or unselect '
        'the column.'
    )
    self.assertEqual(result, expected_message)
def test_import_columns_without_formats(self):
    """Importing a tab with number, datetime and text columns.

    The dataframe must come through unchanged, and the expected
    ``column_formats`` holds only the number column's format.
    """
    dates = pd.Series(
        ["2012-01-01", "2015-02-03", "2019-05-23"], dtype="datetime64[ns]"
    )
    dataframe = pd.DataFrame({"A": [1, 2, 3], "B": dates, "C": ["a", "b", "c"]})

    tab_columns = {
        "A": RenderColumn("A", "number", "{,.2f}"),
        "B": RenderColumn("B", "datetime", None),
        "C": RenderColumn("C", "text", None),
    }
    imported_tab = TabOutput("tab-2", "Tab 2", tab_columns, dataframe)

    result = render(pd.DataFrame(), {"tab": imported_tab})

    assert_frame_equal(result["dataframe"], dataframe)
    # Columns whose format is None ('B', 'C') are absent from the mapping.
    self.assertEqual(result["column_formats"], {"A": "{,.2f}"})
def test_add_source_column(self):
    """With add_source_column set, each row is labelled with its tab name."""
    second_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'A': RenderColumn('A', 'number')},
        pd.DataFrame({'A': [3, 4]}),
    )
    params = {
        'tabs': [second_tab],
        'add_source_column': True,
        'source_column_name': 'S',
    }

    result = render(
        pd.DataFrame({'A': [1, 2]}),
        params=params,
        tab_name='Tab 1',
        input_columns={'A': RenderColumn('A', 'number')},
    )

    # The source column is expected to be the _first_ column of the output.
    expected = pd.DataFrame({
        'S': ['Tab 1', 'Tab 1', 'Tab 2', 'Tab 2'],
        'A': [1, 2, 3, 4],
    })
    # ... and categorical, so the repeated tab names are not stored as
    # needlessly copied bytes.
    expected['S'] = expected['S'].astype('category')
    assert_frame_equal(result, expected)
def test_left_join_delete_unused_categories_in_added_columns(self):
    """A left join drops categories that no longer occur in added columns."""
    left = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
    right = pd.DataFrame({
        'A': pd.Series(['a', 'z'], dtype='category'),
        'B': pd.Series(['x', 'y'], dtype='category'),
    })

    right_columns = {
        'A': RenderColumn('A', 'text', None),
        'B': RenderColumn('B', 'text', None),
    }
    params = {
        'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
        'join_columns': {'on': ['A'], 'right': ['B']},
        'type': 'left',
    }

    result = render(
        left,
        params,
        input_columns={'A': RenderColumn('A', 'text', None)},
    )

    # The right-hand row ('z', 'y') matches nothing on the left, so 'y'
    # must not linger as a category of the added 'B' column.
    expected = pd.DataFrame({
        'A': pd.Series(['a', 'b'], dtype='category'),
        'B': pd.Series(['x', np.nan], dtype='category'),
    })
    assert_frame_equal(result['dataframe'], expected)
def test_error_different_types(self):
    """Concatenating a number column onto a text column is an error."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    result = render(
        pd.DataFrame({"A": ["x", "y"]}),
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    # render() is expected to return the error text itself.
    expected_message = (
        'Cannot concatenate column "A" of type "number" in "Tab 2" to '
        'column "A" of type "text" in "Tab 1". Please convert one or the '
        "other so they are the same type."
    )
    self.assertEqual(result, expected_message)
def test_allow_different_columns(self):
    """Concatenating tabs with disjoint columns keeps every column."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"B": RenderColumn("B", "number", "{}")},
        pd.DataFrame({"B": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    result = render(
        pd.DataFrame({"A": [1, 2]}),
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    # The expected frame pins the column ordering as well: "A" (from the
    # input tab) comes before "B" (from the appended tab).
    expected = pd.DataFrame({
        "A": [1, 2, np.nan, np.nan],
        "B": [np.nan, np.nan, 3, 4],
    })
    assert_frame_equal(result, expected)
def test_inner_join_delete_unused_categories_in_all_columns(self):
    """An inner join drops categories that vanish from any output column."""
    left = pd.DataFrame({
        'A': pd.Series(['a', 'b'], dtype='category'),  # join column
        'B': pd.Series(['c', 'd'], dtype='category'),  # other column
    })
    right = pd.DataFrame({
        'A': pd.Series(['a', 'x'], dtype='category'),  # join column
        'C': pd.Series(['e', 'y'], dtype='category'),  # other column
    })

    right_columns = {
        'A': RenderColumn('A', 'text', None),
        'C': RenderColumn('C', 'text', None),
    }
    params = {
        'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
        'join_columns': {'on': ['A'], 'right': ['C']},
        'type': 'inner',
    }
    input_columns = {
        'A': RenderColumn('A', 'text', None),
        'B': RenderColumn('B', 'text', None),
    }

    result = render(left, params, input_columns=input_columns)

    # Only the 'a' row survives the join, so 'b', 'd', 'x' and 'y' must not
    # remain in the categorical dtypes of the result.
    expected = pd.DataFrame(
        {'A': ['a'], 'B': ['c'], 'C': ['e']},
        dtype='category',
    )
    assert_frame_equal(result['dataframe'], expected)
def test_left(self):
    """A left join appends the chosen right columns and their formats."""
    left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
    right = pd.DataFrame({'A': [1, 2], 'C': ['X', 'Y'], 'D': [0.1, 0.2]})

    right_columns = {
        'A': RenderColumn('A', 'number', '{:,.2f}'),
        'C': RenderColumn('C', 'text', None),
        'D': RenderColumn('D', 'number', '{:,}'),
    }
    params = {
        'right_tab': TabOutput('slug', 'name', right_columns, right),
        'join_columns': {
            'on': ['A'],
            'right': ['C', 'D'],
        },
        'type': 'left',
    }
    input_columns = {
        'A': RenderColumn('A', 'number', '{:d}'),
        'B': RenderColumn('B', 'text', None),
    }

    result = render(left, params, input_columns=input_columns)

    # The left row with A == 3 has no partner on the right: NaN fills in.
    expected = pd.DataFrame({
        'A': [1, 2, 3],
        'B': ['x', 'y', 'z'],
        'C': ['X', 'Y', np.nan],
        'D': [0.1, 0.2, np.nan],
    })
    assert_frame_equal(result['dataframe'], expected)
    # Formats are expected for the added columns ('C', 'D') only.
    self.assertEqual(result['column_formats'], {'C': None, 'D': '{:,}'})
def test_add_source_column(self):
    """With add_source_column set, each row is labelled with its tab name."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": True,
        "source_column_name": "S",
    }

    result = render(
        pd.DataFrame({"A": [1, 2]}),
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    # The source column is expected to be the _first_ column of the output.
    expected = pd.DataFrame({
        "S": ["Tab 1", "Tab 1", "Tab 2", "Tab 2"],
        "A": [1, 2, 3, 4],
    })
    # ... and categorical, so the repeated tab names are not stored as
    # needlessly copied bytes.
    expected["S"] = expected["S"].astype("category")
    assert_frame_equal(result, expected)
def test_prevent_overwrite(self):
    """Adding a right-tab column whose name already exists is an error."""
    left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right = pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]})

    right_columns = {
        "A": RenderColumn("A", "number", "{}"),
        "B": RenderColumn("B", "text", None),
    }
    params = {
        "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["B"]},
        "type": "left",
    }
    input_columns = {
        "A": RenderColumn("A", "number", "{}"),
        "B": RenderColumn("B", "text", None),
    }

    result = render(left, params, input_columns=input_columns)

    # render() is expected to return the error text itself.
    expected_message = (
        'You tried to add "B" from Tab 2, but your table already has that '
        "column. Please rename the column in one of the tabs, or unselect "
        "the column."
    )
    self.assertEqual(result, expected_message)
def test_on_types_differ(self):
    """Joining on a column that is number on one side, text on the other."""
    left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right = pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]})

    right_columns = {
        "A": RenderColumn("A", "text", None),
        "C": RenderColumn("C", "text", None),
    }
    params = {
        "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "left",
    }
    input_columns = {
        "A": RenderColumn("A", "number", "{}"),
        "B": RenderColumn("B", "text", None),
    }

    result = render(left, params, input_columns=input_columns)

    # render() is expected to return the error text itself.
    expected_message = (
        'Column "A" is *number* in this tab and *text* in Tab 2. '
        "Please convert one or the other so they are both the same type."
    )
    self.assertEqual(result, expected_message)
def test_happy_path(self):
    """Importing a one-column tab yields its dataframe and column format."""
    source_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'A': RenderColumn('A', 'number', '{}')},
        pd.DataFrame({'A': [3, 4]}),
    )

    result = render(pd.DataFrame(), {'tab': source_tab})

    expected_frame = pd.DataFrame({'A': [3, 4]})
    assert_frame_equal(result['dataframe'], expected_frame)
    self.assertEqual(result['column_formats'], {'A': '{}'})
def test_happy_path(self):
    """Importing a one-column tab yields its dataframe and column format."""
    source_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )

    result = render(pd.DataFrame(), {"tab": source_tab})

    expected_frame = pd.DataFrame({"A": [3, 4]})
    assert_frame_equal(result["dataframe"], expected_frame)
    self.assertEqual(result["column_formats"], {"A": "{}"})
def _(dtype: ParamDType.Tab, value: str, context: RenderContext) -> TabOutput:
    """Resolve a tab-slug param value into the TabOutput of that tab.

    ``value`` is the tab slug. The tab's status is looked up in
    ``context.tab_shapes``; the tab itself and its cached render result
    are then loaded from the database.

    Returns:
        A TabOutput built from the tab's cached render result, or None
        when the slug is not present in ``context.tab_shapes``.

    Raises:
        TabCycleError: the slug maps to None in ``context.tab_shapes``.
        TabOutputUnreachableError: the tab's status is not 'ok', or the
            tab has no live modules.
        UnneededExecution: the tab or its cached render result is gone.
    """
    slug = value

    try:
        tab_shape = context.tab_shapes[slug]
    except KeyError:
        # No such tab: the param points at nothing.
        return None

    if tab_shape is None:
        # The executor reports the tab as un-rendered, which means there
        # is a tab-cycle.
        raise TabCycleError

    if tab_shape.status != 'ok':
        raise TabOutputUnreachableError

    # Fetch the Tab from the database. Assumes the workflow is locked.
    try:
        tab = Tab.objects.get(
            workflow_id=context.workflow_id,
            is_deleted=False,
            slug=slug,
        )
    except Tab.DoesNotExist:
        # context.tab_shapes said this tab was rendered, so it must have
        # been deleted mid-render: the param we were given is stale.
        raise UnneededExecution

    last_module = tab.live_wf_modules.last()
    if last_module is None:
        # An empty tab has no output to hand over.
        raise TabOutputUnreachableError

    cached = last_module.cached_render_result
    if cached is None:
        # tab_shapes implies a fresh result was just cached, so a missing
        # one means that version must be stale.
        raise UnneededExecution

    render_result = cached.result  # read Parquet file from disk (slow)

    # Not every column type carries a 'format' attribute; default to None.
    columns = {
        column.name: RenderColumn(
            column.name,
            column.type.name,
            getattr(column.type, 'format', None),
        )
        for column in render_result.columns
    }
    return TabOutput(slug, tab.name, columns, render_result.dataframe)
def test_inner_join_delete_unused_categories_in_all_columns(self):
    """An inner join drops categories that vanish from any output column."""
    left = pd.DataFrame({
        "A": pd.Series(["a", "b"], dtype="category"),  # join column
        "B": pd.Series(["c", "d"], dtype="category"),  # other column
    })
    right = pd.DataFrame({
        "A": pd.Series(["a", "x"], dtype="category"),  # join column
        "C": pd.Series(["e", "y"], dtype="category"),  # other column
    })

    right_columns = {
        "A": RenderColumn("A", "text", None),
        "C": RenderColumn("C", "text", None),
    }
    params = {
        "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "inner",
    }
    input_columns = {
        "A": RenderColumn("A", "text", None),
        "B": RenderColumn("B", "text", None),
    }

    result = render(left, params, input_columns=input_columns)

    # Only the 'a' row survives the join, so 'b', 'd', 'x' and 'y' must not
    # remain in the categorical dtypes of the result.
    expected = pd.DataFrame(
        {"A": ["a"], "B": ["c"], "C": ["e"]},
        dtype="category",
    )
    assert_frame_equal(result["dataframe"], expected)
def test_right_join_delete_unused_categories_in_input_columns(self):
    """A right join drops categories that vanish from the input columns."""
    left = pd.DataFrame({
        "A": pd.Series(["a", "b"], dtype="category"),  # join column
        "B": pd.Series(["c", "d"], dtype="category"),  # other column
    })
    right = pd.DataFrame({
        "A": pd.Series(["a"], dtype="category"),  # join column
        "C": ["e"],
    })

    right_columns = {
        "A": RenderColumn("A", "text", None),
        "C": RenderColumn("C", "text", None),
    }
    params = {
        "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["C"]},
        "type": "right",
    }
    input_columns = {
        "A": RenderColumn("A", "text", None),
        "B": RenderColumn("B", "text", None),
    }

    result = render(left, params, input_columns=input_columns)

    # The left row ('b', 'd') has no partner on the right, so neither 'b'
    # nor 'd' may remain as a category in the result dataframe.
    expected = pd.DataFrame({
        "A": pd.Series(["a"], dtype="category"),
        "B": pd.Series(["c"], dtype="category"),
        "C": ["e"],
    })
    assert_frame_equal(result["dataframe"], expected)
def test_happy_path(self):
    """Concatenating two tabs with the same column stacks their rows."""
    second_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'A': RenderColumn('A', 'number')},
        pd.DataFrame({'A': [3, 4]}),
    )
    params = {
        'tabs': [second_tab],
        'add_source_column': False,
        'source_column_name': '',
    }

    result = render(
        pd.DataFrame({'A': [1, 2]}),
        params=params,
        tab_name='Tab 1',
        input_columns={'A': RenderColumn('A', 'number')},
    )

    expected = pd.DataFrame({'A': [1, 2, 3, 4]})
    assert_frame_equal(result, expected)
def test_coerce_categories_and_str(self):
    """Concatenating a categorical column with a str column gives str."""
    categorical_input = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
    str_frame = pd.DataFrame({'A': ['c', 'd']})

    second_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'A': RenderColumn('A', 'text')},
        str_frame,
    )
    params = {
        'tabs': [second_tab],
        'add_source_column': False,
        'source_column_name': '',
    }

    result = render(
        categorical_input,
        params=params,
        tab_name='Tab 1',
        input_columns={'A': RenderColumn('A', 'text')},
    )

    # The expected column is plain str, not categorical.
    expected = pd.DataFrame({'A': ['a', 'b', 'c', 'd']})
    assert_frame_equal(result, expected)
def test_happy_path(self):
    """Concatenating two tabs with the same column stacks their rows."""
    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "number", "{}")},
        pd.DataFrame({"A": [3, 4]}),
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    result = render(
        pd.DataFrame({"A": [1, 2]}),
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "number", "{}")},
    )

    expected = pd.DataFrame({"A": [1, 2, 3, 4]})
    assert_frame_equal(result, expected)
def test_import_columns_without_formats(self):
    """Importing a tab with number, datetime and text columns.

    The dataframe must come through unchanged, and the expected
    ``column_formats`` holds only the number column's format.
    """
    dates = pd.Series(
        ['2012-01-01', '2015-02-03', '2019-05-23'],
        dtype='datetime64[ns]',
    )
    dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': dates, 'C': ['a', 'b', 'c']})

    tab_columns = {
        'A': RenderColumn('A', 'number', '{,.2f}'),
        'B': RenderColumn('B', 'datetime', None),
        'C': RenderColumn('C', 'text', None),
    }
    imported_tab = TabOutput('tab-2', 'Tab 2', tab_columns, dataframe)

    result = render(pd.DataFrame(), {'tab': imported_tab})

    assert_frame_equal(result['dataframe'], dataframe)
    # Columns whose format is None ('B', 'C') are absent from the mapping.
    self.assertEqual(result['column_formats'], {'A': '{,.2f}'})
def test_error_different_types(self):
    """Concatenating a number column onto a text column is an error."""
    second_tab = TabOutput(
        'tab-2',
        'Tab 2',
        {'A': RenderColumn('A', 'number')},
        pd.DataFrame({'A': [3, 4]}),
    )
    params = {
        'tabs': [second_tab],
        'add_source_column': False,
        'source_column_name': '',
    }

    result = render(
        pd.DataFrame({'A': ['x', 'y']}),
        params=params,
        tab_name='Tab 1',
        input_columns={'A': RenderColumn('A', 'text')},
    )

    # render() is expected to return the error text itself.
    expected_message = (
        'Cannot concatenate column "A" of type "number" in "Tab 2" to '
        'column "A" of type "text" in "Tab 1". Please convert one or the '
        'other so they are the same type.'
    )
    self.assertEqual(result, expected_message)
def test_left(self):
    """A left join appends the chosen right columns and their formats."""
    left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
    right = pd.DataFrame({"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]})

    right_columns = {
        "A": RenderColumn("A", "number", "{:,.2f}"),
        "C": RenderColumn("C", "text", None),
        "D": RenderColumn("D", "number", "{:,}"),
    }
    params = {
        "right_tab": TabOutput("slug", "name", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["C", "D"]},
        "type": "left",
    }
    input_columns = {
        "A": RenderColumn("A", "number", "{:d}"),
        "B": RenderColumn("B", "text", None),
    }

    result = render(left, params, input_columns=input_columns)

    # The left row with A == 3 has no partner on the right: NaN fills in.
    expected = pd.DataFrame({
        "A": [1, 2, 3],
        "B": ["x", "y", "z"],
        "C": ["X", "Y", np.nan],
        "D": [0.1, 0.2, np.nan],
    })
    assert_frame_equal(result["dataframe"], expected)
    # Formats are expected for the added columns ("C", "D") only.
    self.assertEqual(result["column_formats"], {"C": None, "D": "{:,}"})
def test_coerce_categories_and_str(self):
    """Concatenating a categorical column with a str column gives str."""
    categorical_input = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    str_frame = pd.DataFrame({"A": ["c", "d"]})

    second_tab = TabOutput(
        "tab-2",
        "Tab 2",
        {"A": RenderColumn("A", "text", None)},
        str_frame,
    )
    params = {
        "tabs": [second_tab],
        "add_source_column": False,
        "source_column_name": "",
    }

    result = render(
        categorical_input,
        params=params,
        tab_name="Tab 1",
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    # The expected column is plain str, not categorical.
    expected = pd.DataFrame({"A": ["a", "b", "c", "d"]})
    assert_frame_equal(result, expected)
def test_left_join_delete_unused_categories_in_added_columns(self):
    """A left join drops categories that no longer occur in added columns."""
    left = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
    right = pd.DataFrame({
        "A": pd.Series(["a", "z"], dtype="category"),
        "B": pd.Series(["x", "y"], dtype="category"),
    })

    right_columns = {
        "A": RenderColumn("A", "text", None),
        "B": RenderColumn("B", "text", None),
    }
    params = {
        "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
        "join_columns": {"on": ["A"], "right": ["B"]},
        "type": "left",
    }

    result = render(
        left,
        params,
        input_columns={"A": RenderColumn("A", "text", None)},
    )

    # The right-hand row ("z", "y") matches nothing on the left, so "y"
    # must not linger as a category of the added "B" column.
    expected = pd.DataFrame({
        "A": pd.Series(["a", "b"], dtype="category"),
        "B": pd.Series(["x", np.nan], dtype="category"),
    })
    assert_frame_equal(result["dataframe"], expected)
def test_on_types_differ(self):
    """Joining on a column that is number on one side, text on the other."""
    left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
    right = pd.DataFrame({'A': ['1', '2'], 'C': ['X', 'Y']})

    right_columns = {
        'A': RenderColumn('A', 'text', None),
        'C': RenderColumn('C', 'text', None),
    }
    params = {
        'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
        'join_columns': {
            'on': ['A'],
            'right': ['C'],
        },
        'type': 'left',
    }
    input_columns = {
        'A': RenderColumn('A', 'number', '{}'),
        'B': RenderColumn('B', 'text', None),
    }

    result = render(left, params, input_columns=input_columns)

    # render() is expected to return the error text itself.
    expected_message = (
        'Column "A" is *number* in this tab and *text* in Tab 2. '
        'Please convert one or the other so they are both the same type.'
    )
    self.assertEqual(result, expected_message)
def test_left(self):
    """A left join keeps every left row and appends the right column."""
    left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
    right = pd.DataFrame({'A': [1, 2], 'C': ['X', 'Y']})

    right_columns = {
        'A': RenderColumn('A', 'number'),
        'C': RenderColumn('C', 'text'),
    }
    params = {
        'right_tab': TabOutput('slug', 'name', right_columns, right),
        'join_columns': {
            'on': 'A',
            'right': 'C',
        },
        'type': 0,
    }
    input_columns = {
        'A': RenderColumn('A', 'number'),
        'B': RenderColumn('B', 'text'),
    }

    result = render(left, params, input_columns=input_columns)

    # The left row with A == 3 has no partner on the right: NaN fills in.
    expected = pd.DataFrame({
        'A': [1, 2, 3],
        'B': ['x', 'y', 'z'],
        'C': ['X', 'Y', np.nan],
    })
    assert_frame_equal(result, expected)
def test_import_empty_tab(self):
    """Importing a tab with no columns gives an empty table, no formats."""
    empty_tab = TabOutput('tab-2', 'Tab 2', {}, pd.DataFrame())

    result = render(pd.DataFrame(), {'tab': empty_tab})

    assert_frame_equal(result['dataframe'], pd.DataFrame())
    self.assertEqual(result['column_formats'], {})
def test_import_empty_tab(self):
    """Importing a tab with no columns gives an empty table, no formats."""
    empty_tab = TabOutput("tab-2", "Tab 2", {}, pd.DataFrame())

    result = render(pd.DataFrame(), {"tab": empty_tab})

    assert_frame_equal(result["dataframe"], pd.DataFrame())
    self.assertEqual(result["column_formats"], {})