예제 #1
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Check that a Multicolumn value is returned in table-column order."""
     # The table declares "B" before "A", the reverse of the requested value.
     table_columns = [
         Column("B", ColumnType.NUMBER()),
         Column("A", ColumnType.NUMBER()),
     ]
     shape = TableShape(3, table_columns)
     cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], shape)
     self.assertEqual(cleaned, ["B", "A"])
예제 #2
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        """Check that text and datetime series columns each raise an error."""
        shape = TableShape(3, [
            Column('A', ColumnType.TEXT()),
            Column('B', ColumnType.DATETIME()),
        ])
        context = RenderContext(None, None, shape, None, None)
        series = [
            {'column': 'A', 'color': '#aaaaaa'},
            {'column': 'B', 'color': '#cccccc'},
        ]

        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), series, context)

        # One expected error per offending column.
        wanted = frozenset({'number'})
        expected = [
            PromptingError.WrongColumnType(['A'], 'text', wanted),
            PromptingError.WrongColumnType(['B'], 'datetime', wanted),
        ]
        self.assertEqual(cm.exception.errors, expected)
예제 #3
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        """Check that non-number chart series columns raise PromptingError."""
        shape = TableShape(3, [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.DATETIME()),
        ])
        context = RenderContext(None, None, shape, None, None)
        value = [
            {"column": name, "color": color}
            for name, color in [("A", "#aaaaaa"), ("B", "#cccccc")]
        ]

        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        # Errors are expected separately for "A" (text) and "B" (datetime).
        expected = [
            PromptingError.WrongColumnType([name], found, frozenset({"number"}))
            for name, found in [("A", "text"), ("B", "datetime")]
        ]
        self.assertEqual(cm.exception.errors, expected)
예제 #4
0
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        """Check that differently-typed columns share one text-type error."""
        columns = [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.DATETIME()),
        ]
        context = RenderContext(None, None, TableShape(3, columns), None, None)
        text_column = ParamDType.Column(column_types=frozenset({"text"}))
        schema = ParamDType.List(inner_dtype=text_column)

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        # A single error names both columns and carries None as its second
        # argument.
        combined = PromptingError.WrongColumnType(
            ["A", "B"], None, frozenset({"text"})
        )
        self.assertEqual(cm.exception.errors, [combined])
예제 #5
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Check that non-text columns in a text Multicolumn raise one error."""
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        columns = [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.DATETIME()),
            Column("C", ColumnType.TEXT()),
        ]
        shape = TableShape(3, columns)
        context = RenderContext(None, None, shape, None, None)

        with self.assertRaises(PromptingError) as cm:
            schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(schema, "A,B", context)

        # "A" and "B" are reported together in a single error.
        expected_error = PromptingError.WrongColumnType(
            ["A", "B"], None, frozenset({"text"})
        )
        self.assertEqual(cm.exception.errors, [expected_error])
예제 #6
0
    def test_dict_prompting_error_concatenate_different_types(self):
        """Check that a Dict schema reports each wrongly-typed column."""
        shape = TableShape(3, [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.DATETIME()),
        ])
        context = RenderContext(None, None, shape, None, None)
        # Both dict entries accept only number columns.
        schema = ParamDType.Dict({
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        })
        params = {"x": "A", "y": "B"}

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, params, context)

        expected = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(
                ["B"], "datetime", frozenset({"number"})
            ),
        ]
        self.assertEqual(cm.exception.errors, expected)
예제 #7
0
    def test_result_and_metadata_come_from_memory_when_available(self):
        """Check the cached result is still readable after its file is removed."""
        columns = [
            Column("A", ColumnType.NUMBER(format="{:,d}")),
            Column("B", ColumnType.DATETIME()),
            Column("C", ColumnType.TEXT()),
            Column("D", ColumnType.TEXT()),
        ]
        dataframe = pandas.DataFrame(
            {
                "A": [1],  # int64
                "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
                "C": ["foo"],  # str
                "D": pandas.Series(["cat"], dtype="category"),
            }
        )
        result = ProcessResult(dataframe=dataframe, columns=columns)
        cached = self.wf_module.cache_render_result(self.delta.id, result)

        # cache_render_result() keeps its `result` parameter in memory, so we
        # can avoid disk entirely. Prove it by deleting from disk.
        minio.remove(minio.CachedRenderResultsBucket, cached.parquet_key)
        self.assertIsNotNone(cached._result)

        self.assertEqual(cached.result, result)
        self.assertEqual(cached.nrows, 1)
        self.assertEqual(cached.columns, columns)
예제 #8
0
    def test_metadata_comes_from_db_columns(self):
        """Check nrows/columns are readable from a freshly loaded WfModule."""
        columns = [
            Column("A", ColumnType.NUMBER(format="{:,d}")),
            Column("B", ColumnType.DATETIME()),
            Column("C", ColumnType.TEXT()),
            Column("D", ColumnType.TEXT()),
        ]
        dataframe = pandas.DataFrame(
            {
                "A": [1],  # int64
                "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
                "C": ["foo"],  # str
                "D": pandas.Series(["cat"], dtype="category"),
            }
        )
        result = ProcessResult(dataframe=dataframe, columns=columns)
        stored = self.wf_module.cache_render_result(self.delta.id, result)

        # Remove the stored object at `parquet_key` so the assertions below
        # cannot be satisfied by reading it back.
        minio.remove(minio.CachedRenderResultsBucket, stored.parquet_key)

        # Load _new_ CachedRenderResult -- from DB columns, not memory
        reloaded_module = WfModule.objects.get(id=self.wf_module.id)
        reloaded = reloaded_module.cached_render_result
        self.assertFalse(hasattr(reloaded, "_result"))

        self.assertEqual(reloaded.nrows, 1)
        self.assertEqual(reloaded.columns, columns)
예제 #9
0
 def test_coerce_infer_columns(self):
     """Check that coerce() infers a number and a text column."""
     frame = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
     coerced = ProcessResult.coerce(frame)
     expected = [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.TEXT()),
     ]
     self.assertEqual(coerced.columns, expected)
예제 #10
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Check that cleaned column names follow the table's column order."""
     # 'B' precedes 'A' in the table, unlike in the requested value.
     columns = [Column(name, ColumnType.NUMBER()) for name in ('B', 'A')]
     shape = TableShape(3, columns)
     cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], shape)
     self.assertEqual(cleaned, ['B', 'A'])
예제 #11
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Check that a column absent from the table is dropped from the value."""
     columns = [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.NUMBER()),
     ]
     shape = TableShape(3, columns)
     # 'X' is not one of the table's columns.
     requested = ['A', 'X', 'B']
     cleaned = clean_value(ParamDType.Multicolumn(), requested, shape)
     self.assertEqual(cleaned, ['A', 'B'])
예제 #12
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Check that unknown column names are filtered out of a Multicolumn."""
     known = [Column(name, ColumnType.NUMBER()) for name in ("A", "B")]
     shape = TableShape(3, known)
     # "X" does not exist in the table shape.
     cleaned = clean_value(
         ParamDType.Multicolumn(), ["A", "X", "B"], shape
     )
     self.assertEqual(cleaned, ["A", "B"])
예제 #13
0
 def test_coerce_infer_columns(self):
     """Check that coerce() derives column types from a bare DataFrame."""
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     inferred = ProcessResult.coerce(frame).columns
     number_col = Column("A", ColumnType.NUMBER())
     text_col = Column("B", ColumnType.TEXT())
     self.assertEqual(inferred, [number_col, text_col])
예제 #14
0
def check_key_types(left_dtypes, right_dtypes):
    """Raise TypeError when a key column's type differs between both sides.

    Every label in ``left_dtypes.index`` is looked up with ``.loc`` in both
    arguments, and the resulting ``ColumnType`` values are compared.
    Presumably both arguments are pandas dtype Series indexed by column
    name -- confirm against the caller.

    Raises:
        TypeError: on the first key whose two column types are not equal.
    """
    for column_name in left_dtypes.index:
        left_kind = ColumnType.from_dtype(left_dtypes.loc[column_name])
        right_kind = ColumnType.from_dtype(right_dtypes.loc[column_name])
        if left_kind != right_kind:
            message = (
                f'Types do not match for key column "{column_name}" '
                f'({left_kind.value} and {right_kind.value}). '
                'Please use a type conversion module to '
                'make these column types consistent.'
            )
            raise TypeError(message)
예제 #15
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Check that Multicolumn output is ordered like the context's table."""
     # The table shape lists 'B' first, then 'A'.
     shape = TableShape(3, [
         Column('B', ColumnType.NUMBER()),
         Column('A', ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, shape, None, None)
     cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], context)
     self.assertEqual(cleaned, ['B', 'A'])
예제 #16
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Check that a column missing from the context's table is dropped."""
     shape = TableShape(3, [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, shape, None, None)
     # 'X' is requested but is not in the table shape.
     requested = ['A', 'X', 'B']
     cleaned = clean_value(ParamDType.Multicolumn(), requested, context)
     self.assertEqual(cleaned, ['A', 'B'])
예제 #17
0
 def test_coerce_infer_columns_with_format(self):
     """Check that column_formats is reflected in the inferred number column."""
     table = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
     raw = {'dataframe': table, 'column_formats': {'A': '{:,d}'}}
     coerced = ProcessResult.coerce(raw)
     expected = [
         Column('A', ColumnType.NUMBER(format='{:,d}')),
         Column('B', ColumnType.TEXT()),
     ]
     self.assertEqual(coerced.columns, expected)
예제 #18
0
 def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
     """Check that fallback columns with mismatched types are not used."""
     table = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
     # The fallbacks deliberately swap the types: 'A' text, 'B' number.
     fallbacks = [
         Column('A', ColumnType.TEXT()),
         Column('B', ColumnType.NUMBER()),
     ]
     coerced = ProcessResult.coerce(table, try_fallback_columns=fallbacks)
     expected = [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.TEXT()),
     ]
     self.assertEqual(coerced.columns, expected)
예제 #19
0
 def test_clean_multichartseries_missing_is_removed(self):
     """Check that a series naming a column not in the table is dropped."""
     shape = TableShape(3, [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, shape, None, None)
     kept_series = {'column': 'A', 'color': '#aaaaaa'}
     # 'C' is not one of the table's columns.
     dropped_series = {'column': 'C', 'color': '#cccccc'}
     cleaned = clean_value(
         ParamDType.Multichartseries(),
         [kept_series, dropped_series],
         context,
     )
     self.assertEqual(cleaned, [{'column': 'A', 'color': '#aaaaaa'}])
예제 #20
0
 def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
     """Check that column_formats wins over a fallback column's format.

     The fallback column for 'A' carries '{:,.2f}', while column_formats
     gives '{:,d}'; the resulting column is expected to use '{:,d}'.
     """
     table = pd.DataFrame({'A': [1, 2]})
     result = ProcessResult.coerce(
         # Pass `table` itself rather than building an identical second
         # DataFrame and leaving this local unused.
         {'dataframe': table, 'column_formats': {'A': '{:,d}'}},
         try_fallback_columns=[Column('A', ColumnType.NUMBER('{:,.2f}'))],
     )
     self.assertEqual(result.columns, [
         Column('A', ColumnType.NUMBER('{:,d}')),
     ])
예제 #21
0
 def test_ctor_infer_columns(self):
     """Check that the constructor infers number, text and datetime columns."""
     frame = pd.DataFrame({
         'A': [1, 2],
         'B': ['x', 'y'],
         # One missing value next to a datetime.
         'C': [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
     })
     built = ProcessResult(frame)
     expected = [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.TEXT()),
         Column('C', ColumnType.DATETIME()),
     ]
     self.assertEqual(built.columns, expected)
예제 #22
0
 def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
     """Check that column_formats takes priority over a fallback format.

     The fallback column for "A" is formatted "{:,.2f}" but column_formats
     supplies "{:,d}"; the coerced column is expected to carry "{:,d}".
     """
     table = pd.DataFrame({"A": [1, 2]})
     result = ProcessResult.coerce(
         {
             # Reuse `table` instead of constructing a duplicate DataFrame
             # and leaving the local unused.
             "dataframe": table,
             "column_formats": {"A": "{:,d}"},
         },
         try_fallback_columns=[Column("A", ColumnType.NUMBER("{:,.2f}"))],
     )
     self.assertEqual(result.columns,
                      [Column("A", ColumnType.NUMBER("{:,d}"))])
예제 #23
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Check that requested columns are reordered to match the table."""
     # Table order is "B", "A"; the request is "A", "B".
     table_columns = [
         Column(name, ColumnType.NUMBER()) for name in ("B", "A")
     ]
     shape = TableShape(3, table_columns)
     context = RenderContext(None, None, shape, None, None)
     cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)
     self.assertEqual(cleaned, ["B", "A"])
예제 #24
0
 def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
     """Check that type-mismatched fallback columns do not affect inference."""
     table = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     # Fallback types are the opposite of what the data holds.
     wrong_fallbacks = [
         Column("A", ColumnType.TEXT()),
         Column("B", ColumnType.NUMBER()),
     ]
     inferred = ProcessResult.coerce(
         table, try_fallback_columns=wrong_fallbacks
     ).columns
     self.assertEqual(
         inferred,
         [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.TEXT())],
     )
예제 #25
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Check that names absent from the context's table are removed."""
     table_columns = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.NUMBER()),
     ]
     context = RenderContext(
         None, None, TableShape(3, table_columns), None, None
     )
     # "X" is requested but the table has only "A" and "B".
     cleaned = clean_value(
         ParamDType.Multicolumn(), ["A", "X", "B"], context
     )
     self.assertEqual(cleaned, ["A", "B"])
예제 #26
0
 def test_coerce_infer_columns_with_format(self):
     """Check that coerce() applies column_formats to the number column."""
     table = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     formats = {"A": "{:,d}"}
     coerced = ProcessResult.coerce(
         {"dataframe": table, "column_formats": formats}
     )
     formatted_number = Column("A", ColumnType.NUMBER(format="{:,d}"))
     plain_text = Column("B", ColumnType.TEXT())
     self.assertEqual(coerced.columns, [formatted_number, plain_text])
예제 #27
0
 def test_columns(self):
     """Check column_names and columns for a four-column DataFrame."""
     frame = pd.DataFrame({
         'A': [1],  # number
         'B': ['foo'],  # str
         'C': dt(2018, 8, 20),  # datetime64
     })
     # The categorical column is added after construction.
     frame['D'] = pd.Series(['cat'], dtype='category')
     built = ProcessResult(frame)

     self.assertEqual(built.column_names, ['A', 'B', 'C', 'D'])

     # The categorical column 'D' is expected to be reported as TEXT.
     expected = [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.TEXT()),
         Column('C', ColumnType.DATETIME()),
         Column('D', ColumnType.TEXT()),
     ]
     self.assertEqual(built.columns, expected)
예제 #28
0
    def test_list_prompting_error_concatenate_same_type(self):
        """Check that two text columns are reported in one combined error."""
        shape = TableShape(3, [
            Column('A', ColumnType.TEXT()),
            Column('B', ColumnType.TEXT()),
        ])
        context = RenderContext(None, None, shape, None, None)
        number_column = ParamDType.Column(column_types=frozenset({'number'}))
        schema = ParamDType.List(inner_dtype=number_column)

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ['A', 'B'], context)

        # Both columns are text, so a single error lists them together.
        combined = PromptingError.WrongColumnType(
            ['A', 'B'], 'text', frozenset({'number'})
        )
        self.assertEqual(cm.exception.errors, [combined])
예제 #29
0
 def test_ctor_infer_columns(self):
     """Check that ProcessResult(dataframe) infers each column's type."""
     frame = pd.DataFrame({
         "A": [1, 2],
         "B": ["x", "y"],
         # A missing value followed by a datetime.
         "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
     })
     inferred = ProcessResult(frame).columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
         Column("C", ColumnType.DATETIME()),
     ]
     self.assertEqual(inferred, expected)
예제 #30
0
    def test_clean_multicolumn_from_other_tab(self):
        """Check that a tab-bound Multicolumn keeps only that tab's columns."""
        other_tab_result = ProcessResult(
            pd.DataFrame({'A-from-tab-2': [1, 2]})
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.wf_modules.create(
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step.cache_render_result(workflow.last_delta_id, other_tab_result)

        # 'columns' is tied to the 'tab' parameter through tab_parameter.
        schema = ParamDType.Dict({
            'tab': ParamDType.Tab(),
            'columns': ParamDType.Multicolumn(tab_parameter='tab'),
        })
        params = {'tab': tab.slug, 'columns': ['A-from-tab-1', 'A-from-tab-2']}

        # The context's own table has only 'A-from-tab-1'; the shape registered
        # under tab.slug comes from the other tab's output.
        input_shape = TableShape(3, [
            Column('A-from-tab-1', ColumnType.NUMBER()),
        ])
        tab_shapes = {
            tab.slug: StepResultShape('ok', other_tab_result.table_shape),
        }
        context = RenderContext(
            workflow.id, None, input_shape, tab_shapes, params
        )

        cleaned = clean_value(schema, params, context)
        # cleaned['tab'] is not what we're testing here
        self.assertEqual(cleaned['columns'], ['A-from-tab-2'])