Example #1
0
    def test_dict_prompting_error_concatenate_different_types(self):
        context = RenderContext(
            None,
            None,
            TableShape(3, [
                Column("A", ColumnType.TEXT()),
                Column("B", ColumnType.DATETIME())
            ]),
            None,
            None,
        )
        schema = ParamDType.Dict({
            "x":
            ParamDType.Column(column_types=frozenset({"number"})),
            "y":
            ParamDType.Column(column_types=frozenset({"number"})),
        })
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, {"x": "A", "y": "B"}, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text",
                                               frozenset({"number"})),
                PromptingError.WrongColumnType(["B"], "datetime",
                                               frozenset({"number"})),
            ],
        )
    def test_clean_condition_timestamp_wrong_value(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(
                ParamDType.Condition(),
                {
                    "operation": "timestamp_is_greater_than",
                    "column": "A",
                    "value": "Yesterday",
                    "isCaseSensitive": False,
                    "isRegex": False,
                },
                context,
            )

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.CannotCoerceValueToTimestamp("Yesterday"),
            ],
        )
Example #3
0
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        context = RenderContext(
            None,
            None,
            TableShape(
                3,
                [
                    Column("A", ColumnType.NUMBER()),
                    Column("B", ColumnType.DATETIME())
                ],
            ),
            None,
            None,
        )
        schema = ParamDType.List(inner_dtype=ParamDType.Column(
            column_types=frozenset({"text"})))
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A", "B"], None,
                                               frozenset({"text"}))
            ],
        )
Example #4
0
    def test_clean_tab_tab_delete_race_raises_unneededexecution(self):
        """
        If a user deletes the tab during render, raise UnneededExecution.

        It doesn't really matter _what_ the return value is, since the render()
        result will never be saved if this WfModule's delta has changed.
        UnneededExecution just seems like the quickest way out of this mess:
        it's an error the caller is meant to raise anyway, unlike
        `Tab.DoesNotExist`.
        """
        # tab_output is what 'render' _thinks_ the output should be
        tab_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(
            order=0, last_relevant_delta_id=workflow.last_delta_id)
        wfm.cache_render_result(workflow.last_delta_id, tab_output)
        tab.is_deleted = True
        tab.save(update_fields=['is_deleted'])
        # Simulate reality: wfm.last_relevant_delta_id will change
        wfm.last_relevant_delta_id += 1
        wfm.save(update_fields=['last_relevant_delta_id'])

        context = RenderContext(
            workflow.id, None, None, {
                tab.slug: StepResultShape('ok', tab_output.table_shape),
            }, None)
        with self.assertRaises(UnneededExecution):
            clean_value(ParamDType.Tab(), tab.slug, context)
Example #5
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        context = RenderContext(
            None,
            None,
            TableShape(3, [
                Column("A", ColumnType.TEXT()),
                Column("B", ColumnType.DATETIME())
            ]),
            None,
            None,
        )
        value = [
            {
                "column": "A",
                "color": "#aaaaaa"
            },
            {
                "column": "B",
                "color": "#cccccc"
            },
        ]
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text",
                                               frozenset({"number"})),
                PromptingError.WrongColumnType(["B"], "datetime",
                                               frozenset({"number"})),
            ],
        )
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        context = self._render_context(input_table=arrow_table({
            "A": ["a"],
            "B":
            pa.array([datetime.now()], pa.timestamp("ns"))
        }))
        value = [
            {
                "column": "A",
                "color": "#aaaaaa"
            },
            {
                "column": "B",
                "color": "#cccccc"
            },
        ]
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text",
                                               frozenset({"number"})),
                PromptingError.WrongColumnType(["B"], "datetime",
                                               frozenset({"number"})),
            ],
        )
Example #7
0
    def test_clean_tab_wf_module_changed_raises_unneededexecution(self):
        """
        If a user changes tabs' output during render, raise UnneededExecution.

        It doesn't really matter _what_ the return value is, since the render()
        result will never be saved if this WfModule's delta has changed.
        UnneededExecution seems like the simplest contract to enforce.
        """
        # tab_output is what 'render' _thinks_ the output should be
        tab_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(
            order=0, last_relevant_delta_id=workflow.last_delta_id)
        wfm.cache_render_result(workflow.last_delta_id, tab_output)
        # Simulate reality: wfm.last_relevant_delta_id will change
        wfm.last_relevant_delta_id += 1
        wfm.save(update_fields=['last_relevant_delta_id'])

        context = RenderContext(
            workflow.id, None, None, {
                tab.slug: StepResultShape('ok', tab_output.table_shape),
            }, None)
        with self.assertRaises(UnneededExecution):
            clean_value(ParamDType.Tab(), tab.slug, context)
Example #8
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        context = RenderContext(
            None,
            None,
            TableShape(
                3,
                [
                    Column("A", ColumnType.NUMBER()),
                    Column("B", ColumnType.DATETIME()),
                    Column("C", ColumnType.TEXT()),
                ],
            ),
            None,
            None,
        )
        with self.assertRaises(PromptingError) as cm:
            schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(schema, "A,B", context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A", "B"], None,
                                               frozenset({"text"}))
            ],
        )
Example #9
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        context = RenderContext(
            None, None,
            TableShape(3, [
                Column('A', ColumnType.TEXT()),
                Column('B', ColumnType.DATETIME()),
            ]), None, None)
        value = [
            {
                'column': 'A',
                'color': '#aaaaaa'
            },
            {
                'column': 'B',
                'color': '#cccccc'
            },
        ]
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        self.assertEqual(cm.exception.errors, [
            PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'
                                                                     })),
            PromptingError.WrongColumnType(['B'], 'datetime',
                                           frozenset({'number'})),
        ])
 def test_clean_column_prompting_error_convert_to_number(self):
     context = self._render_context(input_table=arrow_table({"A": ["1"]}))
     with self.assertRaises(PromptingError) as cm:
         clean_value(
             ParamDType.Column(column_types=frozenset({"number"})), "A", context
         )
     self.assertEqual(
         cm.exception.errors,
         [PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))],
     )
Example #11
0
 def test_clean_column_prompting_error_convert_to_number(self):
     context = RenderContext(
         None, None, TableShape(3, [
             Column('A', ColumnType.TEXT()),
         ]), None, None)
     with self.assertRaises(PromptingError) as cm:
         clean_value(ParamDType.Column(column_types=frozenset({'number'})),
                     'A', context)
     self.assertEqual(cm.exception.errors, [
         PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'
                                                                  })),
     ])
    def test_list_prompting_error_concatenate_same_type(self):
        context = self._render_context(
            input_table=arrow_table({"A": ["1"], "B": ["2"]})
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"number"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))],
        )
Example #13
0
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        context = self._render_context(
            input_table=arrow_table({"A": [1], "B": [datetime.now()]})
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"text"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
        )
Example #14
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        context = self._render_context(
            input_table=arrow_table({"A": [1], "B": [datetime.now()], "C": ["x"]})
        )
        with self.assertRaises(PromptingError) as cm:
            schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
        )
Example #15
0
    def test_clean_file_happy_path(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(module_id_name="uploadfile",
                                    order=0,
                                    slug="step-1")
        id = str(uuid.uuid4())
        key = f"wf-${workflow.id}/wfm-${wfm.id}/${id}"
        minio.put_bytes(minio.UserFilesBucket, key, b"1234")
        UploadedFile.objects.create(
            wf_module=wfm,
            name="x.csv.gz",
            size=4,
            uuid=id,
            bucket=minio.UserFilesBucket,
            key=key,
        )
        context = RenderContext(workflow.id, wfm.id, None, None, None)
        result = clean_value(ParamDType.File(), id, context)
        self.assertIsInstance(result, pathlib.Path)
        self.assertEqual(result.read_bytes(), b"1234")
        self.assertEqual(result.suffixes, [".csv", ".gz"])

        # Assert that once `path` goes out of scope, it's deleted
        str_path = str(result)  # get the filesystem path
        del result  # should finalize, deleting the file on the filesystem
        with self.assertRaises(FileNotFoundError):
            os.open(str_path, 0)
Example #16
0
 def test_clean_column_missing_becomes_empty_string(self):
     context = RenderContext(
         None, None, TableShape(3, [
             Column('A', ColumnType.NUMBER()),
         ]), None, None)
     result = clean_value(ParamDType.Column(), 'B', context)
     self.assertEqual(result, '')
Example #17
0
 def test_clean_float_with_int_value(self):
     # ParamDType.Float can have `int` values (because values come from
     # json.parse(), which only gives Numbers so can give "3" instead of
     # "3.0". We want to pass that as `float` in the `params` dict.
     result = clean_value(ParamDType.Float(), 3, None)
     self.assertEqual(result, 3.0)
     self.assertIsInstance(result, float)
Example #18
0
    def test_list_prompting_error_concatenate_same_type(self):
        context = RenderContext(
            None, None,
            TableShape(3, [
                Column('A', ColumnType.TEXT()),
                Column('B', ColumnType.TEXT()),
            ]), None, None)
        schema = ParamDType.List(inner_dtype=ParamDType.Column(
            column_types=frozenset({'number'})))
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ['A', 'B'], context)

        self.assertEqual(cm.exception.errors, [
            PromptingError.WrongColumnType(['A', 'B'], 'text',
                                           frozenset({'number'})),
        ])
Example #19
0
 def test_clean_file_wrong_wf_module(self):
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.wf_modules.create(module_id_name="uploadfile",
                                  order=0,
                                  slug="step-1")
     step2 = tab.wf_modules.create(module_id_name="uploadfile",
                                   order=1,
                                   slug="step-2")
     id = str(uuid.uuid4())
     key = f"wf-${workflow.id}/wfm-${step.id}/${id}"
     minio.put_bytes(minio.UserFilesBucket, key, b"1234")
     UploadedFile.objects.create(
         wf_module=step2,
         name="x.csv.gz",
         size=4,
         uuid=id,
         bucket=minio.UserFilesBucket,
         key=key,
     )
     context = self._render_context(wf_module_id=step.id)
     result = clean_value(ParamDType.File(), id, context)
     self.assertIsNone(result)
     # Assert that if a temporary file was created to house the download, it
     # no longer exists.
     self.assertListEqual(list(self.basedir.iterdir()), [])
Example #20
0
 def test_clean_column_valid(self):
     context = RenderContext(
         None, None, TableShape(3, [
             Column('A', ColumnType.NUMBER()),
         ]), None, None)
     result = clean_value(ParamDType.Column(), 'A', context)
     self.assertEqual(result, 'A')
Example #21
0
    def test_clean_file_happy_path(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.wf_modules.create(module_id_name="uploadfile",
                                     order=0,
                                     slug="step-1")
        id = str(uuid.uuid4())
        key = f"wf-${workflow.id}/wfm-${step.id}/${id}"
        minio.put_bytes(minio.UserFilesBucket, key, b"1234")
        UploadedFile.objects.create(
            wf_module=step,
            name="x.csv.gz",
            size=4,
            uuid=id,
            bucket=minio.UserFilesBucket,
            key=key,
        )
        with ExitStack() as inner_stack:
            context = self._render_context(wf_module_id=step.id,
                                           exit_stack=inner_stack)
            result: Path = clean_value(ParamDType.File(), id, context)
            self.assertIsInstance(result, Path)
            self.assertEqual(result.read_bytes(), b"1234")
            self.assertEqual(result.suffixes, [".csv", ".gz"])

        # Assert that once `exit_stack` goes out of scope, file is deleted
        self.assertFalse(result.exists())
 def test_clean_condition_and_or_simplify(self):
     context = self._render_context(input_table=arrow_table({"A": [1]}))
     self.assertEqual(
         clean_value(
             ParamDType.Condition(),
             {
                 "operation": "and",
                 "conditions": [
                     {
                         "operation": "or",
                         "conditions": [
                             {
                                 "operation": "cell_is_blank",
                                 "column": "A",
                                 "value": "",
                                 "isCaseSensitive": False,
                                 "isRegex": False,
                             },
                         ],
                     },
                 ],
             },
             context,
         ),
         {
             "operation": "cell_is_blank",
             "column": "A",
         },
     )
 def test_clean_multicolumn_sort_in_table_order(self):
     context = self._render_context(input_table=arrow_table({
         "B": [1],
         "A": [2]
     }))
     result = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)
     self.assertEqual(result, ["B", "A"])
    def test_clean_column_prompting_error_convert_to_text(self):
        # TODO make this _automatic_ instead of quick-fix?
        # Consider Regex. We probably want to pass the module a text Series
        # _separately_ from the input DataFrame. That way Regex can output
        # a new Text column but preserve its input column's data type.
        #
        # ... but for now: prompt for a Quick Fix.
        context = self._render_context(input_table=arrow_table({"A": [1]}))
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Column(column_types=frozenset({"text"})),
                        "A", context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A"], None, frozenset({"text"}))],
        )
 def test_clean_condition_not(self):
     context = self._render_context(input_table=arrow_table({"A": ["a"]}))
     self.assertEqual(
         clean_value(
             ParamDType.Condition(),
             {
                 "operation": "text_is_not",
                 "column": "A",
                 "value": "a",
                 "isCaseSensitive": False,
                 "isRegex": False,
             },
             context,
         ),
         {
             "operation": "not",
             "condition": {
                 "operation": "text_is",
                 "column": "A",
                 "value": "a",
                 "isCaseSensitive": False,
                 "isRegex": False,
             },
         },
     )
Example #26
0
    def test_clean_multicolumn_from_other_tab(self):
        tab_output = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(
            order=0, last_relevant_delta_id=workflow.last_delta_id)
        wfm.cache_render_result(workflow.last_delta_id, tab_output)

        schema = ParamDType.Dict({
            'tab':
            ParamDType.Tab(),
            'columns':
            ParamDType.Multicolumn(tab_parameter='tab'),
        })
        params = {'tab': tab.slug, 'columns': ['A-from-tab-1', 'A-from-tab-2']}
        context = RenderContext(
            workflow.id, None,
            TableShape(3, [
                Column('A-from-tab-1', ColumnType.NUMBER()),
            ]), {
                tab.slug: StepResultShape('ok', tab_output.table_shape),
            }, params)
        result = clean_value(schema, params, context)
        # result['tab'] is not what we're testing here
        self.assertEqual(result['columns'], ['A-from-tab-2'])
Example #27
0
 def test_clean_file_no_uploaded_file(self):
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     wfm = tab.wf_modules.create(module_id_name='uploadfile', order=0)
     context = RenderContext(workflow.id, wfm.id, None, None, None)
     result = clean_value(ParamDType.File(), str(uuid.uuid4()), context)
     self.assertIsNone(result)
Example #28
0
    def test_clean_multicolumn_from_other_tab(self):
        tab_output = ProcessResult(pd.DataFrame({"A-from-tab-2": [1, 2]}))
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=workflow.last_delta_id)
        wfm.cache_render_result(workflow.last_delta_id, tab_output)

        schema = ParamDType.Dict({
            "tab":
            ParamDType.Tab(),
            "columns":
            ParamDType.Multicolumn(tab_parameter="tab"),
        })
        params = {"tab": tab.slug, "columns": ["A-from-tab-1", "A-from-tab-2"]}
        context = RenderContext(
            workflow.id,
            None,
            TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())]),
            {tab.slug: StepResultShape("ok", tab_output.table_shape)},
            params,
        )
        result = clean_value(schema, params, context)
        # result['tab'] is not what we're testing here
        self.assertEqual(result["columns"], ["A-from-tab-2"])
 def test_clean_multichartseries_missing_is_removed(self):
     context = self._render_context(input_table=arrow_table({"A": [1], "B": [1]}))
     value = [
         {"column": "A", "color": "#aaaaaa"},
         {"column": "C", "color": "#cccccc"},
     ]
     result = clean_value(ParamDType.Multichartseries(), value, context)
     self.assertEqual(result, [{"column": "A", "color": "#aaaaaa"}])
 def test_clean_multicolumn_missing_is_removed(self):
     context = self._render_context(input_table=arrow_table({
         "A": [1],
         "B": [1]
     }))
     result = clean_value(ParamDType.Multicolumn(), ["A", "X", "B"],
                          context)
     self.assertEqual(result, ["A", "B"])