def test_param_schema_explicit(self):
        """An explicit ``param_schema`` in the spec becomes ``mv.param_schema``.

        The comparison is made on ``repr()`` of the schema objects.
        """
        spec = {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": [{"id_name": "whee", "type": "custom"}],
            "param_schema": {
                "id_name": {
                    "type": "dict",
                    "properties": {
                        "x": {"type": "integer"},
                        "y": {"type": "string", "default": "X"},
                    },
                }
            },
        }
        mv = ModuleVersion.create_or_replace_from_spec(
            spec, source_version_hash="1.0")

        inner_schema = ParamDType.Dict({
            "x": ParamDType.Integer(),
            "y": ParamDType.String(default="X"),
        })
        expected_schema = ParamDType.Dict({"id_name": inner_schema})
        self.assertEqual(repr(mv.param_schema), repr(expected_schema))
Exemple #2
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Selected columns come back in the input table's column order."""
     # The table lists "B" before "A"; the value lists them the other way.
     table_shape = TableShape(3, [
         Column("B", ColumnType.NUMBER()),
         Column("A", ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, table_shape, None, None)
     cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)
     self.assertEqual(cleaned, ["B", "A"])
Exemple #3
0
 def test_multichartseries_omit_missing_table_columns(self):
     """Series whose column is absent from the given column set are dropped."""
     series_x = {'column': 'X', 'color': '#abcdef'}
     series_y = {'column': 'Y', 'color': '#abc123'}
     table_columns = {'X', 'Z'}

     dtype = ParamDType.Multichartseries()
     kept = dtype.omit_missing_table_columns([series_x, series_y],
                                             table_columns)
     self.assertEqual(kept, [{'column': 'X', 'color': '#abcdef'}])
    def test_clean_column_prompting_error_convert_to_number(self):
        """A text column given to a number-only Column param raises."""
        number_only = frozenset({"number"})
        input_shape = TableShape(3, [Column("A", ColumnType.TEXT())])

        with self.assertRaises(PromptingError) as cm:
            clean_value(
                ParamDType.Column(column_types=number_only),
                "A",
                input_shape,
            )

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", number_only),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Exemple #5
0
    def test_dict_prompting_error(self):
        """Each mistyped column inside a Dict param yields its own error."""
        text_columns = [
            Column('A', ColumnType.TEXT()),
            Column('B', ColumnType.TEXT()),
        ]
        context = RenderContext(
            None,
            None,
            TableShape(3, text_columns),
            None,
            None,
        )
        schema = ParamDType.Dict({
            'col1': ParamDType.Column(column_types=frozenset({'number'})),
            'col2': ParamDType.Column(column_types=frozenset({'datetime'})),
        })
        value = {'col1': 'A', 'col2': 'B'}

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, value, context)

        expected_errors = [
            PromptingError.WrongColumnType(
                ['A'], 'text', frozenset({'number'})),
            PromptingError.WrongColumnType(
                ['B'], 'text', frozenset({'datetime'})),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
    def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
        """Columns tied to a nonexistent tab clean to an empty list.

        The other tab would not exist if the user selected and then deleted
        it.
        """
        # Only the workflow id is needed; the previously fetched (and never
        # used) first tab has been dropped.
        workflow = Workflow.create_and_init()

        schema = ParamDType.Dict({
            'tab': ParamDType.Tab(),
            'columns': ParamDType.Multicolumn(tab_parameter='tab'),
        })
        param_values = {'tab': 'tab-missing', 'columns': ['A-from-tab']}
        params = Params(schema, param_values, {})
        context = RenderContext(
            workflow.id,
            TableShape(3, [
                Column('A-from-tab-1', ColumnType.NUMBER()),
            ]),
            {},  # no tab results at all, so 'tab-missing' cannot resolve
            params,
        )
        result = clean_value(schema, param_values, context)
        # result['tab'] is not what we're testing here
        self.assertEqual(result['columns'], [])
Exemple #7
0
 def test_clean_file_wrong_wf_module(self):
     """A File value whose upload belongs to another step cleans to None."""
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     wfm = tab.wf_modules.create(module_id_name='uploadfile', order=0)
     wfm2 = tab.wf_modules.create(module_id_name='uploadfile', order=1)
     # Named `file_uuid` rather than `id` so the builtin id() stays usable.
     file_uuid = str(uuid.uuid4())
     # NOTE(review): the "$" characters are literal in the key (this is an
     # f-string, not a shell/JS template). Kept as-is because the same key
     # is used for both the stored bytes and the database row.
     key = f'wf-${workflow.id}/wfm-${wfm.id}/${file_uuid}'
     minio.put_bytes(minio.UserFilesBucket, key, b'1234')
     # The upload row points at wfm2 ...
     UploadedFile.objects.create(
         wf_module=wfm2,
         name='x.csv.gz',
         size=4,
         uuid=file_uuid,
         bucket=minio.UserFilesBucket,
         key=key,
     )
     # ... while the render context says we are rendering wfm.
     context = RenderContext(workflow.id, wfm.id, None, None, None)
     result = clean_value(ParamDType.File(), file_uuid, context)
     self.assertIsNone(result)
    def test_dict_prompting_error(self):
        """Every mistyped column inside a Dict param is reported."""
        columns = [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.TEXT()),
        ]
        input_shape = TableShape(3, columns)
        schema = ParamDType.Dict({
            "col1": ParamDType.Column(column_types=frozenset({"number"})),
            "col2": ParamDType.Column(column_types=frozenset({"datetime"})),
        })

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, {"col1": "A", "col2": "B"}, input_shape)

        wrong_a = PromptingError.WrongColumnType(
            ["A"], "text", frozenset({"number"}))
        wrong_b = PromptingError.WrongColumnType(
            ["B"], "text", frozenset({"datetime"}))
        self.assertEqual(cm.exception.errors, [wrong_a, wrong_b])
Exemple #9
0
    def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
        """Columns tied to a nonexistent tab clean to an empty list.

        The other tab would not exist if the user selected and then deleted
        it.
        """
        # Only the workflow id is needed here; the former bare
        # `workflow.tabs.first()` statement discarded its result and has
        # been removed.
        workflow = Workflow.create_and_init()

        schema = ParamDType.Dict({
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        })
        params = {"tab": "tab-missing", "columns": ["A-from-tab"]}
        context = RenderContext(
            workflow.id,
            None,
            TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())]),
            {},  # no tab results at all, so "tab-missing" cannot resolve
            params,
        )
        result = clean_value(schema, params, context)
        # result['tab'] is not what we're testing here
        self.assertEqual(result["columns"], [])
    def test_param_schema_explicit(self):
        """An explicit 'param_schema' in the spec becomes ``mv.param_schema``.

        The comparison is made on ``repr()`` of the schema objects.
        """
        explicit_schema = {
            'id_name': {
                'type': 'dict',
                'properties': {
                    'x': {'type': 'integer'},
                    'y': {'type': 'string', 'default': 'X'},
                },
            },
        }
        spec = {
            'id_name': 'x',
            'name': 'x',
            'category': 'Clean',
            'parameters': [{'id_name': 'whee', 'type': 'custom'}],
            'param_schema': explicit_schema,
        }
        mv = ModuleVersion.create_or_replace_from_spec(
            spec,
            source_version_hash='1.0',
        )

        expected = ParamDType.Dict({
            'id_name': ParamDType.Dict({
                'x': ParamDType.Integer(),
                'y': ParamDType.String(default='X'),
            }),
        })
        self.assertEqual(repr(mv.param_schema), repr(expected))
Exemple #11
0
 def test_clean_multicolumn_missing_is_removed(self):
     """A selected column that is not in the input table is dropped."""
     table_shape = TableShape(3, [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, table_shape, None, None)
     # "X" is not a column of the table above.
     selected = ["A", "X", "B"]
     cleaned = clean_value(ParamDType.Multicolumn(), selected, context)
     self.assertEqual(cleaned, ["A", "B"])
Exemple #12
0
    def test_list_prompting_error_concatenate_same_type(self):
        """Same-typed errors from list items merge into one WrongColumnType."""
        number_only = frozenset({"number"})
        table_shape = TableShape(3, [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.TEXT()),
        ])
        context = RenderContext(None, None, table_shape, None, None)
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=number_only))

        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        # One error covering both columns, not one error per column.
        merged_error = PromptingError.WrongColumnType(
            ["A", "B"], "text", number_only)
        self.assertEqual(cm.exception.errors, [merged_error])
Exemple #13
0
    def test_clean_column_prompting_error_convert_to_text(self):
        """A number column given to a text-only Column param raises."""
        # TODO make this _automatic_ instead of quick-fix?
        # Consider Regex. We probably want to pass the module a text Series
        # _separately_ from the input DataFrame. That way Regex can output
        # a new Text column but preserve its input column's data type.
        #
        # ... but for now: prompt for a Quick Fix.
        text_only = frozenset({'text'})
        input_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])

        with self.assertRaises(PromptingError) as cm:
            clean_value(
                ParamDType.Column(column_types=text_only),
                'A',
                input_shape,
            )

        expected_errors = [
            PromptingError.WrongColumnType(['A'], 'number', text_only),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Exemple #14
0
    def test_clean_tabs_preserve_ordering(self):
        """Multitab values are returned in the RenderContext's tab order."""
        tab2_output = ProcessResult(pd.DataFrame({"A": [1, 2]}))
        tab3_output = ProcessResult(pd.DataFrame({"B": [2, 3]}))
        workflow = Workflow.create_and_init()
        delta_id = workflow.last_delta_id
        tab1 = workflow.tabs.first()
        # NOTE(review): both extra tabs are created with position=1 --
        # confirm that is intended.
        tab2 = workflow.tabs.create(position=1, slug="tab-2", name="Tab 2")
        tab3 = workflow.tabs.create(position=1, slug="tab-3", name="Tab 3")
        step2 = tab2.wf_modules.create(
            order=0, slug="step-1", last_relevant_delta_id=delta_id)
        step2.cache_render_result(workflow.last_delta_id, tab2_output)
        step3 = tab3.wf_modules.create(
            order=0, slug="step-2", last_relevant_delta_id=delta_id)
        step3.cache_render_result(workflow.last_delta_id, tab3_output)

        # RenderContext's dict ordering determines desired tab order. (Python
        # 3.7 spec: dict is ordered in insertion order. CPython 3.6 and PyPy 7
        # do this, too.)
        tab_shapes = {
            tab1.slug: None,
            tab2.slug: StepResultShape("ok", tab2_output.table_shape),
            tab3.slug: StepResultShape("ok", tab3_output.table_shape),
        }
        context = RenderContext(workflow.id, None, None, tab_shapes, None)

        # Supply wrongly-ordered tabs. Cleaned, they should be in order.
        wrong_order = [tab3.slug, tab2.slug]
        result = clean_value(ParamDType.Multitab(), wrong_order, context)

        expectations = [
            (tab2, "A", [1, 2]),
            (tab3, "B", [2, 3]),
        ]
        for index, (tab, colname, values) in enumerate(expectations):
            cleaned_tab = result[index]
            self.assertEqual(cleaned_tab.slug, tab.slug)
            self.assertEqual(cleaned_tab.name, tab.name)
            self.assertEqual(
                cleaned_tab.columns,
                {colname: RenderColumn(colname, "number", "{:,}")},
            )
            assert_frame_equal(cleaned_tab.dataframe,
                               pd.DataFrame({colname: values}))
Exemple #15
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        """Non-number series columns each produce a WrongColumnType error."""
        table_shape = TableShape(3, [
            Column('A', ColumnType.TEXT()),
            Column('B', ColumnType.DATETIME()),
        ])
        context = RenderContext(None, None, table_shape, None, None)
        series = [
            {'column': 'A', 'color': '#aaaaaa'},
            {'column': 'B', 'color': '#cccccc'},
        ]

        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), series, context)

        expected_errors = [
            PromptingError.WrongColumnType(
                ['A'], 'text', frozenset({'number'})),
            PromptingError.WrongColumnType(
                ['B'], 'datetime', frozenset({'number'})),
        ]
        self.assertEqual(cm.exception.errors, expected_errors)
Exemple #16
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Non-text columns in a text-only Multicolumn give one error each."""
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        columns = [
            Column('A', ColumnType.NUMBER()),
            Column('B', ColumnType.DATETIME()),
            Column('C', ColumnType.TEXT()),
        ]
        input_shape = TableShape(3, columns)

        with self.assertRaises(PromptingError) as cm:
            schema = ParamDType.Multicolumn(column_types=frozenset({'text'}))
            clean_value(schema, 'A,B', input_shape)

        wrong_a = PromptingError.WrongColumnType(
            ['A'], 'number', frozenset({'text'}))
        wrong_b = PromptingError.WrongColumnType(
            ['B'], 'datetime', frozenset({'text'}))
        self.assertEqual(cm.exception.errors, [wrong_a, wrong_b])
Exemple #17
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Non-text columns in a text-only Multicolumn give one merged error."""
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        columns = [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.DATETIME()),
            Column("C", ColumnType.TEXT()),
        ]
        input_shape = TableShape(3, columns)

        with self.assertRaises(PromptingError) as cm:
            schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(schema, "A,B", input_shape)

        # A single error names both columns; its found-type argument is None.
        merged_error = PromptingError.WrongColumnType(
            ["A", "B"], None, frozenset({"text"}))
        self.assertEqual(cm.exception.errors, [merged_error])
Exemple #18
0
 def test_clean_multichartseries_missing_is_removed(self):
     """A series whose column is not in the input table is dropped."""
     table_shape = TableShape(3, [
         Column('A', ColumnType.NUMBER()),
         Column('B', ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, table_shape, None, None)
     # 'C' is not a column of the table above.
     series = [
         {'column': 'A', 'color': '#aaaaaa'},
         {'column': 'C', 'color': '#cccccc'},
     ]
     cleaned = clean_value(ParamDType.Multichartseries(), series, context)
     self.assertEqual(cleaned, [{'column': 'A', 'color': '#aaaaaa'}])
Exemple #19
0
    def test_clean_tabs_happy_path(self):
        """A Multitab value naming one rendered tab yields that tab's output."""
        tab1_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
        workflow = Workflow.create_and_init()
        tab1 = workflow.tabs.first()
        step = tab1.wf_modules.create(
            order=0,
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step.cache_render_result(workflow.last_delta_id, tab1_output)

        tab_shapes = {
            tab1.slug: StepResultShape('ok', tab1_output.table_shape),
        }
        context = RenderContext(workflow.id, None, None, tab_shapes, None)
        result = clean_value(ParamDType.Multitab(), [tab1.slug], context)

        cleaned_tab = result[0]
        self.assertEqual(cleaned_tab.slug, tab1.slug)
        self.assertEqual(cleaned_tab.name, tab1.name)
        self.assertEqual(
            cleaned_tab.columns,
            {'A': RenderColumn('A', 'number', '{:,}')},
        )
        assert_frame_equal(cleaned_tab.dataframe,
                           pd.DataFrame({'A': [1, 2]}))
Exemple #20
0
    def test_clean_file_happy_path(self):
        """A File value for this step's upload cleans to a temporary Path.

        Also checks that the path holds the stored bytes, keeps the upload's
        suffixes, and disappears once the returned object is released.
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wfm = tab.wf_modules.create(module_id_name='uploadfile', order=0)
        # Named `file_uuid` rather than `id` so the builtin id() stays usable.
        file_uuid = str(uuid.uuid4())
        # NOTE(review): the "$" characters are literal in the key (this is an
        # f-string, not a shell/JS template). Kept as-is because the same key
        # is used for both the stored bytes and the database row.
        key = f'wf-${workflow.id}/wfm-${wfm.id}/${file_uuid}'
        minio.put_bytes(minio.UserFilesBucket, key, b'1234')
        UploadedFile.objects.create(
            wf_module=wfm,
            name='x.csv.gz',
            size=4,
            uuid=file_uuid,
            bucket=minio.UserFilesBucket,
            key=key,
        )
        context = RenderContext(workflow.id, wfm.id, None, None, None)
        result = clean_value(ParamDType.File(), file_uuid, context)
        self.assertIsInstance(result, pathlib.Path)
        self.assertEqual(result.read_bytes(), b'1234')
        self.assertEqual(result.suffixes, ['.csv', '.gz'])

        # Assert that once `result` goes out of scope, the file is deleted
        str_path = str(result)  # get the filesystem path
        del result  # should finalize, deleting the file on the filesystem
        with self.assertRaises(FileNotFoundError):
            # os.stat() reports a missing path without opening a file
            # descriptor, so nothing leaks if the file unexpectedly remains.
            os.stat(str_path)
Exemple #21
0
 def test_clean_multichartseries_missing_is_removed(self):
     """A series whose column is not in the input table is dropped."""
     table_shape = TableShape(3, [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.NUMBER()),
     ])
     context = RenderContext(None, None, table_shape, None, None)
     # "C" is not a column of the table above.
     series = [
         {"column": "A", "color": "#aaaaaa"},
         {"column": "C", "color": "#cccccc"},
     ]
     cleaned = clean_value(ParamDType.Multichartseries(), series, context)
     self.assertEqual(cleaned, [{"column": "A", "color": "#aaaaaa"}])
Exemple #22
0
    def test_clean_tab_happy_path(self):
        """A Tab value naming a rendered tab yields that tab's output."""
        tab_output = ProcessResult(pd.DataFrame({"A": [1, 2]}))
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step.cache_render_result(workflow.last_delta_id, tab_output)

        tab_shapes = {tab.slug: StepResultShape("ok", tab_output.table_shape)}
        context = RenderContext(workflow.id, None, None, tab_shapes, None)
        cleaned_tab = clean_value(ParamDType.Tab(), tab.slug, context)

        self.assertEqual(cleaned_tab.slug, tab.slug)
        self.assertEqual(cleaned_tab.name, tab.name)
        self.assertEqual(
            cleaned_tab.columns,
            {"A": RenderColumn("A", "number", "{:,}")},
        )
        assert_frame_equal(cleaned_tab.dataframe,
                           pd.DataFrame({"A": [1, 2]}))
Exemple #23
0
 def test_clean_file_no_minio_file(self):
     """A File value whose upload row exists but has no stored bytes is None.

     The upload row is attached to the same step the context renders.
     Attaching it to a second step (as before) made the test pass for the
     "wrong step" reason and never reached the missing-file case.
     NOTE(review): this assumes clean_value returns None, rather than
     raising, when the stored bytes are absent -- confirm against the
     implementation.
     """
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     wfm = tab.wf_modules.create(module_id_name="uploadfile",
                                 order=0,
                                 slug="step-1")
     # Named `file_uuid` rather than `id` so the builtin id() stays usable.
     file_uuid = str(uuid.uuid4())
     # NOTE(review): the "$" characters are literal in the key (this is an
     # f-string, not a shell/JS template).
     key = f"wf-${workflow.id}/wfm-${wfm.id}/${file_uuid}"
     # Oops -- let's _not_ put any bytes at `key`: only the database row
     # exists.
     UploadedFile.objects.create(
         wf_module=wfm,
         name="x.csv.gz",
         size=4,
         uuid=file_uuid,
         bucket=minio.UserFilesBucket,
         key=key,
     )
     context = RenderContext(workflow.id, wfm.id, None, None, None)
     result = clean_value(ParamDType.File(), file_uuid, context)
     self.assertIsNone(result)
Exemple #24
0
 def test_clean_tabs_tab_error_raises_cycle(self):
     """A Multitab naming a tab whose context entry is None raises."""
     tab_shapes = {'tab-1': None}
     context = RenderContext(None, None, None, tab_shapes, None)
     with self.assertRaises(TabCycleError):
         clean_value(ParamDType.Multitab(), ['tab-1'], context)
Exemple #25
0
 def test_clean_tabs_nix_missing_tab(self):
     """A Multitab naming a tab absent from the context cleans to []."""
     no_tabs = {}
     context = RenderContext(None, None, None, no_tabs, None)
     cleaned = clean_value(ParamDType.Multitab(), ['tab-missing'], context)
     self.assertEqual(cleaned, [])
Exemple #26
0
 def test_clean_tab_tab_error_raises_cycle(self):
     """A Tab naming a tab whose result shape is 'error' raises.

     Despite the method name, the exception asserted here is
     TabOutputUnreachableError, not TabCycleError.
     """
     error_shape = StepResultShape('error', TableShape(0, []))
     tab_shapes = {'tab-1': error_shape}
     context = RenderContext(None, None, None, tab_shapes, None)
     with self.assertRaises(TabOutputUnreachableError):
         clean_value(ParamDType.Tab(), 'tab-1', context)
Exemple #27
0
 def test_clean_tab_no_tab_output_raises_cycle(self):
     """A Tab naming a tab whose context entry is None raises TabCycleError."""
     tab_shapes = {'tab-1': None}
     context = RenderContext(None, None, None, tab_shapes, None)
     with self.assertRaises(TabCycleError):
         clean_value(ParamDType.Tab(), 'tab-1', context)
Exemple #28
0
 def test_clean_tab_no_tab_selected_gives_none(self):
     """An empty-string Tab value (no tab selected) cleans to None."""
     context = RenderContext(None, None, None, {}, None)
     result = clean_value(ParamDType.Tab(), '', context)
     # assertIsNone checks identity, so an object that merely compares
     # equal to None cannot satisfy it.
     self.assertIsNone(result)
Exemple #29
0
 def test_clean_file_none(self):
     """A File value of None cleans to None, even with no context given."""
     result = clean_value(ParamDType.File(), None, None)
     # assertIsNone checks identity, so an object that merely compares
     # equal to None cannot satisfy it.
     self.assertIsNone(result)
Exemple #30
0
 def test_clean_float(self):
     """A Float value of 3.0 passes through unchanged and stays a float."""
     float_dtype = ParamDType.Float()
     cleaned = clean_value(float_dtype, 3.0, None)
     self.assertEqual(cleaned, 3.0)
     self.assertIsInstance(cleaned, float)