Example #1
0
def test_parse_renames_rename_too_long_columns():
    # Both targets exceed MAX_BYTES_PER_COLUMN_NAME=10 once deduplicated:
    # "A" -> "BBBBBBBBBB" collides with the kept column, so it is numbered
    # and truncated to fit; the no-op rename is dropped from the mapping.
    expected_mapping = {"A": "BBBBBBBB 2"}
    expected_errors = [
        RenderError(
            cjwmodule_i18n_message(
                "util.colnames.warnings.truncated",
                {"n_columns": 1, "first_colname": "BBBBBBBB 2", "n_bytes": 10},
            )
        ),
        RenderError(
            cjwmodule_i18n_message(
                "util.colnames.warnings.numbered",
                {"n_columns": 1, "first_colname": "BBBBBBBB 2"},
            )
        ),
    ]
    assert _parse_renames(
        {"A": "BBBBBBBBBB", "BBBBBBBBBB": "BBBBBBBBBB"},
        ["A", "BBBBBBBBBB"],
        settings=Settings(MAX_BYTES_PER_COLUMN_NAME=10),
    ) == (expected_mapping, expected_errors)
Example #2
0
    def test_duplicate_copies_fresh_cache(self):
        # The cache's filename depends on workflow_id and step_id.
        # Duplicating it would need more complex code :).
        table = make_table(make_column("A", [1], format="${:,.2f}"))
        # Seed the original step's render cache (delta_id=1) with a table,
        # errors and JSON, so we can check all three survive duplication.
        write_to_rendercache(
            self.workflow,
            self.step,
            1,
            table=table,
            errors=[RenderError(I18nMessage("X", {}, None))],
            json={"foo": "bar"},
        )

        # Duplicate the step into a brand-new workflow/tab.
        workflow2 = Workflow.objects.create()
        tab2 = workflow2.tabs.create(position=0)
        dup = self.step.duplicate_into_new_workflow(tab2)

        dup_cached_result = dup.cached_render_result
        # The duplicate's cached-result metadata should equal the original's,
        # except for the identifiers that necessarily differ (and delta_id,
        # which resets to 0 in the new workflow).
        self.assertEqual(
            dup_cached_result,
            replace(
                self.step.cached_render_result,
                workflow_id=workflow2.id,
                step_id=dup.id,
                delta_id=0,
            ),
        )
        # The cached payload itself (table, errors, json) must have been
        # copied to the duplicate's own cache file.
        with open_cached_render_result(dup_cached_result) as result2:
            assert_arrow_table_equals(result2.table, table)
            self.assertEqual(result2.errors,
                             [RenderError(I18nMessage("X", {}, None))])
            self.assertEqual(result2.json, {"foo": "bar"})
def test_assert_result_equals_ok():
    # Two structurally-equal results, built from distinct table objects,
    # must compare equal (no AssertionError raised).
    def build_result():
        return ArrowRenderResult(
            make_table(make_column("A", [1])),
            errors=[RenderError(I18nMessage("foo", {}, "module"))],
            json={"foo": "bar"},
        )

    assert_result_equals(build_result(), build_result())
Example #4
0
def test_group_date_prompt_all_is_well_when_date_column_present():
    # Grouping by a Date column with group_dates=True yields only an
    # informational "date selected" message, not a quick fix.
    input_table = make_table(
        make_column("A", [datetime.date(2021, 5, 10)], unit="week"),
        make_column("B", [1]),
    )
    params = P(
        groups=dict(
            colnames=["A", "B"], group_dates=True, date_granularities={}
        ),
        aggregations=[dict(operation="size", colname="", outname="size")],
    )
    expected = ArrowRenderResult(
        make_table(
            make_column("A", [datetime.date(2021, 5, 10)], unit="week"),
            make_column("B", [1]),
            make_column("size", [1], format="{:,d}"),
        ),
        [
            RenderError(
                i18n_message(
                    "group_dates.date_selected",
                    dict(columns=1, column0="A", unit0="week"),
                )
            )
        ],
    )
    assert_result_equals(render(input_table, params), expected)
Example #5
0
def test_group_date_prompt_upgrade_timestamp_to_date():
    # Year granularity on a Timestamp column is deprecated: the render still
    # rounds to year, but warns with a "convert to date" quick fix.
    result = render(
        make_table(make_column("A", [datetime.datetime(2021, 5, 5)])),
        P(
            groups=dict(
                colnames=["A"], group_dates=True, date_granularities={"A": "Y"}
            ),
            aggregations=[dict(operation="size", colname="", outname="size")],
        ),
    )
    quick_fix = QuickFix(
        i18n_message(
            "group_dates.granularity_deprecated.quick_fix.convert_to_date"
        ),
        QuickFixAction.PrependStep(
            "converttimestamptodate",
            dict(colnames=["A"], unit="year"),
        ),
    )
    expected = ArrowRenderResult(
        make_table(
            make_column("A", [datetime.datetime(2021, 1, 1)]),
            make_column("size", [1], format="{:,d}"),
        ),
        [
            RenderError(
                i18n_message("group_dates.granularity_deprecated.need_dates"),
                [quick_fix],
            )
        ],
    )
    assert_result_equals(result, expected)
Example #6
0
def test_ignore_non_date_timestamps():
    # Steps for the user to get here:
    # 1. Make a date column, 'A'
    # 2. Check "Group Dates". The column appears.
    # 3. Select column 'A', and select a date granularity for it
    # 4. Alter the input DataFrame such that 'A' is no longer datetime
    #
    # Expected results: you can't group it by date any more.
    input_table = make_table(
        make_column("A", [1]),  # "used to be a datetime"
        # column B is a datetime, so we don't need a quickfix
        make_column("B", [datetime.datetime(2019, 1, 4)]),
    )
    params = P(
        groups=dict(
            colnames=["A"], group_dates=True, date_granularities={"A": "T"}
        ),
        aggregations=[dict(operation="size", colname="", outname="size")],
    )
    expected = ArrowRenderResult(
        make_table(make_column("A", [1]), make_column("size", [1], format="{:,d}")),
        [RenderError(i18n_message("group_dates.select_date_columns"))],
    )
    assert_result_equals(render(input_table, params), expected)
def _parse_renames(
        renames: Dict[str, str], table_columns: List[str], *,
        settings: Settings) -> Tuple[Dict[str, str], List[RenderError]]:
    """Convert `renames` into a valid mapping for `table_columns`, plus warnings.

    Ignore any renames to "". That column name is not allowed.

    Return a minimal and valid dict from old colname to new colname.

    `renames` is a dict mapping old colname to new colname. It may contain
    missing origin column names and it may duplicate destination column names.
    The logic to handle this: do _all_ the user's renames at once, and then
    queue extra renames for columns that end up with duplicate names. Those
    extra renames are handled left-to-right (the order of `table_columns`
    matters).
    """
    # Partition table_columns (preserving order) into columns that actually
    # change name and columns that keep theirs. "renames.get(c) or c" treats
    # a missing entry or a rename-to-"" as "keep the old name".
    renamed_columns = []
    kept_columns = []
    for colname in table_columns:
        if (renames.get(colname) or colname) == colname:
            kept_columns.append(colname)
        else:
            renamed_columns.append(colname)

    requested_names = [renames[colname] for colname in renamed_columns]
    # Clean/uniquify the requested names against the names we're keeping;
    # warnings describe any truncation/numbering that was needed.
    unique_names, warnings = gen_unique_clean_colnames_and_warn(
        requested_names, existing_names=kept_columns, settings=settings)
    mapping = dict(zip(renamed_columns, unique_names))
    return mapping, [RenderError(warning) for warning in warnings]
def test_assert_result_equals_check_errors():
    # Results that differ only in `errors` must NOT compare equal; the
    # AssertionError's diff shows the extra RenderError vs the empty list.
    result_with_error = ArrowRenderResult(
        make_table(),
        errors=[RenderError(I18nMessage("foo", {}, "module"))],
    )
    with pytest.raises(AssertionError, match=r"-\[Render.*\n\+\[\]"):
        assert_result_equals(ArrowRenderResult(make_table()), result_with_error)
Example #9
0
def call_render(module_spec: ModuleSpec, render: Callable,
                request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Call a pandas-v0 module's `render()` and return a Thrift RenderResult.

    Load the input Arrow table, convert it to a pandas DataFrame, prepare
    params and tab outputs, then call `render()` with only the keyword
    arguments its signature declares (or all of them, if it takes **kwargs).
    The pandas result is coerced/validated, truncated if too big, written as
    Arrow to `request.output_filename`, and converted to Thrift.
    """
    basedir = Path(request.basedir)
    input_path = basedir / request.input_filename
    table = load_trusted_arrow_file(input_path)
    dataframe = cjwpandasmodule.convert.arrow_table_to_pandas_dataframe(table)
    tab_outputs = {
        k: _thrift_tab_output_to_pandas(v, basedir)
        for k, v in request.tab_outputs.items()
    }
    params = _prepare_params(module_spec,
                             thrift_json_object_to_pydict(request.params),
                             basedir, tab_outputs)
    # Inspect render()'s signature so we only pass kwargs it can accept.
    spec = inspect.getfullargspec(render)
    kwargs = {}
    varkw = bool(spec.varkw)  # if True, function accepts **kwargs
    kwonlyargs = spec.kwonlyargs
    if varkw or "fetch_result" in kwonlyargs:
        if request.fetch_result is None:
            fetch_result = None
        else:
            fetch_result_path = basedir / request.fetch_result.filename
            errors = [
                # Data comes in as FetchError and we return RenderError.
                RenderError(thrift_i18n_message_to_arrow(e.message))
                for e in request.fetch_result.errors
            ]
            # Empty files and Parquet files are loaded into a pandas
            # ProcessResult; any other file is passed through untouched.
            if (fetch_result_path.stat().st_size == 0
                    or cjwparquet.file_has_parquet_magic_number(
                        fetch_result_path)):
                fetch_result = ptypes.ProcessResult(
                    dataframe=_parquet_to_pandas(fetch_result_path),
                    errors=errors,
                    # infer columns -- the fetch interface doesn't handle formats
                    # (TODO nix pandas_v0 fetching altogether by rewriting all modules)
                )
            else:
                # TODO nix pandas Fetch modules. (Do any use files, even?)
                fetch_result = types.FetchResult(path=fetch_result_path,
                                                 errors=errors)
        kwargs["fetch_result"] = fetch_result
    if varkw or "settings" in kwonlyargs:
        kwargs["settings"] = settings
    if varkw or "tab_name" in kwonlyargs:
        kwargs["tab_name"] = request.tab_name
    if varkw or "input_columns" in kwonlyargs:
        kwargs["input_columns"] = arrow_schema_to_render_columns(table.schema)

    input_columns = read_columns(table, full=False)
    raw_result = render(dataframe, params, **kwargs)

    # raise ValueError if invalid
    pandas_result = ptypes.ProcessResult.coerce(
        raw_result, try_fallback_columns=input_columns)
    pandas_result.truncate_in_place_if_too_big()

    arrow_result = pandas_result.to_arrow(basedir / request.output_filename)
    return arrow_render_result_to_thrift(arrow_result)
Example #10
0
    def test_delete_step(self):
        # Seed the render cache, then verify that deleting the step also
        # deletes its cached parquet object from s3.
        write_to_rendercache(
            self.workflow,
            self.step,
            1,
            table=make_table(make_column("A", [1])),
            errors=[RenderError(I18nMessage("X", {}, None), [])],
            json={"foo": "bar"},
        )
        key = crr_parquet_key(self.step.cached_render_result)

        self.step.delete()

        self.assertFalse(s3.exists(BUCKET, key))
Example #11
0
def test_group_date_prompt_when_nothing_selected():
    # group_dates=True with no columns selected: render proceeds, but warns
    # the user to select a Date column.
    result = render(
        make_table(make_column("A", [1])),
        P(
            groups=dict(colnames=[], group_dates=True, date_granularities={}),
            aggregations=[dict(operation="sum", colname="A", outname="sum")],
        ),
    )
    expected = ArrowRenderResult(
        make_table(make_column("sum", [1])),
        [RenderError(i18n_message("group_dates.select_date_columns"))],
    )
    assert_result_equals(result, expected)
Example #12
0
def test_group_dates_prompt_select_date_column():
    # group_dates=True on a numeric column: render proceeds, but warns the
    # user to select a Date column.
    result = render(
        make_table(make_column("A", [1])),
        P(
            groups=dict(colnames=["A"], group_dates=True, date_granularities={}),
            aggregations=[dict(operation="size", colname="", outname="size")],
        ),
    )
    expected = ArrowRenderResult(
        make_table(make_column("A", [1]), make_column("size", [1], format="{:,d}")),
        errors=[RenderError(i18n_message("group_dates.select_date_columns"))],
    )
    assert_result_equals(result, expected)
Example #13
0
def test_group_date_prompt_convert_text_to_date():
    # Grouping Text columns with group_dates=True warns and offers two quick
    # fixes: convert to Date, or convert to Timestamp first.
    result = render(
        make_table(
            make_column("A", ["2021-05-05"]),
            make_column("B", ["2021-05-05"]),
        ),
        P(
            groups=dict(
                colnames=["A", "B"], group_dates=True, date_granularities={}
            ),
            aggregations=[dict(operation="size", colname="", outname="size")],
        ),
    )
    quick_fixes = [
        QuickFix(
            i18n_message("group_dates.quick_fix.convert_text_to_date"),
            QuickFixAction.PrependStep(
                "converttexttodate", dict(colnames=["A", "B"])
            ),
        ),
        QuickFix(
            i18n_message("group_dates.quick_fix.convert_text_to_timestamp"),
            QuickFixAction.PrependStep(
                "convert-date", dict(colnames=["A", "B"])
            ),
        ),
    ]
    expected = ArrowRenderResult(
        make_table(
            make_column("A", ["2021-05-05"]),
            make_column("B", ["2021-05-05"]),
            make_column("size", [1], format="{:,d}"),
        ),
        [
            RenderError(
                i18n_message(
                    "group_dates.text_selected", dict(columns=2, column0="A")
                ),
                quick_fixes,
            )
        ],
    )
    assert_result_equals(result, expected)
def _parse_custom_list(
        custom_list: str, table_columns: List[str], *,
        settings: Settings) -> Tuple[Dict[str, str], List[RenderError]]:
    """Convert `custom_list` into a valid mapping for `table_columns`.

    Return a minimal and valid dict from old colname to new colname, plus
    warnings (as `RenderError`s) for any names that had to be adjusted.

    Raise `RenderErrorException` (wrapping a user-facing message) if the user
    entered too many column names.

    `custom_list` is a textarea filled in by a user, separated by
    commas/newlines. (We prefer newlines, but if the user writes a
    comma-separated list we use commas.) The logic to handle this: do _all_
    the user's renames at once, and then queue extra renames for columns
    that end up with duplicate names. Those extra renames are handled
    left-to-right (the order of `table_columns` matters).
    """
    # Chomp trailing newline, in case the user enters "A,B,C\n".
    custom_list = custom_list.rstrip()

    # Split by newline (preferred) or comma (if the user wants that)
    if "\n" in custom_list:
        split_char = "\n"
    else:
        split_char = ","
    rename_list = [s.strip() for s in custom_list.split(split_char)]

    # Convert to dict. An empty entry means "keep the old name" (skipped by
    # `if s`), so `table_columns[i]` can only raise IndexError for a
    # non-empty name beyond the table's width.
    try:
        renames = {table_columns[i]: s for i, s in enumerate(rename_list) if s}
    except IndexError:
        raise RenderErrorException(
            RenderError(
                i18n.trans(
                    "badParam.custom_list.wrongNumberOfNames",
                    "You supplied {n_names, plural, other {# column names} one {# column name}}, "
                    "but the table has {n_columns, plural, other {# columns} one {# column}}.",
                    {
                        "n_names": len(rename_list),
                        "n_columns": len(table_columns)
                    },
                )))

    # Use _parse_renames() logic to consider missing columns and uniquify
    return _parse_renames(renames, table_columns, settings=settings)
Example #15
0
def render_arrow_v1(arrow_table, params, *, uploaded_files, **kwargs):
    """Load the user's uploaded .sqlite3.lz4 file and return its table.

    No file selected => empty result. A corrupt lz4 or sqlite file => empty
    result with a user-facing error.
    """
    file_uuid = params["file"]
    if file_uuid is None:
        return ArrowRenderResult(pa.table({}))

    path = uploaded_files[file_uuid].path
    try:
        arrow_table, errors = _build_arrow_table(path, params["query_slug"])
    except (InvalidLz4File, sqlite3.DatabaseError):
        message = i18n.trans("error.invalidFile",
                             "Please upload a valid .sqlite3.lz4 file.")
        return ArrowRenderResult(pa.table({}), [RenderError(message)])

    return ArrowRenderResult(arrow_table, errors=errors)
    def test_render_result(self):
        # we're testing that a RenderError (with quick fixes) survives
        # serialization+deserialization through the module sandbox
        quick_fix = QuickFix(
            button_text=I18nMessage("z", {}, "module"),
            action=QuickFixAction.PrependStep("converttotext",
                                              {"a": "b"}),
        )
        error = RenderError(
            message=I18nMessage("x", {"y": 1}, "module"),
            quick_fixes=[quick_fix],
        )

        def render_arrow_v1(table, params, **kwargs):
            return ArrowRenderResult(
                make_table(make_column("A", ["x"])), [error]
            )

        with ModuleTestEnv(render_arrow_v1=render_arrow_v1) as env:
            outcome = env.call_render(make_table(), {})
            self.assertEqual(outcome.result, RenderResult([error]))
Example #17
0
def test_parse_renames_avoid_duplicates():
    # Both renames target "B", which already exists in the table: each gets
    # a numbered suffix and a single "numbered" warning covers both.
    result = _parse_renames(
        {"A": "B", "C": "B"}, ["A", "B", "C"], settings=Settings()
    )
    assert result == (
        {"A": "B 2", "C": "B 3"},
        [
            RenderError(
                cjwmodule_i18n_message(
                    id="util.colnames.warnings.numbered",
                    arguments={"n_columns": 2, "first_colname": "B 2"},
                )
            )
        ],
    )
Example #18
0
def test_parse_renames_avoid_duplicates_without_original():
    # "C" isn't an original column, so the first rename wins it outright;
    # the second collides and gets numbered, with a warning.
    result = _parse_renames({"A": "C", "B": "C"}, ["A", "B"], settings=Settings())
    assert result == (
        {"A": "C", "B": "C 2"},
        [
            RenderError(
                cjwmodule_i18n_message(
                    id="util.colnames.warnings.numbered",
                    arguments={"n_columns": 1, "first_colname": "C 2"},
                )
            )
        ],
    )
Example #19
0
def test_render_rename_custom_list_too_many_columns_is_error():
    # Two names for a one-column table: render returns an empty table plus
    # the "wrong number of names" error.
    result = render(
        make_table(make_column("A", ["x"])),
        P(custom_list=True, list_string="X,Y"),
        settings=Settings(),
    )
    expected_error = RenderError(
        i18n_message(
            "badParam.custom_list.wrongNumberOfNames",
            {"n_names": 2, "n_columns": 1},
        )
    )
    assert_result_equals(
        result, ArrowRenderResult(make_table(), [expected_error])
    )
Example #20
0
def test_group_date_prompt_upgrade_timestampmath():
    # Second granularity on a not-yet-rounded Timestamp column is deprecated:
    # the render still rounds, but warns with a "timestampmath" quick fix.
    result = render(
        make_table(make_column("A", [datetime.datetime(2021, 5, 5, 1, 2, 3, 4)])),
        P(
            groups=dict(
                colnames=["A"], group_dates=True, date_granularities={"A": "S"}
            ),
            aggregations=[dict(operation="size", colname="", outname="size")],
        ),
    )
    quick_fix = QuickFix(
        i18n_message(
            "group_dates.granularity_deprecated.quick_fix.round_timestamps"
        ),
        QuickFixAction.PrependStep(
            "timestampmath",
            dict(colnames=["A"], operation="startof", roundunit="second"),
        ),
    )
    expected = ArrowRenderResult(
        make_table(
            make_column("A", [datetime.datetime(2021, 5, 5, 1, 2, 3)]),
            make_column("size", [1], format="{:,d}"),
        ),
        [
            RenderError(
                i18n_message("group_dates.granularity_deprecated.need_rounding"),
                [quick_fix],
            )
        ],
    )
    assert_result_equals(result, expected)
def test_startof_out_of_bounds():
    # The 1677 timestamp goes out of bounds when rounded to the minute, so
    # it becomes null and the render reports a warning naming it.
    result = render(
        make_table(
            make_column(
                "A",
                [dt(1970, 1, 1), dt(1677, 9, 21, 0, 12, 43, 145500)],
            )
        ),
        P(operation="startof", colnames=["A"], roundunit="minute"),
    )
    expected = ArrowRenderResult(
        make_table(make_column("A", [dt(1970, 1, 1), None])),
        [
            RenderError(
                i18n_message(
                    "warning.convertedOutOfBoundsToNull",
                    {"timestamp": "1677-09-21T00:12Z"},
                )
            )
        ],
    )
    assert_result_equals(result, expected)
def _render_startof(table: pa.Table, colnames: List[str],
                    unit: str) -> ArrowRenderResult:
    """Round each timestamp column in `colnames` down to the start of `unit`.

    If any value went out of bounds during rounding (per `_startof`), return
    one warning naming the out-of-bounds timestamp for `unit`.
    """
    any_truncated = False
    for colname in colnames:
        position = table.column_names.index(colname)
        column_result = _startof(table.columns[position], unit)
        table = table.set_column(position, colname, column_result.column)
        any_truncated = any_truncated or column_result.truncated

    errors = []
    if any_truncated:
        errors.append(
            RenderError(
                trans(
                    "warning.convertedOutOfBoundsToNull",
                    "Converted timestamp {timestamp} to null because it is out of bounds.",
                    {"timestamp": _out_of_bounds_timestamp(unit)},
                )))
    return ArrowRenderResult(table, errors=errors)
Example #23
0
def test_quickfix_convert_value_strings_to_numbers():
    # Both "mean" and "sum" need numeric columns; B and C are Text, so the
    # render errors out with one "converttexttonumber" quick fix for both.
    result = render(
        make_table(
            make_column("A", [1, 1, 1]),
            make_column("B", ["a", "b", "a"]),
            make_column("C", ["a", "b", "a"]),
        ),
        P(
            groups=dict(colnames=["A"], group_dates=False, date_granularities={}),
            aggregations=[
                dict(operation="mean", colname="B", outname="mean"),
                dict(operation="sum", colname="C", outname="sum"),
            ],
        ),
    )
    quick_fix = QuickFix(
        i18n_message("non_numeric_colnames.quick_fix.text"),
        QuickFixAction.PrependStep(
            "converttexttonumber", {"colnames": ["B", "C"]}
        ),
    )
    expected = ArrowRenderResult(
        make_table(),
        [
            RenderError(
                i18n_message(
                    "non_numeric_colnames.error",
                    {"n_columns": 2, "first_colname": "B"},
                ),
                quick_fixes=[quick_fix],
            )
        ],
    )
    assert_result_equals(result, expected)
Example #24
0
def thrift_render_error_to_arrow(value: ttypes.RenderError) -> RenderError:
    """Convert a Thrift RenderError into its Arrow-side equivalent."""
    message = thrift_i18n_message_to_arrow(value.message)
    quick_fixes = [thrift_quick_fix_to_arrow(qf) for qf in value.quick_fixes]
    return RenderError(message, quick_fixes)
Example #25
0
def _warn_if_using_deprecated_date_granularity(
        table: pa.Table, groups: List[Group]) -> List[RenderError]:
    """Return upgrade-prompt errors for Timestamp groups with old granularity.

    Each affected group falls into one of two buckets:

    * day/week/month/quarter/year granularity => prompt to convert the
      column to Date (quick fix prepends "converttimestamptodate");
    * a finer granularity on a column whose values aren't already rounded
      => prompt to round (quick fix prepends "timestampmath" / "startof").

    Return at most two RenderErrors (one per bucket), each carrying one
    quick fix per affected group.
    """
    errors = []

    deprecated_need_upgrade_to_date: List[Group] = []
    deprecated_need_timestampmath: List[Group] = []
    for group in groups:
        # Only groups with a granularity set, on Timestamp columns, count as
        # deprecated usage.
        if group.date_granularity is not None and pa.types.is_timestamp(
                table.schema.field(group.colname).type):
            if group.date_granularity in {
                    DateGranularity.DAY,
                    DateGranularity.WEEK,
                    DateGranularity.MONTH,
                    DateGranularity.QUARTER,
                    DateGranularity.YEAR,
            }:
                deprecated_need_upgrade_to_date.append(group)
            elif not _timestamp_is_rounded(table[group.colname],
                                           group.date_granularity):
                deprecated_need_timestampmath.append(group)

    if deprecated_need_upgrade_to_date:
        errors.append(
            RenderError(
                i18n.trans(
                    "group_dates.granularity_deprecated.need_dates",
                    "The “Group Dates” feature has changed. Please click to upgrade from Timestamps to Dates. Workbench will force-upgrade in January 2022.",
                ),
                quick_fixes=[
                    QuickFix(
                        i18n.trans(
                            "group_dates.granularity_deprecated.quick_fix.convert_to_date",
                            "Upgrade",
                        ),
                        QuickFixAction.PrependStep(
                            "converttimestamptodate",
                            dict(
                                colnames=[group.colname],
                                unit=group.date_granularity.date_unit,
                            ),
                        ),
                    ) for group in deprecated_need_upgrade_to_date
                ],
            ))

    if deprecated_need_timestampmath:
        errors.append(
            RenderError(
                i18n.trans(
                    "group_dates.granularity_deprecated.need_rounding",
                    "The “Group Dates” feature has changed. Please click to upgrade to Timestamp Math. Workbench will force-upgrade in January 2022.",
                ),
                quick_fixes=[
                    QuickFix(
                        i18n.trans(
                            "group_dates.granularity_deprecated.quick_fix.round_timestamps",
                            "Upgrade",
                        ),
                        QuickFixAction.PrependStep(
                            "timestampmath",
                            dict(
                                colnames=[group.colname],
                                operation="startof",
                                roundunit=group.date_granularity.rounding_unit,
                            ),
                        ),
                    ) for group in deprecated_need_timestampmath
                ],
            ))

    return errors
Example #26
0
def render_arrow_v1(table: pa.Table, params: Dict[str, Any],
                    **kwargs) -> ArrowRenderResult:
    """Group `table` per `params` and aggregate, returning result + warnings.

    Special cases, in order:

    * No aggregations => default to a single "size" (Count) aggregation,
      mirroring the JavaScript component's default.
    * No groups AND only that default Count aggregation => no-op (the user
      hasn't entered any params yet).
    * An aggregation needs numbers but its column isn't numeric => empty
      table plus an error with a "converttexttonumber" quick fix.
    * Deprecated date granularities, or group_dates=True without a Date
      column => warnings prompting the user to adjust the workflow.
    """
    colnames = table.column_names
    date_colnames = frozenset(colname for colname in colnames
                              if pa.types.is_timestamp(table[colname].type))
    groups = parse_groups(date_colnames=date_colnames, **params["groups"])
    aggregations = parse_aggregations(params["aggregations"])

    # HACK: set the same default aggregations as we do in our JavaScript component.
    if not aggregations:
        aggregations.append(
            Aggregation(Operation.SIZE, "",
                        Operation.SIZE.default_outname("")))

    # This is a "Group By" module so we need to support the obvious operation,
    # 'SELECT COUNT(*) FROM input'. The obvious way to display that is to select
    # "Count" and not select a Group By column.
    #
    # ... and unfortunately, that form setup -- no columns selected, one
    # "Count" aggregation selected -- is exactly what the user sees by default
    # after adding the module, before step 1 of the onboarding path.
    #
    # So we get a tough choice: either make "no aggregations" a no-op to give
    # us the ideal onboarding path, _OR_ make "no aggregations" default to
    # "count", to support the obvious operation. Pick one: complete+simple, or
    # onboarding-friendly.
    #
    # For now, we're onboarding-friendly and we don't allow SELECT COUNT(*).
    # When we solve https://www.pivotaltracker.com/story/show/163264164 we
    # should change to be complete+simple (because the onboarding will have
    # another answer). That's
    # https://www.pivotaltracker.com/story/show/164375318
    if not groups and aggregations == [
            Aggregation(Operation.SIZE, "", Operation.SIZE.default_outname(""))
    ]:
        return ArrowRenderResult(
            table)  # no-op: users haven't entered any params

    # Error out with a quickfix if aggregations need number and we're not number
    non_numeric_colnames = []
    for aggregation in aggregations:
        if aggregation.operation.needs_numeric_column():
            colname = aggregation.colname
            column = table[colname]
            # Collect each offending column once, in aggregation order.
            if (not pa.types.is_integer(column.type)
                    and not pa.types.is_floating(
                        column.type)) and colname not in non_numeric_colnames:
                non_numeric_colnames.append(colname)
    if non_numeric_colnames:
        return ArrowRenderResult(
            pa.table({}),
            errors=[
                RenderError(
                    i18n.trans(
                        "non_numeric_colnames.error",
                        "{n_columns, plural,"
                        ' one {Column "{first_colname}"}'
                        ' other {# columns (see "{first_colname}")}} '
                        "must be Numbers",
                        {
                            "n_columns": len(non_numeric_colnames),
                            "first_colname": non_numeric_colnames[0],
                        },
                    ),
                    quick_fixes=[
                        QuickFix(
                            i18n.trans("non_numeric_colnames.quick_fix.text",
                                       "Convert"),
                            action=QuickFixAction.PrependStep(
                                "converttexttonumber",
                                {"colnames": non_numeric_colnames},
                            ),
                        )
                    ],
                )
            ],
        )

    # Deprecated-granularity warnings take precedence over the generic
    # "Group Dates" help warning.
    errors = _warn_if_using_deprecated_date_granularity(table, groups)
    if not errors and params["groups"]["group_dates"]:
        errors = [
            _generate_group_dates_help_warning(
                table.schema, frozenset(group.colname for group in groups))
        ]

    result_table = groupby(table, groups, aggregations)
    return ArrowRenderResult(result_table, errors=errors)
Example #27
0
def _generate_group_dates_help_warning(
        schema: pa.Schema, colnames: FrozenSet[str]) -> RenderError:
    """Build the warning shown when "Group Dates" is checked.

    Inspect the selected `colnames` in `schema` and pick the most helpful
    message, in priority order:

    * a Date (date32) column is selected => informational message naming
      its unit (read from the field's "unit" metadata);
    * a Timestamp column is selected => offer a "convert to Date" quick fix;
    * a Text column is selected => offer text->date and text->timestamp
      quick fixes;
    * otherwise => ask the user to select a Date column.
    """
    timestamp_colnames = []
    text_colnames = []
    date_colnames_and_units = []
    for field in schema:
        # Only consider the columns the user selected for grouping.
        if field.name not in colnames:
            continue

        if pa.types.is_date32(field.type):
            date_colnames_and_units.append(
                (field.name, field.metadata[b"unit"].decode("ascii")))
        elif pa.types.is_timestamp(field.type):
            timestamp_colnames.append(field.name)
        elif pa.types.is_string(field.type) or pa.types.is_dictionary(
                field.type):
            text_colnames.append(field.name)

    if date_colnames_and_units:
        return RenderError(
            i18n.trans(
                "group_dates.date_selected",
                "“{column0}” is Date – {unit0, select, day {day} week {week} month {month} quarter {quarter} year {year} other {}}. Edit earlier steps or use “Convert date unit” to change units.",
                dict(
                    columns=len(date_colnames_and_units),
                    column0=date_colnames_and_units[0][0],
                    unit0=date_colnames_and_units[0][1],
                ),
            ))
    if timestamp_colnames:
        return RenderError(
            i18n.trans(
                "group_dates.timestamp_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Timestamp.}=2 {“{column0}” and one other column are Timestamp.}other {“{column0}” and # other columns are Timestamp.}}",
                dict(columns=len(timestamp_colnames),
                     column0=timestamp_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_timestamp_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep(
                        "converttimestamptodate",
                        dict(colnames=timestamp_colnames)),
                )
            ],
        )
    if text_colnames:
        return RenderError(
            i18n.trans(
                "group_dates.text_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Text.}=2 {“{column0}” and one other column are Text.}other {“{column0}” and # other columns are Text.}}",
                dict(columns=len(text_colnames), column0=text_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep("converttexttodate",
                                               dict(colnames=text_colnames)),
                ),
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_timestamp",
                        "Convert to Timestamp first",
                    ),
                    QuickFixAction.PrependStep("convert-date",
                                               dict(colnames=text_colnames)),
                ),
            ],
        )

    # No Date/Timestamp/Text column selected: generic prompt.
    return RenderError(
        i18n.trans("group_dates.select_date_columns", "Select a Date column."))