예제 #1
0
async def fetch(params, *, secrets):
    """Fetch users, companies, segments and tags from Intercom.

    Return the assembled DataFrame, or an i18n message when the user is
    not signed in or when Intercom fails.
    """
    # secrets["access_token"]["secret"] holds the OAuth secret payload;
    # presumably a dict whose "access_token" field is the bearer token —
    # TODO confirm against the secrets schema.
    access_token = (secrets.get("access_token") or {}).get("secret")
    if not access_token:
        return i18n.trans("badParam.access_token.empty",
                          "Please sign in to Intercom")
    bearer_token = access_token["access_token"]

    try:
        # 5min timeouts ... and we'll assume Intercom is quick enough
        async with httpx.AsyncClient(timeout=httpx.Timeout(300)) as client:
            users = await fetch_users(client, bearer_token)
            companies = await fetch_companies(client, bearer_token)
            segments = await fetch_segments(client, bearer_token)
            tags = await fetch_tags(client, bearer_token)
    except httpx.RequestError as err:
        # Network-level failure: DNS, timeout, connection reset, ...
        return i18n.trans(
            "error.httpError.general",
            "Error querying Intercom: {error}",
            {"error": str(err)},
        )
    except RuntimeError as err:
        # NOTE(review): presumably raised by the fetch_* helpers on
        # unexpected JSON — confirm against their implementations.
        return i18n.trans(
            "error.unexpectedIntercomJson.general",
            "Error handling Intercom response: {error}",
            {"error": str(err)},
        )

    return build_dataframe(users, companies, segments, tags)
예제 #2
0
def eval_excel_one_row(code, table):
    """Evaluate compiled Excel formula `code` once against `table`.

    Each cell range the formula references is extracted from `table` and
    passed as an argument. Return the raw result; an error Token (e.g.
    XlError('#VALUE!')) is converted to its Text form.

    Raise UserVisibleError when a referenced range is invalid,
    non-rectangular or out of bounds.
    """

    # Generate a list of input table values for each range in the expression
    formula_args = []
    for token, obj in code.inputs.items():
        if obj is None:
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.invalidCellRange",
                    "Invalid cell range: {token}",
                    {"token": token},
                ))
        ranges = obj.ranges
        if len(ranges) != 1:
            # ...not sure what input would get us here
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.cellRangeNotRectangular",
                    "Excel range must be a rectangular block of values",
                ))
        # Named `rng` (not `range`) so the builtin isn't shadowed; also
        # matches eval_excel_all_rows().
        rng = ranges[0]

        # Unpack start/end row/col: 1-based inclusive -> 0-based half-open
        r1 = int(rng["r1"]) - 1
        r2 = int(rng["r2"])
        c1 = int(rng["n1"]) - 1
        c2 = int(rng["n2"])

        ncols = table.shape[1]
        # allow r2 > nrows: users use it to say SUM(A1:A99999)
        if r1 < 0 or c1 < 0 or c2 > ncols or r1 >= r2 or c1 >= c2:
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.badRef",
                    'Excel range "{ref}" is out of bounds',
                    {"ref": rng["ref"]},
                ))

        # retval of code() is OperatorArray:
        # https://github.com/vinci1it2000/formulas/issues/12
        table_part = list(table.iloc[r1:r2, c1:c2].values.flat)
        formula_args.append(flatten_single_element_lists(table_part))

    # evaluate the formula just once
    # raises ValueError if function isn't implemented
    raw_value = eval_excel(code, formula_args)

    if isinstance(raw_value, Token):
        # XlError('#VALUE!') => '#VALUE!' Text
        return str(raw_value)
    return raw_value
예제 #3
0
def _render_api_error(api_endpoint: str, api_params: str, http_status: str,
                      data: bytes) -> i18n.I18nMessage:
    """Build a user-facing message for a failed Twitter API request.

    `data` is the raw UTF-8 HTTP response body. We try to pull the API's
    own error text from its JSON and fall back to the raw body text.
    """
    if http_status == "429":
        return i18n.trans(
            "error.tooManyRequests",
            "Twitter API rate limit exceeded. Please wait a few minutes and try again.",
        )

    if api_endpoint == "1.1/statuses/user_timeline":
        # api_params is a query string, e.g. "screen_name=foo&count=200"
        username = urllib.parse.parse_qs(api_params)["screen_name"][0]
        if http_status == "404":
            return i18n.trans(
                "error.userDoesNotExist",
                "User {username} does not exist",
                {"username": username},
            )
        elif http_status == "401":
            return i18n.trans(
                "error.userTweetsArePrivate",
                "User {username}'s tweets are private",
                {"username": username},
            )

    text = data.decode("utf-8")

    if api_endpoint.startswith("1.1/"):
        # v1.1 error bodies look like {"error": "..."}
        try:
            message = json.loads(text)["error"]
        except (KeyError, IndexError, ValueError):
            message = text
        return i18n.trans(
            "error.genericApiErrorV1_1",
            "Error from Twitter API: {httpStatus} {error}",
            {
                "httpStatus": http_status,
                "error": message
            },
        )
    else:
        # v2 error bodies look like
        # {"title": "...", "errors": [{"message": "..."}]}
        try:
            error = json.loads(text)
            title = error["title"]
            message = error["errors"][0]["message"]
        except (KeyError, IndexError, ValueError):
            # BUGFIX: previously, when parsing failed here, the i18n call
            # below still evaluated `error["title"]`, raising an unhandled
            # NameError/KeyError instead of returning a message. Fall back
            # to the HTTP status as the title.
            title = http_status
            message = text
        return i18n.trans(
            "error.genericApiErrorV2",
            "Error from Twitter API: {title}: {message}",
            {
                "title": title,
                "message": message
            },
        )
예제 #4
0
def sqlselect(table: pd.DataFrame, sql):
    """Run `sql` against `table` and return `(dataframe, errors)`.

    `table` is loaded into an in-memory SQLite database as a table named
    "input". Return `(result_dataframe, [])` on success, or
    `(None, [messages])` on failure. An empty input table short-circuits
    to an empty result.
    """
    if len(table.columns) == 0:
        return (pd.DataFrame(), [])

    with sqlite3.connect(":memory:",
                         detect_types=sqlite3.PARSE_DECLTYPES) as conn:
        # Expose the DataFrame to SQL as table "input"
        table.to_sql("input", conn, index=False)

        with _deleting_cursor(conn.cursor()) as c:
            try:
                c.execute(sql)
            except sqlite3.DatabaseError as err:
                return None, _database_error_to_messages(err)
            except sqlite3.Warning as err:
                # e.g. more than one statement in the query
                return None, _database_warning_to_messages(err)

            # c.description is None when the statement produced no result
            # set — e.g. the whole query was commented out.
            if c.description is None:
                return (
                    None,
                    [
                        i18n.trans(
                            "badValue.sql.commentedQuery",
                            "Your query did nothing. Did you accidentally comment it out?",
                        )
                    ],
                )

            colnames = [d[0] for d in c.description]

            # A DataFrame needs unique column names: reject duplicates.
            dupdetect = set()
            for colname in colnames:
                if colname in dupdetect:
                    return (
                        None,
                        [
                            i18n.trans(
                                "badValue.sql.duplicateColumnName",
                                'Your query would produce two columns named {colname}. Please delete one or alias it with "AS".',
                                {"colname": colname},
                            )
                        ],
                    )
                dupdetect.add(colname)

            # Memory-inefficient: creates a Python object per value
            data = c.fetchall(
            )  # TODO benchmark c.arraysize=1000, =100000, etc.

    return pd.DataFrame.from_records(data, columns=colnames), []
예제 #5
0
def eval_excel_all_rows(code, table):
    """Evaluate compiled Excel formula `code` once per row of `table`.

    The formula may only reference row 1; those column references are
    re-applied to every row. Return a pd.Series with one result per row.

    Raise UserVisibleError on a bad cell reference or out-of-bounds range.
    """
    ncols = len(table.columns)

    # Resolve every range the formula references into a group of 0-based
    # column indexes.
    column_groups = []
    for token, obj in code.inputs.items():
        # A syntactically-valid formula can still yield no object for a
        # token; that means the cell reference itself is no good (e.g. a
        # missing row number).
        if obj is None:
            raise UserVisibleError(
                i18n.trans(
                    "excel.badCellReference",
                    "Bad cell reference {token}",
                    {"token": token},
                ))

        for rng in obj.ranges:
            # r1/r2 are the rows the range covers; only row 1 is allowed.
            if rng["r1"] != "1" or rng["r2"] != "1":
                raise UserVisibleError(
                    i18n.trans(
                        "excel.formulaFirstRowReference",
                        "Excel formulas can only reference the first row when applied to all rows",
                    ))

            first_col = rng["n1"] - 1
            last_col = rng["n2"]

            if first_col < 0 or last_col > ncols or first_col >= last_col:
                raise UserVisibleError(
                    i18n.trans(
                        "excel.all_rows.badColumnRef",
                        'Excel range "{ref}" is out of bounds',
                        {"ref": rng["ref"]},
                    ))

            column_groups.append(list(range(first_col, last_col)))

    # Evaluate the formula row by row.
    # raises ValueError if function isn't implemented
    results = [
        eval_excel(
            code,
            [
                flatten_single_element_lists([row[i] for i in group])
                for group in column_groups
            ],
        )
        for row in table.values
    ]

    return pd.Series(results)
예제 #6
0
 def _disabled(*args, **kwargs):
     # Stand-in for a disabled builtin: always raises, never calls through.
     # NOTE(review): `name` is a free variable from the enclosing scope
     # (the builtin being disabled) — the enclosing loop/function is not
     # visible in this snippet.
     raise UserVisibleError(
         i18n.trans(
             "python.disabledFunction",
             "{name} is disabled",
             {"name": "builtins.%s" % name},
         ))
예제 #7
0
def _render_file(path: Path, output_path: Path, params: Dict[str, Any]):
    """Parse the stored HTTP response at `path`, writing output to
    `output_path`.

    Return a one-element error list when the Content-Type cannot be
    handled; otherwise delegate to parse_file().
    """
    with httpfile.read(path) as (parameters, status_line, headers, body_path):
        content_type = (
            httpfile.extract_first_header(headers, "Content-Type") or "")
        content_disposition = httpfile.extract_first_header(
            headers, "Content-Disposition")

        # Derive the MIME type from headers, falling back to the URL.
        mime_type = guess_mime_type_or_none(
            content_type, content_disposition, parameters["url"])

        if mime_type:
            return parse_file(
                body_path,
                output_path=output_path,
                encoding=guess_charset_or_none(content_type),
                mime_type=mime_type,
                has_header=params["has_header"],
            )

        return [
            trans(
                "error.unhandledContentType",
                "Server responded with unhandled Content-Type {content_type}. "
                "Please use a different URL.",
                {"content_type": content_type},
            )
        ]
예제 #8
0
def parse_interval(s: str) -> Tuple[int, int]:
    """
    Parse a string 'interval' into a 0-based, inclusive (first, last) tuple.

    >>> parse_interval('1')
    (0, 0)
    >>> parse_interval('1-3')
    (0, 2)
    >>> parse_interval('5')
    (4, 4)

    Raise UserVisibleError (message: 'Column numbers must look like "1-2",
    "5" or "1-2, 5"; got "{value}"') when `s` does not match the `numbers`
    pattern.
    """
    match = numbers.fullmatch(s)
    if not match:
        raise UserVisibleError(
            i18n.trans(
                "badParam.column_numbers.invalid",
                'Column numbers must look like "1-2", "5" or "1-2, 5"; got "{value}"',
                {"value": s},
            ))

    # A bare number like "5" has no "last" group: it is the one-column
    # interval (first, first).
    first = int(match.group("first"))
    last = int(match.group("last") or first)
    return (first - 1, last - 1)
예제 #9
0
    def _make_x_series_and_mask(
        self, table: pd.DataFrame, input_columns: Dict[str, Any]
    ) -> Tuple[XSeries, np.array]:
        """Create an XSeries ready for charting, or raise GentleValueError.

        Return `(x_series, keep_mask)`: `keep_mask` is True for rows whose
        X value is non-null, so callers can filter Y columns to stay
        aligned with the X axis.
        """
        if not self.x_column:
            raise GentleValueError(
                i18n.trans("noXAxisError.message", "Please choose an X-axis column")
            )

        series = table[self.x_column]
        column = input_columns[self.x_column]
        nulls = series.isna()
        safe_x_values = series[~nulls]  # so we can min(), len(), etc
        safe_x_values.reset_index(drop=True, inplace=True)

        # A text X axis can't scale down: refuse when there are too many
        # distinct labels to draw.
        if column.type == "text" and len(safe_x_values) > MaxNAxisLabels:
            raise GentleValueError(
                i18n.trans(
                    "tooManyTextValuesError.message",
                    'Column "{x_column}" has {n_safe_x_values} text values. We cannot fit them all on the X axis. '
                    'Please change the input table to have 10 or fewer rows, or convert "{x_column}" to number or date.',
                    {
                        "x_column": self.x_column,
                        "n_safe_x_values": len(safe_x_values),
                    },
                )
            )

        if not len(safe_x_values):
            raise GentleValueError(
                i18n.trans(
                    "noValuesError.message",
                    'Column "{column_name}" has no values. Please select a column with data.',
                    {"column_name": self.x_column},
                )
            )

        # All remaining values equal the first one => nothing to spread
        # along the axis.
        if not len(safe_x_values[safe_x_values != safe_x_values[0]]):
            raise GentleValueError(
                i18n.trans(
                    "onlyOneValueError.message",
                    'Column "{column_name}" has only 1 value. Please select a column with 2 or more values.',
                    {"column_name": self.x_column},
                )
            )

        return XSeries(safe_x_values, column), ~nulls
예제 #10
0
def _database_warning_to_messages(
        err: sqlite3.Warning) -> List[Union[i18n.I18nMessage, str]]:
    """Convert a sqlite3.Warning into user-facing messages.

    Known warnings get a translated message; anything else passes through
    as its English str(). (Return annotation widened to match
    _database_error_to_messages: the fallback really is a plain str.)
    """
    if err.args[0] == "You can only execute one statement at a time.":
        return [
            i18n.trans(
                "badValue.sql.tooManyCommands",
                "Only one query is allowed. Please remove the semicolon (;).",
            )
        ]

    return [str(err)]  # it's English
예제 #11
0
def render(table, params):
    """Run the user's SQL against `table`; error when the query is blank."""
    sql = params["sql"]

    if sql.strip():
        return sqlselect(table, sql)

    # Whitespace-only query: nothing to execute.
    return (
        None,
        [
            i18n.trans("badParam.sql.missing",
                       "Missing SQL SELECT statement")
        ],
    )
예제 #12
0
def _build_arrow_table(db_lz4_path: Path, query_slug: str) -> pa.Table:
    """Main logic. Used by render() and by command-line script.

    Return `(arrow_table, [])` on success, or `(None, [error_message])`
    when the query raises sqlite3.ProgrammingError.

    NOTE(review): the return annotation says `pa.Table`, but the function
    actually returns a `(table_or_None, errors)` 2-tuple — confirm and fix.

    Propagates sqlite3.DatabaseError (from validate_database) and whatever
    _open_sqlite3_lz4_file raises on an unreadable file; callers catch
    those.
    """
    with _open_sqlite3_lz4_file(db_lz4_path) as db:
        validate_database(db)  # raises sqlite3.DatabaseError

        try:
            arrow_table = query_database(db, query_slug)
            return arrow_table, []
        except sqlite3.ProgrammingError:
            return None, [
                i18n.trans("error.queryError", "Please upload a newer file.")
            ]
예제 #13
0
def render(table, params, *, input_columns):
    """Keep (or drop) rows of `column` matching `values`; no-op when the
    user hasn't picked a column and values yet.
    """
    column = params['column']
    values = params['valueselect']

    if not (column and values):
        return table  # nothing selected yet: no-op

    if input_columns[column].type == 'text':
        return value_filter(table, column, values, params['drop'])

    # Only text columns are filterable here.
    return i18n.trans("badParam.column.notText",
                      'Please convert this column to Text first.')
예제 #14
0
def render(arrow_table: pa.Table, params, output_path, **kwargs):
    """Filter `arrow_table` per `params`, writing the result to
    `output_path`.

    Return [] on success, or a list of regex-parse error messages.
    """
    try:
        filtered = _filter_table(arrow_table, params)
    except ConditionError as err:
        return [
            i18n.trans(
                "regexParseError.message",
                "Regex parse error: {error}",
                {"error": e.msg},
            )
            for e in err.errors
        ]

    writer = pa.ipc.RecordBatchFileWriter(output_path, filtered.schema)
    with writer:
        writer.write_table(filtered)
    return []
예제 #15
0
def _database_error_to_messages(
    err: sqlite3.DatabaseError, ) -> List[Union[i18n.I18nMessage, str]]:
    if isinstance(err, sqlite3.OperationalError) and err.args[0].startswith(
            "no such table: "):
        return [
            i18n.trans(
                "badValue.sql.invalidTableName",
                'The only valid table name is "{table_name}"',
                {"table_name": "input"},
            )
        ]

    if err.args[0].startswith("near "):
        return [f"SQL error {str(err)}"]  # it's English

    return [str(err)]  # it's English
예제 #16
0
 def i18n_message(self):
     """Build the translated summary of these conversion errors.

     Reads self.a_value / self.a_row / self.a_column (one sample bad
     value) plus self.total (error count) and self.n_columns. a_row is
     0-based internally; the message shows it 1-based.
     """
     return i18n.trans(
         "ErrorCount.message",
         "“{a_value}” in row {a_row} of “{a_column}” cannot be converted. "
         "{n_errors, plural, "
         "  one {Overall, there is # error in {n_columns, plural, other {# columns} one {# column}}.} "
         "  other {Overall, there are # errors in {n_columns, plural, other {# columns} one {# column}}.} "
         "} "
         "Select 'non-dates to null' to set these values to null.",
         {
             "a_value": self.a_value,
             "a_row": self.a_row + 1,
             "a_column": self.a_column,
             "n_errors": self.total,
             "n_columns": self.n_columns,
         },
     )
예제 #17
0
def _render_deprecated_parquet(input_path: Path, errors, output_path: Path,
                               params: Dict[str, Any]) -> List[I18nMessage]:
    """Convert the legacy Parquet file at `input_path` to an Arrow file at
    `output_path`, appending any new warnings to `errors` and returning it.
    """
    cjwparquet.convert_parquet_file_to_arrow_file(input_path, output_path)

    # The deprecated parquet format always parsed the header row, so
    # has_header=True needs no extra work. We used to offer
    # "moduleutils.turn_header_into_first_row()" for the has_header=False
    # case, but it was broken by design (what about types???) and rarely
    # used — so we dropped it and warn instead.
    if not params["has_header"]:
        errors += [
            trans(
                "prompt.disableHeaderHandling",
                "Please re-download this file to disable header-row handling",
            )
        ]

    return errors
예제 #18
0
def select_columns_by_number(table, str_col_nums):
    """
    Return a list of column names matching the 1-based `str_col_nums` spec.

    Raise ValueError (from parse_interval_index) on a malformed spec, or
    UserVisibleError when the parsed intervals overlap.
    """
    index = parse_interval_index(str_col_nums)  # raises ValueError

    table_col_nums = list(range(0, len(table.columns)))

    try:
        # mask[i] is True when column i falls inside one of the intervals
        mask = index.get_indexer(table_col_nums) != -1
    except InvalidIndexError:
        # get_indexer raises here when intervals overlap (per the message
        # below) — presumably the only InvalidIndexError cause; confirm.
        raise UserVisibleError(
            i18n.trans(
                "badParam.column_numbers.overlapping",
                "There are overlapping numbers in input range",
            ))

    return list(table.columns[mask])
예제 #19
0
def _parse_custom_list(
        custom_list: str, table_columns: List[str], *,
        settings: Settings) -> Tuple[Dict[str, str], List[i18n.I18nMessage]]:
    """
    Convert `custom_list` into a valid mapping for `table_columns`.

    Return a minimal and valid dict from old colname to new colname.

    Raise `UserVisibleError` if the user entered more column names than
    the table has columns.

    `custom_list` is a textarea filled in by a user, separated by
    commas/newlines. (We prefer newlines, but if the user writes a
    comma-separated list we use commas.) The logic to handle this: do _all_
    the user's renames at once, and then queue extra renames for columns
    that end up with duplicate names. Those extra renames are handled
    left-to-right (the order of `table_columns` matters).
    """
    # Chomp trailing newline, in case the user enters "A,B,C\n".
    custom_list = custom_list.rstrip()

    # Split by newline (preferred) or comma (if the user wants that)
    if "\n" in custom_list:
        split_char = "\n"
    else:
        split_char = ","
    rename_list = [s.strip() for s in custom_list.split(split_char)]

    # Convert to dict; a blank entry means "keep this column's name".
    try:
        renames = {table_columns[i]: s for i, s in enumerate(rename_list) if s}
    except IndexError:
        # More names than columns: table_columns[i] ran off the end.
        raise UserVisibleError(
            i18n.trans(
                "badParam.custom_list.wrongNumberOfNames",
                "You supplied {n_names, plural, other {# column names} one {# column name}}, "
                "but the table has {n_columns, plural, other {# columns} one {# column}}.",
                {
                    "n_names": len(rename_list),
                    "n_columns": len(table_columns)
                },
            ))

    # Use _parse_renames() logic to consider missing columns and uniquify
    return _parse_renames(renames, table_columns, settings=settings)
예제 #20
0
def render_arrow_v1(arrow_table, params, *, uploaded_files, **kwargs):
    """Render the user-selected .sqlite3.lz4 upload as an Arrow table.

    No file selected => empty table; unreadable file => error message.
    """
    file_id = params["file"]
    if file_id is None:
        return ArrowRenderResult(pa.table({}))

    path = uploaded_files[file_id].path

    try:
        result_table, errors = _build_arrow_table(path, params["query_slug"])
    except (InvalidLz4File, sqlite3.DatabaseError):
        message = i18n.trans("error.invalidFile",
                             "Please upload a valid .sqlite3.lz4 file.")
        return ArrowRenderResult(pa.table({}), [RenderError(message)])

    return ArrowRenderResult(result_table, errors=errors)
예제 #21
0
def render(table, params, *, input_columns, settings: Settings):
    """Reshape `table` per params["operation"]: wide-to-long, long-to-wide
    or transpose. Incomplete parameters make the step a no-op.
    """
    operation = params["operation"]

    if operation == "widetolong":
        key_colnames = params["key_colnames"]
        varcolname = params["wtl_varcolname"]
        valcolname = params["wtl_valcolname"]
        if not (key_colnames and varcolname and valcolname):
            return table  # missing parameters: no-op

        return wide_to_long(
            table,
            key_colnames=key_colnames,
            variable_colname=varcolname,
            value_colname=valcolname,
        )

    if operation == "longtowide":
        key_colnames = params["key_colnames"]
        varcolname = params["ltw_varcolname"]
        if not (key_colnames and varcolname):
            return table  # missing parameters: no-op

        if varcolname in key_colnames:
            return i18n.trans(
                "error.sameColumnAndRowVariables",
                "Cannot reshape: column and row variables must be different",
            )

        return long_to_wide(
            table,
            key_colnames=key_colnames,
            variable_colname=varcolname,
            settings=settings,
        )

    if operation == "transpose":
        return transpose(
            table,
            # Backwards-compat because we published it like this way back when
            {"firstcolname": "New Column"},
            input_columns=input_columns,
            settings=settings,
        )
예제 #22
0
def eval_excel(code, args):
    """Return result of running Excel code with args.

    Raise UserVisibleError if a function is unimplemented.
    """
    try:
        result = code(*args)
    except DispatcherError as err:
        # err.args[1] is the function name; err.args[2] is the underlying
        # exception (NotImplementedError when formulas lacks the function).
        if not isinstance(err.args[2], NotImplementedError):
            raise
        raise UserVisibleError(
            i18n.trans(
                "excel.functionNotImplemented",
                "Function {name} not implemented",
                {"name": err.args[1]},
            ))

    # formulas wraps scalar results in an ndarray; unwrap them.
    if isinstance(result, np.ndarray):
        return result.item()
    return result
예제 #23
0
def excel_formula(table, formula, all_rows):
    """Compile Excel `formula` and evaluate it against `table`.

    all_rows=True evaluates per row and returns a whole column; otherwise
    the formula runs once and the result lands in row 0, with None below.

    Raise UserVisibleError if the formula cannot be parsed.
    """
    try:
        # ast() returns (tokens, builder); compile the builder to a callable
        code = Parser().ast(formula)[1].compile()
    except Exception as e:  # the parser can raise almost anything
        raise UserVisibleError(
            i18n.trans(
                "excel.invalidFormula",
                "Couldn't parse formula: {error}",
                {"error": str(e)},
            ))

    if not all_rows:
        # One evaluation: result in the first row, None everywhere else.
        value = eval_excel_one_row(code, table)
        return pd.Series([value] + [None] * (len(table) - 1))

    per_row = eval_excel_all_rows(code, table)
    return autocast_series_dtype(sanitize_series(per_row))
def render(table, params):
    """Add a 'percent_<column>' column for each selected column, holding
    each value's share of the column total, formatted as a percentage.
    """
    colnames = params['colnames']
    if not colnames:
        return table

    for colname in colnames:
        column = table[colname]
        shares = column / column.sum()
        # Dividing by a zero total turns nonzero values into +/-inf:
        # refuse rather than emit garbage.
        if shares.isin([np.inf, -np.inf]).any():
            return i18n.trans(
                "badData.columnSum.isZero",
                'The sum of "{column}" is 0, so we cannot calculate percentages '
                'in it.', {"column": colname})
        # We avoid duplicate columns by overwriting if there's a conflict
        table['percent_' + colname] = shares

    return {
        'dataframe': table,
        'column_formats': {
            f'percent_{c}': '{:,.1%}'
            for c in colnames
        },
    }
예제 #25
0
def _render_startof(table: pa.Table, colnames: List[str],
                    unit: str) -> ArrowRenderResult:
    """Truncate each timestamp column in `colnames` to the start of `unit`,
    warning once if any value became null for being out of bounds.
    """
    any_truncated = False
    for colname in colnames:
        position = table.column_names.index(colname)
        result = _startof(table.columns[position], unit)
        table = table.set_column(position, colname, result.column)
        any_truncated = any_truncated or result.truncated

    errors = []
    if any_truncated:
        errors.append(
            RenderError(
                trans(
                    "warning.convertedOutOfBoundsToNull",
                    "Converted timestamp {timestamp} to null because it is out of bounds.",
                    {"timestamp": _out_of_bounds_timestamp(unit)},
                )))

    return ArrowRenderResult(table, errors=errors)
예제 #26
0
    def validate_with_table(self, table: pd.DataFrame,
                            input_columns: Dict[str, Any]) -> SeriesParams:
        """
        Create a SeriesParams ready for charting, or raises ValueError.

        Features ([tested?]):
        [x] Error if X column is missing
        [x] Error if no Y columns chosen
        [x] Error if no rows
        [x] Nix null X values
        [x] Error if too many bars
        [x] What if a Y column is not numeric? framework saves us
        [x] What if a Y column is the X column? framework saves us: x is text, y is numeric
        [x] Default title, X and Y axis labels
        """
        if not self.x_column:
            raise GentleValueError(
                i18n.trans("noXAxisError.message",
                           "Please choose an X-axis column"))
        if not self.y_columns:
            raise GentleValueError(
                i18n.trans("noYAxisError.message",
                           "Please choose a Y-axis column"))

        # Drop rows whose X value is null; keep the mask so Y columns can
        # be filtered the same way and stay aligned with X.
        x_series_with_nulls = table[self.x_column]
        x_mask = ~(pd.isna(x_series_with_nulls))
        x_series = XSeries(
            pd.Series(x_series_with_nulls[x_mask],
                      index=None,
                      name=self.x_column))

        if len(x_series.series) > MaxNBars:
            raise GentleValueError(
                i18n.trans(
                    "tooManyBarsError.message",
                    "Column chart can visualize a maximum of {MaxNBars} bars",
                    {"MaxNBars": MaxNBars},
                ))

        if not len(x_series.series):
            raise GentleValueError(
                i18n.trans("nothingToPlotError.message", "no records to plot"))

        # Apply the same X-null mask to every Y column so rows line up.
        y_columns = []
        for y_column in self.y_columns:
            y_series_with_nulls = table[y_column.column]
            y_series = pd.Series(y_series_with_nulls[x_mask],
                                 index=None,
                                 name=y_column.column)
            y_columns.append(YSeries(y_series, y_column.color))

        # Fall back to column names when the user left axis labels blank.
        x_axis_label = self.x_axis_label or x_series.name
        y_axis_label = self.y_axis_label or y_columns[0].name
        # d3 tick format derived from the first Y column's number format
        y_label_format = python_format_to_d3_tick_format(
            input_columns[y_columns[0].name].format)

        return SeriesParams(
            title=self.title,
            x_axis_label=x_axis_label,
            y_axis_label=y_axis_label,
            x_series=x_series,
            y_columns=y_columns,
            y_label_format=y_label_format,
        )
예제 #27
0
def _generate_group_dates_help_warning(
        schema: pa.Schema, colnames: FrozenSet[str]) -> RenderError:
    """Build the most helpful "group dates" warning for the selected columns.

    Inspect each selected column's Arrow type and report (in priority
    order): a Date column with its unit, Timestamp columns with a
    convert-to-Date quick fix, Text columns with two conversion quick
    fixes, or a generic "select a Date column" prompt.
    """
    timestamp_colnames = []
    text_colnames = []
    date_colnames_and_units = []
    for field in schema:
        # Only consider the columns the user selected.
        if field.name not in colnames:
            continue

        if pa.types.is_date32(field.type):
            # Date columns carry their grouping unit in field metadata.
            date_colnames_and_units.append(
                (field.name, field.metadata[b"unit"].decode("ascii")))
        elif pa.types.is_timestamp(field.type):
            timestamp_colnames.append(field.name)
        elif pa.types.is_string(field.type) or pa.types.is_dictionary(
                field.type):
            text_colnames.append(field.name)

    if date_colnames_and_units:
        return RenderError(
            i18n.trans(
                "group_dates.date_selected",
                "“{column0}” is Date – {unit0, select, day {day} week {week} month {month} quarter {quarter} year {year} other {}}. Edit earlier steps or use “Convert date unit” to change units.",
                dict(
                    columns=len(date_colnames_and_units),
                    column0=date_colnames_and_units[0][0],
                    unit0=date_colnames_and_units[0][1],
                ),
            ))
    if timestamp_colnames:
        return RenderError(
            i18n.trans(
                "group_dates.timestamp_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Timestamp.}=2 {“{column0}” and one other column are Timestamp.}other {“{column0}” and # other columns are Timestamp.}}",
                dict(columns=len(timestamp_colnames),
                     column0=timestamp_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_timestamp_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep(
                        "converttimestamptodate",
                        dict(colnames=timestamp_colnames)),
                )
            ],
        )
    if text_colnames:
        # Text can become Date directly or via Timestamp: offer both.
        return RenderError(
            i18n.trans(
                "group_dates.text_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Text.}=2 {“{column0}” and one other column are Text.}other {“{column0}” and # other columns are Text.}}",
                dict(columns=len(text_colnames), column0=text_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep("converttexttodate",
                                               dict(colnames=text_colnames)),
                ),
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_timestamp",
                        "Convert to Timestamp first",
                    ),
                    QuickFixAction.PrependStep("convert-date",
                                               dict(colnames=text_colnames)),
                ),
            ],
        )

    # Nothing convertible selected: generic prompt.
    return RenderError(
        i18n.trans("group_dates.select_date_columns", "Select a Date column."))
예제 #28
0
    def make_chart(self, table: pd.DataFrame,
                   input_columns: Dict[str, Any]) -> Chart:
        """Create a Chart ready for charting, or raise GentleValueError.

        Features:
        * Error if X column is missing
        * Error if X column does not have two values
        * Error if X column is all-NaN
        * Error if too many X values in text mode (since we can't chart them)
        * X column can be number or date
        * Missing X dates lead to missing records
        * Missing X floats lead to missing records
        * Missing Y values are omitted
        * Error if no Y columns chosen
        * Error if a Y column is the X column
        * Error if a Y column has fewer than 1 non-missing value
        * Default title, X and Y axis labels
        """
        # mask is True for rows whose X value is non-null; each Y column
        # is filtered by it below so rows stay aligned with the X axis.
        x_series, mask = self._make_x_series_and_mask(table, input_columns)

        if not self.y_columns:
            raise GentleValueError(
                i18n.trans("noYAxisError.message",
                           "Please choose a Y-axis column"))

        y_serieses = []
        for ycolumn in self.y_columns:
            if ycolumn.column == self.x_column:
                raise GentleValueError(
                    i18n.trans(
                        "sameAxesError.message",
                        "You cannot plot Y-axis column {column_name} because it is the X-axis column",
                        {"column_name": ycolumn.column},
                    ))

            series = table[ycolumn.column]

            if not is_numeric_dtype(series.dtype):
                raise GentleValueError(
                    i18n.trans(
                        "axisNotNumericError.message",
                        'Cannot plot Y-axis column "{column_name}" because it is not numeric. '
                        "Convert it to a number before plotting it.",
                        {"column_name": ycolumn.column},
                    ))

            series = series[mask]  # line up with x_series
            series.reset_index(drop=True, inplace=True)

            # Find how many Y values can actually be plotted on the X axis. If
            # there aren't going to be any Y values on the chart, raise an
            # error.
            if not series.count():
                raise GentleValueError(
                    i18n.trans(
                        "emptyAxisError.message",
                        'Cannot plot Y-axis column "{column_name}" because it has no values',
                        {"column_name": ycolumn.column},
                    ))

            y_serieses.append(
                YSeries(series, ycolumn.color,
                        input_columns[ycolumn.column].format))

        # Fall back to sensible defaults when the user left labels blank.
        title = self.title or "Line Chart"
        x_axis_label = self.x_axis_label or x_series.name
        # With multiple Y columns there is no obvious default Y label.
        if len(y_serieses) == 1:
            y_axis_label = self.y_axis_label or y_serieses[0].name
        else:
            y_axis_label = self.y_axis_label

        return Chart(
            title=title,
            x_axis_label=x_axis_label,
            x_axis_tick_format=x_series.d3_tick_format,
            y_axis_label=y_axis_label,
            x_series=x_series,
            y_serieses=y_serieses,
            y_axis_tick_format=y_serieses[0].d3_tick_format,
        )
예제 #29
0
def render_arrow_v1(table: pa.Table, params: Dict[str, Any],
                    **kwargs) -> ArrowRenderResult:
    """Group `table` and aggregate it according to the user's params.

    Returns the grouped-and-aggregated table; or an error (with a
    "Convert" quick fix) when a numeric aggregation targets a
    non-numeric column; or the input unchanged when the user has not
    entered any params yet.
    """
    timestamp_colnames = frozenset(
        name for name in table.column_names
        if pa.types.is_timestamp(table[name].type)
    )
    groups = parse_groups(date_colnames=timestamp_colnames,
                          **params["groups"])
    aggregations = parse_aggregations(params["aggregations"])

    # HACK: mirror the JavaScript component's implicit default: a lone
    # "Count" (SIZE) aggregation when the user selected none.
    default_count = Aggregation(Operation.SIZE, "",
                                Operation.SIZE.default_outname(""))
    if not aggregations:
        aggregations.append(default_count)

    # A "Group By" module ought to support 'SELECT COUNT(*) FROM input'
    # (select "Count", no Group By column). Unfortunately that exact form
    # state -- no columns, one "Count" -- is also what a brand-new step
    # shows before onboarding step 1. So we must pick: treat it as a no-op
    # (onboarding-friendly) or as COUNT(*) (complete+simple). For now we
    # are onboarding-friendly and disallow SELECT COUNT(*). Once
    # https://www.pivotaltracker.com/story/show/163264164 is solved we
    # should switch to complete+simple:
    # https://www.pivotaltracker.com/story/show/164375318
    if not groups and aggregations == [default_count]:
        # no-op: users haven't entered any params
        return ArrowRenderResult(table)

    # Collect (in first-seen order, deduplicated) every column that a
    # numbers-only aggregation targets but that isn't a number.
    non_numeric_colnames = []
    for aggregation in aggregations:
        if not aggregation.operation.needs_numeric_column():
            continue
        name = aggregation.colname
        column_type = table[name].type
        if pa.types.is_integer(column_type) or pa.types.is_floating(
                column_type):
            continue
        if name not in non_numeric_colnames:
            non_numeric_colnames.append(name)

    if non_numeric_colnames:
        # Error out, offering a quick fix that prepends a convert step.
        error = RenderError(
            i18n.trans(
                "non_numeric_colnames.error",
                "{n_columns, plural,"
                ' one {Column "{first_colname}"}'
                ' other {# columns (see "{first_colname}")}} '
                "must be Numbers",
                {
                    "n_columns": len(non_numeric_colnames),
                    "first_colname": non_numeric_colnames[0],
                },
            ),
            quick_fixes=[
                QuickFix(
                    i18n.trans("non_numeric_colnames.quick_fix.text",
                               "Convert"),
                    action=QuickFixAction.PrependStep(
                        "converttexttonumber",
                        {"colnames": non_numeric_colnames},
                    ),
                )
            ],
        )
        return ArrowRenderResult(pa.table({}), errors=[error])

    errors = _warn_if_using_deprecated_date_granularity(table, groups)
    if not errors and params["groups"]["group_dates"]:
        grouped_colnames = frozenset(group.colname for group in groups)
        errors = [
            _generate_group_dates_help_warning(table.schema, grouped_colnames)
        ]

    result_table = groupby(table, groups, aggregations)
    return ArrowRenderResult(result_table, errors=errors)
예제 #30
0
def render(table, params, *, input_columns, settings: Settings):
    """Transpose `table`: the first column becomes the header row.

    Parameters
    ----------
    table: pd.DataFrame to transpose (mutated in place as we work).
    params: dict; "firstcolname" names the output's first column.
    input_columns: mapping of column name to column metadata; we read
        `.type` (compared against "text") and `.name`.
    settings: provides MAX_COLUMNS_PER_TABLE, capping output width.

    Returns a pd.DataFrame, or a (pd.DataFrame, warnings) tuple when
    there is anything to warn the user about.
    """
    warnings = []

    # Input rows become output columns, so cap the number of rows.
    if len(table) > settings.MAX_COLUMNS_PER_TABLE:
        table = table.truncate(after=settings.MAX_COLUMNS_PER_TABLE - 1)
        warnings.append(
            i18n.trans(
                "warnings.tooManyRows",
                "We truncated the input to {max_columns} rows so the "
                "transposed table would have a reasonable number of columns.",
                {"max_columns": settings.MAX_COLUMNS_PER_TABLE},
            )
        )

    if not len(table.columns):
        # happens if we're the first module in the module stack
        return pd.DataFrame()

    # Pop the first column: its values become the output's column headers.
    column = table.columns[0]
    first_column = table[column]
    table.drop(column, axis=1, inplace=True)

    if input_columns[column].type != "text":
        # Headers must be text; warn that we auto-converted, and offer a
        # quick fix so the user can convert upstream instead.
        warnings.append(
            {
                "message": i18n.trans(
                    "warnings.headersConvertedToText.message",
                    'Headers in column "{column_name}" were auto-converted to text.',
                    {"column_name": column},
                ),
                "quickFixes": [
                    {
                        "text": i18n.trans(
                            "warnings.headersConvertedToText.quickFix.text",
                            "Convert {column_name} to text",
                            {"column_name": '"%s"' % column},
                        ),
                        "action": "prependModule",
                        "args": [
                            "converttotext",
                            {"colnames": [column]},
                        ],
                    }
                ],
            }
        )

    # Ensure headers are string. (They will become column names.)
    # * categorical => str
    # * nan => ""
    # * non-text => str
    na = first_column.isna()
    first_column = first_column.astype(str)
    first_column[na] = ""  # Empty values are all equivalent

    gen_headers_result = _gen_colnames_and_warn(
        params["firstcolname"], first_column, settings
    )
    warnings.extend(gen_headers_result.warnings)

    input_types = set(c.type for c in input_columns.values() if c.name != column)
    if len(input_types) > 1:
        # Transposing mixes each input row into one output column, so all
        # remaining values must share one type: convert them all to text.
        to_convert = [c for c in table.columns if input_columns[c].type != "text"]
        if to_convert:
            warnings.append(
                {
                    "message": i18n.trans(
                        "warnings.differentColumnTypes.message",
                        '{n_columns, plural, other {# columns (see "{first_colname}") were} one {Column "{first_colname}" was}} '
                        "auto-converted to Text because all columns must have the same type.",
                        {"n_columns": len(to_convert), "first_colname": to_convert[0]},
                    ),
                    "quickFixes": [
                        {
                            "text": i18n.trans(
                                "warnings.differentColumnTypes.quickFix.text",
                                "Convert {n_columns, plural, other {# columns} one {# column}} to text",
                                {"n_columns": len(to_convert)},
                            ),
                            "action": "prependModule",
                            "args": [
                                "converttotext",
                                {"colnames": to_convert},
                            ],
                        }
                    ],
                }
            )

        for colname in to_convert:
            # TODO respect column formats ... and nix the quick-fix?
            na = table[colname].isnull()
            table[colname] = table[colname].astype(str)
            # Use .loc, not chained indexing (`table[colname][na] = ...`):
            # chained assignment may write to a temporary copy — it raises
            # SettingWithCopyWarning and is a silent no-op under pandas
            # copy-on-write.
            table.loc[na, colname] = np.nan

    # The actual transpose
    table.index = gen_headers_result.names[1:]
    ret = table.T
    # Set the name of the index: it will become the name of the first column.
    ret.index.name = gen_headers_result.names[0]
    # Make the index (former colnames) a column
    ret.reset_index(inplace=True)

    if warnings:
        return (ret, warnings)
    else:
        return ret