async def fetch(params, *, secrets):
    """Download users/companies/segments/tags from Intercom and build a table.

    Returns an i18n error message when the user is not signed in or when the
    Intercom API request fails; otherwise returns the assembled dataframe.
    """
    secret = (secrets.get("access_token") or {}).get("secret")
    if not secret:
        return i18n.trans("badParam.access_token.empty", "Please sign in to Intercom")
    token = secret["access_token"]
    try:
        # Generous 5-minute timeout; we assume Intercom responds well within it.
        async with httpx.AsyncClient(timeout=httpx.Timeout(300)) as client:
            results = [
                await fetch_one(client, token)
                for fetch_one in (fetch_users, fetch_companies, fetch_segments, fetch_tags)
            ]
    except httpx.RequestError as err:
        return i18n.trans(
            "error.httpError.general",
            "Error querying Intercom: {error}",
            {"error": str(err)},
        )
    except RuntimeError as err:
        return i18n.trans(
            "error.unexpectedIntercomJson.general",
            "Error handling Intercom response: {error}",
            {"error": str(err)},
        )
    return build_dataframe(*results)
def eval_excel_one_row(code, table):
    """Evaluate compiled Excel `code` once against cell ranges in `table`.

    Each range token in the formula is resolved to the corresponding
    rectangular block of `table` values. Return the single evaluated value
    (an error Token becomes its text form, e.g. '#VALUE!').

    Raise UserVisibleError if a referenced range is invalid, non-rectangular
    or out of bounds.
    """
    # Generate a list of input table values for each range in the expression
    formula_args = []
    for token, obj in code.inputs.items():
        if obj is None:
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.invalidCellRange",
                    "Invalid cell range: {token}",
                    {"token": token},
                ))
        ranges = obj.ranges
        if len(ranges) != 1:
            # ...not sure what input would get us here
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.cellRangeNotRectangular",
                    "Excel range must be a rectangular block of values",
                ))
        # Renamed from `range` so the builtin isn't shadowed.
        cell_range = ranges[0]

        # Unpack start/end row/col (1-based inclusive -> 0-based half-open)
        r1 = int(cell_range["r1"]) - 1
        r2 = int(cell_range["r2"])
        c1 = int(cell_range["n1"]) - 1
        c2 = int(cell_range["n2"])

        nrows, ncols = table.shape
        # allow r2 > nrows: users use it to say SUM(A1:A99999)
        if r1 < 0 or c1 < 0 or c2 > ncols or r1 >= r2 or c1 >= c2:
            raise UserVisibleError(
                i18n.trans(
                    "excel.one_row.badRef",
                    'Excel range "{ref}" is out of bounds',
                    {"ref": cell_range["ref"]},
                ))

        # retval of code() is OperatorArray:
        # https://github.com/vinci1it2000/formulas/issues/12
        table_part = list(table.iloc[r1:r2, c1:c2].values.flat)
        formula_args.append(flatten_single_element_lists(table_part))

    # evaluate the formula just once
    # raises ValueError if function isn't implemented
    raw_value = eval_excel(code, formula_args)
    if isinstance(raw_value, Token):
        # XlError('#VALUE!') => '#VALUE!' Text
        return str(raw_value)
    return raw_value
def _render_api_error(api_endpoint: str, api_params: str, http_status: str,
                      data: bytes) -> i18n.I18nMessage:
    """Translate a Twitter API error response into a user-facing i18n message.

    Handles rate limiting (429), user_timeline-specific 404/401, and generic
    v1.1 / v2 error bodies (falling back to the raw response text when the
    body isn't the expected JSON shape).
    """
    if http_status == "429":
        return i18n.trans(
            "error.tooManyRequests",
            "Twitter API rate limit exceeded. Please wait a few minutes and try again.",
        )
    if api_endpoint == "1.1/statuses/user_timeline":
        username = urllib.parse.parse_qs(api_params)["screen_name"][0]
        if http_status == "404":
            return i18n.trans(
                "error.userDoesNotExist",
                "User {username} does not exist",
                {"username": username},
            )
        elif http_status == "401":
            return i18n.trans(
                "error.userTweetsArePrivate",
                "User {username}'s tweets are private",
                {"username": username},
            )
    if api_endpoint.startswith("1.1/"):
        # v1.1 errors look like {"error": "..."}.
        try:
            error = json.loads(data.decode("utf-8"))
            message = error["error"]
        except (KeyError, IndexError, ValueError):
            message = data.decode("utf-8")
        return i18n.trans(
            "error.genericApiErrorV1_1",
            "Error from Twitter API: {httpStatus} {error}",
            {
                "httpStatus": http_status,
                "error": message
            },
        )
    else:
        # v2 errors look like {"title": ..., "errors": [{"message": ...}]}.
        try:
            error = json.loads(data.decode("utf-8"))
        except ValueError:
            error = {}  # non-JSON body
        # BUGFIX: the old code read error["title"] outside its try block, so a
        # non-JSON or title-less body raised NameError/KeyError instead of
        # rendering a message. Fall back to the HTTP status for the title.
        if isinstance(error, dict):
            title = error.get("title", http_status)
        else:
            title = http_status
        try:
            message = error["errors"][0]["message"]
        except (KeyError, IndexError, TypeError):
            message = data.decode("utf-8")
        return i18n.trans(
            "error.genericApiErrorV2",
            "Error from Twitter API: {title}: {message}",
            {
                "title": title,
                "message": message
            },
        )
def sqlselect(table: pd.DataFrame, sql):
    """Run `sql` against `table` (exposed as SQL table "input").

    Return (dataframe, errors): on failure dataframe is None and errors is a
    list of i18n messages (or plain-English strings); on success errors is [].
    """
    if len(table.columns) == 0:
        return (pd.DataFrame(), [])

    # BUGFIX: sqlite3's connection context manager only wraps a transaction;
    # it does NOT close the connection. Close explicitly so we don't leak
    # the in-memory database until garbage collection.
    conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
    try:
        with conn:  # transaction: commit on success (same as before)
            table.to_sql("input", conn, index=False)
            with _deleting_cursor(conn.cursor()) as c:
                try:
                    c.execute(sql)
                except sqlite3.DatabaseError as err:
                    return None, _database_error_to_messages(err)
                except sqlite3.Warning as err:
                    return None, _database_warning_to_messages(err)

                if c.description is None:
                    # e.g. the whole query was a SQL comment
                    return (
                        None,
                        [
                            i18n.trans(
                                "badValue.sql.commentedQuery",
                                "Your query did nothing. Did you accidentally comment it out?",
                            )
                        ],
                    )

                colnames = [d[0] for d in c.description]
                dupdetect = set()
                for colname in colnames:
                    if colname in dupdetect:
                        return (
                            None,
                            [
                                i18n.trans(
                                    "badValue.sql.duplicateColumnName",
                                    'Your query would produce two columns named {colname}. Please delete one or alias it with "AS".',
                                    {"colname": colname},
                                )
                            ],
                        )
                    dupdetect.add(colname)

                # Memory-inefficient: creates a Python object per value
                data = c.fetchall()  # TODO benchmark c.arraysize=1000, =100000, etc.
                return pd.DataFrame.from_records(data, columns=colnames), []
    finally:
        conn.close()
def eval_excel_all_rows(code, table):
    """Evaluate compiled Excel `code` once per row of `table`.

    Every range token must reference spreadsheet row 1; the same column span
    is then re-applied to each row. Return a pd.Series of one result per row.
    Raise UserVisibleError on bad or out-of-bounds references.
    """
    column_indexes = []
    for token, cells in code.inputs.items():
        # A syntactically valid formula can still resolve to no object when
        # the reference itself is no good (e.g. a missing row number) —
        # formulas are meant to use A-Z only. But just in case:
        if cells is None:
            raise UserVisibleError(
                i18n.trans(
                    "excel.badCellReference",
                    "Bad cell reference {token}",
                    {"token": token},
                ))
        for cell_range in cells.ranges:
            # r1/r2 are which rows the range references; only row 1 is legal.
            if cell_range["r1"] != "1" or cell_range["r2"] != "1":
                raise UserVisibleError(
                    i18n.trans(
                        "excel.formulaFirstRowReference",
                        "Excel formulas can only reference the first row when applied to all rows",
                    ))
            first = cell_range["n1"] - 1
            last = cell_range["n2"]
            if first < 0 or last > len(table.columns) or first >= last:
                raise UserVisibleError(
                    i18n.trans(
                        "excel.all_rows.badColumnRef",
                        'Excel range "{ref}" is out of bounds',
                        {"ref": cell_range["ref"]},
                    ))
            column_indexes.append(list(range(first, last)))

    results = []
    for row in table.values:
        args_for_formula = [
            flatten_single_element_lists([row[i] for i in indexes])
            for indexes in column_indexes
        ]
        # raises ValueError if function isn't implemented
        results.append(eval_excel(code, args_for_formula))
    return pd.Series(results)
def _disabled(*args, **kwargs):
    # Stand-in for a blocked builtin: always raises, whatever the call shape.
    # NOTE(review): `name` is a free variable from an enclosing scope not
    # visible in this chunk. If this def sits inside a loop over builtin
    # names, Python's late-binding closures would make every stand-in report
    # the *last* name — confirm the enclosing code binds `name` per-def.
    raise UserVisibleError(
        i18n.trans(
            "python.disabledFunction",
            "{name} is disabled",
            {"name": "builtins.%s" % name},
        ))
def _render_file(path: Path, output_path: Path, params: Dict[str, Any]):
    """Parse the stored HTTP response at `path`, writing results to `output_path`.

    Return parse_file()'s result, or a one-element list holding an i18n error
    when the server's Content-Type can't be handled.
    """
    with httpfile.read(path) as (parameters, status_line, headers, body_path):
        content_type = httpfile.extract_first_header(headers, "Content-Type") or ""
        content_disposition = httpfile.extract_first_header(
            headers, "Content-Disposition")
        mime_type = guess_mime_type_or_none(
            content_type, content_disposition, parameters["url"])
        if mime_type:
            return parse_file(
                body_path,
                output_path=output_path,
                encoding=guess_charset_or_none(content_type),
                mime_type=mime_type,
                has_header=params["has_header"],
            )
        return [
            trans(
                "error.unhandledContentType",
                "Server responded with unhandled Content-Type {content_type}. "
                "Please use a different URL.",
                {"content_type": content_type},
            )
        ]
def parse_interval(s: str) -> Tuple[int, int]:
    """
    Parse a string 'interval' into a 0-based, inclusive (first, last) tuple.

    >>> parse_interval('1')
    (0, 0)
    >>> parse_interval('1-3')
    (0, 2)
    >>> parse_interval('5')
    (4, 4)

    Raise UserVisibleError (wrapping an i18n message) when `s` does not look
    like "1-2" or "5". (The old docstring showed a plain ValueError doctest
    and claimed '1' -> (0, 1); the code returns (0, 0) and raises
    UserVisibleError.)
    """
    # `numbers` is a module-level regex with "first" and optional "last"
    # groups (not visible in this chunk).
    match = numbers.fullmatch(s)
    if not match:
        raise UserVisibleError(
            i18n.trans(
                "badParam.column_numbers.invalid",
                'Column numbers must look like "1-2", "5" or "1-2, 5"; got "{value}"',
                {"value": s},
            ))
    first = int(match.group("first"))
    # A bare "5" means a single-column interval: last defaults to first.
    last = int(match.group("last") or first)
    return (first - 1, last - 1)
def _make_x_series_and_mask(
    self, table: pd.DataFrame, input_columns: Dict[str, Any]
) -> Tuple[XSeries, np.ndarray]:
    """Create an XSeries ready for charting, or raise GentleValueError.

    Return (x_series, keep_mask): keep_mask is True for rows whose X value is
    non-null, so callers can align Y columns with the X series.
    (Annotation fixed: np.ndarray, not the np.array factory function.)
    """
    if not self.x_column:
        raise GentleValueError(
            i18n.trans("noXAxisError.message", "Please choose an X-axis column")
        )
    series = table[self.x_column]
    column = input_columns[self.x_column]
    nulls = series.isna()
    safe_x_values = series[~nulls]  # so we can min(), len(), etc
    safe_x_values.reset_index(drop=True, inplace=True)
    if column.type == "text" and len(safe_x_values) > MaxNAxisLabels:
        # NOTE(review): the message hardcodes "10" while the check uses
        # MaxNAxisLabels — confirm they agree if the constant ever changes.
        raise GentleValueError(
            i18n.trans(
                "tooManyTextValuesError.message",
                'Column "{x_column}" has {n_safe_x_values} text values. We cannot fit them all on the X axis. '
                'Please change the input table to have 10 or fewer rows, or convert "{x_column}" to number or date.',
                {
                    "x_column": self.x_column,
                    "n_safe_x_values": len(safe_x_values),
                },
            )
        )
    if not len(safe_x_values):
        raise GentleValueError(
            i18n.trans(
                "noValuesError.message",
                'Column "{column_name}" has no values. Please select a column with data.',
                {"column_name": self.x_column},
            )
        )
    # All non-null values equal to the first one => only one distinct value.
    if not len(safe_x_values[safe_x_values != safe_x_values[0]]):
        raise GentleValueError(
            i18n.trans(
                "onlyOneValueError.message",
                'Column "{column_name}" has only 1 value. Please select a column with 2 or more values.',
                {"column_name": self.x_column},
            )
        )
    return XSeries(safe_x_values, column), ~nulls
def _database_warning_to_messages(
        err: sqlite3.Warning) -> List[Union[i18n.I18nMessage, str]]:
    # Translate the one sqlite3.Warning we know how to phrase nicely; any
    # other warning passes through as its (English) text. (Annotation fixed
    # to match _database_error_to_messages and the `str` return below.)
    if err.args[0] == "You can only execute one statement at a time.":
        return [
            i18n.trans(
                "badValue.sql.tooManyCommands",
                "Only one query is allowed. Please remove the semicolon (;).",
            )
        ]
    return [str(err)]  # it's English
def render(table, params):
    """Run the user's SQL against `table`; error when the query is blank."""
    sql = params["sql"]
    if sql.strip():
        return sqlselect(table, sql)
    return (
        None,
        [i18n.trans("badParam.sql.missing", "Missing SQL SELECT statement")],
    )
def _build_arrow_table(db_lz4_path: Path, query_slug: str) -> tuple:
    """Main logic. Used by render() and by command-line script.

    Return (arrow_table, errors). When the canned query fails with a
    ProgrammingError (e.g. the uploaded file predates a schema change),
    arrow_table is None and errors holds one i18n message.
    (Annotation fixed: the function returns a 2-tuple, not a bare pa.Table.)
    """
    with _open_sqlite3_lz4_file(db_lz4_path) as db:
        validate_database(db)  # raises sqlite3.DatabaseError
        try:
            arrow_table = query_database(db, query_slug)
            return arrow_table, []
        except sqlite3.ProgrammingError:
            return None, [
                i18n.trans("error.queryError", "Please upload a newer file.")
            ]
def render(table, params, *, input_columns):
    """Filter rows of `table` by the values selected in one text column."""
    column = params['column']
    values = params['valueselect']
    if not (column and values):
        # Not configured yet: pass the table through untouched.
        return table
    if input_columns[column].type == 'text':
        return value_filter(table, column, values, params['drop'])
    return i18n.trans("badParam.column.notText",
                      'Please convert this column to Text first.')
def render(arrow_table: pa.Table, params, output_path, **kwargs):
    """Apply the configured filter and write the result to `output_path`.

    Return a list of i18n regex-error messages (empty on success).
    """
    try:
        filtered = _filter_table(arrow_table, params)
    except ConditionError as err:
        return [
            i18n.trans(
                "regexParseError.message",
                "Regex parse error: {error}",
                {"error": regex_error.msg},
            )
            for regex_error in err.errors
        ]
    writer = pa.ipc.RecordBatchFileWriter(output_path, filtered.schema)
    with writer:
        writer.write_table(filtered)
    return []
def _database_error_to_messages(
    err: sqlite3.DatabaseError,
) -> List[Union[i18n.I18nMessage, str]]:
    """Turn a sqlite3 error into user-facing messages (i18n where we can)."""
    detail = err.args[0]
    is_missing_table = (
        isinstance(err, sqlite3.OperationalError)
        and detail.startswith("no such table: ")
    )
    if is_missing_table:
        return [
            i18n.trans(
                "badValue.sql.invalidTableName",
                'The only valid table name is "{table_name}"',
                {"table_name": "input"},
            )
        ]
    if detail.startswith("near "):
        return [f"SQL error {str(err)}"]  # it's English
    return [str(err)]  # it's English
def i18n_message(self):
    # Summarize conversion failures for the user: one concrete bad
    # value/row/column, then overall error and column counts via ICU plural
    # rules. Reads self.a_value, self.a_row (0-based; +1 for display),
    # self.a_column, self.total, self.n_columns.
    return i18n.trans(
        "ErrorCount.message",
        "“{a_value}” in row {a_row} of “{a_column}” cannot be converted. "
        "{n_errors, plural, "
        " one {Overall, there is # error in {n_columns, plural, other {# columns} one {# column}}.} "
        " other {Overall, there are # errors in {n_columns, plural, other {# columns} one {# column}}.} "
        "} "
        "Select 'non-dates to null' to set these values to null.",
        {
            "a_value": self.a_value,
            "a_row": self.a_row + 1,
            "a_column": self.a_column,
            "n_errors": self.total,
            "n_columns": self.n_columns,
        },
    )
def _render_deprecated_parquet(input_path: Path, errors,
                               output_path: Path,
                               params: Dict[str, Any]) -> List[I18nMessage]:
    """Convert a legacy Parquet fetch result to Arrow, noting header caveats.

    Appends to (and returns) the caller's `errors` list.
    """
    cjwparquet.convert_parquet_file_to_arrow_file(input_path, output_path)
    # The deprecated Parquet format always parsed the header row, so there is
    # nothing to do when has_header is requested. We dropped the old
    # "moduleutils.turn_header_into_first_row()" shim (broken by design —
    # what about types? — and rarely used), so ask the user to re-download
    # instead.
    if not params["has_header"]:
        errors.append(
            trans(
                "prompt.disableHeaderHandling",
                "Please re-download this file to disable header-row handling",
            )
        )
    return errors
def select_columns_by_number(table, str_col_nums):
    """
    Return a list of column names matching the user's number ranges.

    `str_col_nums` looks like "1-2, 5". Raise UserVisibleError when the
    ranges overlap (or when parse_interval_index rejects the spec — the old
    docstring said ValueError; confirm which the parser actually raises).
    """
    index = parse_interval_index(str_col_nums)  # raises ValueError
    table_col_nums = list(range(0, len(table.columns)))
    try:
        # get_indexer() returns -1 for "not covered by any interval";
        # keep the covered columns only.
        mask = index.get_indexer(table_col_nums) != -1
    except InvalidIndexError:
        # pandas raises this for an IntervalIndex with overlapping entries.
        raise UserVisibleError(
            i18n.trans(
                "badParam.column_numbers.overlapping",
                "There are overlapping numbers in input range",
            ))
    return list(table.columns[mask])
def _parse_custom_list(
    custom_list: str, table_columns: List[str], *, settings: Settings
) -> Tuple[Dict[str, str], List[i18n.I18nMessage]]:
    """
    Convert `custom_list` into a valid mapping for `table_columns`.

    `custom_list` is a user-entered textarea value: one new column name per
    line (preferred) or a comma-separated list. Positions map onto
    `table_columns`; blank entries mean "keep the old name". The final
    mapping comes from _parse_renames(), which also handles missing columns
    and uniquifies duplicates — extra renames are applied left-to-right, so
    the order of `table_columns` matters.

    Raise UserVisibleError if the user entered more names than there are
    columns.
    """
    # Chomp trailing whitespace, in case the user enters "A,B,C\n".
    trimmed = custom_list.rstrip()
    # Newline-separated wins; fall back to commas.
    separator = "\n" if "\n" in trimmed else ","
    new_names = [name.strip() for name in trimmed.split(separator)]

    try:
        renames = {
            table_columns[position]: name
            for position, name in enumerate(new_names)
            if name
        }
    except IndexError:
        raise UserVisibleError(
            i18n.trans(
                "badParam.custom_list.wrongNumberOfNames",
                "You supplied {n_names, plural, other {# column names} one {# column name}}, "
                "but the table has {n_columns, plural, other {# columns} one {# column}}.",
                {
                    "n_names": len(new_names),
                    "n_columns": len(table_columns)
                },
            ))

    # Use _parse_renames() logic to consider missing columns and uniquify
    return _parse_renames(renames, table_columns, settings=settings)
def render_arrow_v1(arrow_table, params, *, uploaded_files, **kwargs):
    """Query the uploaded .sqlite3.lz4 file and return an ArrowRenderResult."""
    file_uuid = params["file"]
    if file_uuid is None:
        # No upload yet: render an empty table.
        return ArrowRenderResult(pa.table({}))
    try:
        result_table, errors = _build_arrow_table(
            uploaded_files[file_uuid].path, params["query_slug"])
    except (InvalidLz4File, sqlite3.DatabaseError):
        message = i18n.trans(
            "error.invalidFile", "Please upload a valid .sqlite3.lz4 file.")
        return ArrowRenderResult(pa.table({}), [RenderError(message)])
    return ArrowRenderResult(result_table, errors=errors)
def render(table, params, *, input_columns, settings: Settings):
    """Reshape `table` according to params["operation"].

    Supported operations: widetolong, longtowide, transpose. When required
    parameters are missing, the table is returned untouched (no-op).
    """
    operation = params["operation"]

    if operation == "widetolong":
        varcolname = params["wtl_varcolname"]
        valcolname = params["wtl_valcolname"]
        if not (params["key_colnames"] and varcolname and valcolname):
            return table  # missing parameters
        return wide_to_long(
            table,
            key_colnames=params["key_colnames"],
            variable_colname=varcolname,
            value_colname=valcolname,
        )

    if operation == "longtowide":
        varcolname = params["ltw_varcolname"]
        if not (params["key_colnames"] and varcolname):
            return table  # missing parameters
        if varcolname in params["key_colnames"]:
            return i18n.trans(
                "error.sameColumnAndRowVariables",
                "Cannot reshape: column and row variables must be different",
            )
        return long_to_wide(
            table,
            key_colnames=params["key_colnames"],
            variable_colname=varcolname,
            settings=settings,
        )

    if operation == "transpose":
        return transpose(
            table,
            # Backwards-compat because we published it like this way back when
            {"firstcolname": "New Column"},
            input_columns=input_columns,
            settings=settings,
        )
def eval_excel(code, args):
    """Return the result of running compiled Excel `code` on `args`.

    A 1-element ndarray result is unwrapped to a plain Python scalar via
    .item(). Raise UserVisibleError if the formula uses an Excel function the
    `formulas` library has not implemented.
    """
    try:
        result = code(*args)
    except DispatcherError as err:
        # err.args == (message, function_name, cause); an unimplemented
        # function surfaces as a NotImplementedError cause.
        if not isinstance(err.args[2], NotImplementedError):
            raise
        raise UserVisibleError(
            i18n.trans(
                "excel.functionNotImplemented",
                "Function {name} not implemented",
                {"name": err.args[1]},
            ))
    if isinstance(result, np.ndarray):
        return result.item()
    return result
def excel_formula(table, formula, all_rows):
    """Compile `formula` and evaluate it against `table`, returning a Series.

    With all_rows=True the formula runs once per row; otherwise it runs once
    and the result lands in the first row (remaining rows are None). Raise
    UserVisibleError when the formula cannot be parsed.
    """
    try:
        # ast() returns (token list, function-builder); compile the builder.
        code = Parser().ast(formula)[1].compile()
    except Exception as e:
        raise UserVisibleError(
            i18n.trans(
                "excel.invalidFormula",
                "Couldn't parse formula: {error}",
                {"error": str(e)},
            ))
    if all_rows:
        return autocast_series_dtype(
            sanitize_series(eval_excel_all_rows(code, table)))
    # the whole column is blank except first row
    first_row_value = eval_excel_one_row(code, table)
    return pd.Series([first_row_value] + [None] * (len(table) - 1))
def render(table, params):
    """Append a percent-of-column-sum column for each selected column.

    For each selected column C, add "percent_C" = C / sum(C), plus a d3
    percent format for the new columns. Return an i18n error — leaving
    `table` unmodified — if any selected column sums to 0, since the
    percentages would be infinite.
    """
    if not params['colnames']:
        return table

    # BUGFIX: validate and compute everything *before* mutating `table`, so
    # a zero-sum error in a later column can't leave earlier percent_
    # columns half-applied to the caller's dataframe.
    computed = {}
    for column in params['colnames']:
        series = table[column]
        fractions = series / series.sum()
        # A zero sum turns nonzero values into +/-inf.
        if fractions.isin([np.inf, -np.inf]).any():
            return i18n.trans(
                "badData.columnSum.isZero",
                'The sum of "{column}" is 0, so we cannot calculate percentages '
                'in it.',
                {"column": column})
        computed[column] = fractions

    # We avoid duplicate columns by overwriting if there's a conflict
    for column, fractions in computed.items():
        table['percent_' + column] = fractions

    return {
        'dataframe': table,
        'column_formats': {f'percent_{c}': '{:,.1%}'
                           for c in params['colnames']},
    }
def _render_startof(table: pa.Table, colnames: List[str],
                    unit: str) -> ArrowRenderResult:
    """Truncate each named column to the start of `unit`.

    Emits a single out-of-bounds warning when any value became null.
    """
    any_truncated = False
    for colname in colnames:
        position = table.column_names.index(colname)
        result = _startof(table.columns[position], unit)
        table = table.set_column(position, colname, result.column)
        any_truncated = any_truncated or result.truncated

    errors = []
    if any_truncated:
        errors.append(
            RenderError(
                trans(
                    "warning.convertedOutOfBoundsToNull",
                    "Converted timestamp {timestamp} to null because it is out of bounds.",
                    {"timestamp": _out_of_bounds_timestamp(unit)},
                )))
    return ArrowRenderResult(table, errors=errors)
def validate_with_table(self, table: pd.DataFrame,
                        input_columns: Dict[str, Any]) -> SeriesParams:
    """
    Create a SeriesParams ready for charting, or raise GentleValueError.
    (Docstring fixed: the code raises GentleValueError, not ValueError.)

    Features ([tested?]):
    [x] Error if X column is missing
    [x] Error if no Y columns chosen
    [x] Error if no rows
    [x] Nix null X values
    [x] Error if too many bars
    [x] What if a Y column is not numeric? framework saves us
    [x] What if a Y column is the X column? framework saves us: x is text, y is numeric
    [x] Default title, X and Y axis labels
    """
    if not self.x_column:
        raise GentleValueError(
            i18n.trans("noXAxisError.message", "Please choose an X-axis column"))
    if not self.y_columns:
        raise GentleValueError(
            i18n.trans("noYAxisError.message", "Please choose a Y-axis column"))
    x_series_with_nulls = table[self.x_column]
    # Keep only rows whose X value is non-null; the same mask is applied to
    # every Y column below so X and Y stay aligned row-for-row.
    x_mask = ~(pd.isna(x_series_with_nulls))
    x_series = XSeries(
        pd.Series(x_series_with_nulls[x_mask], index=None, name=self.x_column))
    if len(x_series.series) > MaxNBars:
        raise GentleValueError(
            i18n.trans(
                "tooManyBarsError.message",
                "Column chart can visualize a maximum of {MaxNBars} bars",
                {"MaxNBars": MaxNBars},
            ))
    if not len(x_series.series):
        raise GentleValueError(
            i18n.trans("nothingToPlotError.message", "no records to plot"))
    y_columns = []
    for y_column in self.y_columns:
        y_series_with_nulls = table[y_column.column]
        y_series = pd.Series(y_series_with_nulls[x_mask],
                             index=None,
                             name=y_column.column)
        y_columns.append(YSeries(y_series, y_column.color))
    # Fall back to column names when the user didn't supply axis labels.
    x_axis_label = self.x_axis_label or x_series.name
    y_axis_label = self.y_axis_label or y_columns[0].name
    # The d3 tick format derives from the first Y column's number format.
    y_label_format = python_format_to_d3_tick_format(
        input_columns[y_columns[0].name].format)
    return SeriesParams(
        title=self.title,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label,
        x_series=x_series,
        y_columns=y_columns,
        y_label_format=y_label_format,
    )
def _generate_group_dates_help_warning(
        schema: pa.Schema, colnames: FrozenSet[str]) -> RenderError:
    """Explain the "group dates" checkbox's effect on the selected columns.

    Buckets the selected columns by Arrow type (date32 / timestamp /
    text-or-dictionary) and returns the most relevant warning, with quick
    fixes that prepend a conversion step where one exists.
    """
    timestamp_colnames = []
    text_colnames = []
    date_colnames_and_units = []
    for field in schema:
        if field.name not in colnames:
            continue
        if pa.types.is_date32(field.type):
            # Date columns carry their grouping unit in field metadata.
            date_colnames_and_units.append(
                (field.name, field.metadata[b"unit"].decode("ascii")))
        elif pa.types.is_timestamp(field.type):
            timestamp_colnames.append(field.name)
        elif pa.types.is_string(field.type) or pa.types.is_dictionary(
                field.type):
            text_colnames.append(field.name)
    if date_colnames_and_units:
        # Already a Date: tell the user its unit and how to change it.
        return RenderError(
            i18n.trans(
                "group_dates.date_selected",
                "“{column0}” is Date – {unit0, select, day {day} week {week} month {month} quarter {quarter} year {year} other {}}. Edit earlier steps or use “Convert date unit” to change units.",
                dict(
                    columns=len(date_colnames_and_units),
                    column0=date_colnames_and_units[0][0],
                    unit0=date_colnames_and_units[0][1],
                ),
            ))
    if timestamp_colnames:
        return RenderError(
            i18n.trans(
                "group_dates.timestamp_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Timestamp.}=2 {“{column0}” and one other column are Timestamp.}other {“{column0}” and # other columns are Timestamp.}}",
                dict(columns=len(timestamp_colnames),
                     column0=timestamp_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_timestamp_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep(
                        "converttimestamptodate",
                        dict(colnames=timestamp_colnames)),
                )
            ],
        )
    if text_colnames:
        # Text can become Date directly, or via Timestamp first.
        return RenderError(
            i18n.trans(
                "group_dates.text_selected",
                "{columns, plural, offset:1 =1 {“{column0}” is Text.}=2 {“{column0}” and one other column are Text.}other {“{column0}” and # other columns are Text.}}",
                dict(columns=len(text_colnames), column0=text_colnames[0]),
            ),
            [
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_date",
                        "Convert to Date",
                    ),
                    QuickFixAction.PrependStep("converttexttodate",
                                               dict(colnames=text_colnames)),
                ),
                QuickFix(
                    i18n.trans(
                        "group_dates.quick_fix.convert_text_to_timestamp",
                        "Convert to Timestamp first",
                    ),
                    QuickFixAction.PrependStep("convert-date",
                                               dict(colnames=text_colnames)),
                ),
            ],
        )
    return RenderError(
        i18n.trans("group_dates.select_date_columns", "Select a Date column."))
def make_chart(self, table: pd.DataFrame,
               input_columns: Dict[str, Any]) -> Chart:
    """Create a Chart ready for charting, or raise GentleValueError.

    Features:

    * Error if X column is missing
    * Error if X column does not have two values
    * Error if X column is all-NaN
    * Error if too many X values in text mode (since we can't chart them)
    * X column can be number or date
    * Missing X dates lead to missing records
    * Missing X floats lead to missing records
    * Missing Y values are omitted
    * Error if no Y columns chosen
    * Error if a Y column is the X column
    * Error if a Y column has fewer than 1 non-missing value
    * Default title, X and Y axis labels
    """
    # `mask` selects rows with non-null X; it is re-applied to every Y
    # column below so the serieses stay row-aligned.
    x_series, mask = self._make_x_series_and_mask(table, input_columns)
    if not self.y_columns:
        raise GentleValueError(
            i18n.trans("noYAxisError.message", "Please choose a Y-axis column"))
    y_serieses = []
    for ycolumn in self.y_columns:
        if ycolumn.column == self.x_column:
            raise GentleValueError(
                i18n.trans(
                    "sameAxesError.message",
                    "You cannot plot Y-axis column {column_name} because it is the X-axis column",
                    {"column_name": ycolumn.column},
                ))
        series = table[ycolumn.column]
        if not is_numeric_dtype(series.dtype):
            raise GentleValueError(
                i18n.trans(
                    "axisNotNumericError.message",
                    'Cannot plot Y-axis column "{column_name}" because it is not numeric. '
                    "Convert it to a number before plotting it.",
                    {"column_name": ycolumn.column},
                ))
        series = series[mask]  # line up with x_series
        series.reset_index(drop=True, inplace=True)

        # Find how many Y values can actually be plotted on the X axis. If
        # there aren't going to be any Y values on the chart, raise an
        # error.
        if not series.count():
            raise GentleValueError(
                i18n.trans(
                    "emptyAxisError.message",
                    'Cannot plot Y-axis column "{column_name}" because it has no values',
                    {"column_name": ycolumn.column},
                ))
        y_serieses.append(
            YSeries(series, ycolumn.color,
                    input_columns[ycolumn.column].format))
    title = self.title or "Line Chart"
    x_axis_label = self.x_axis_label or x_series.name
    # With a single Y series its name doubles as the default axis label;
    # with several there's no obvious default, so keep the user's value
    # (possibly empty).
    if len(y_serieses) == 1:
        y_axis_label = self.y_axis_label or y_serieses[0].name
    else:
        y_axis_label = self.y_axis_label
    return Chart(
        title=title,
        x_axis_label=x_axis_label,
        x_axis_tick_format=x_series.d3_tick_format,
        y_axis_label=y_axis_label,
        x_series=x_series,
        y_serieses=y_serieses,
        y_axis_tick_format=y_serieses[0].d3_tick_format,
    )
def render_arrow_v1(table: pa.Table, params: Dict[str, Any],
                    **kwargs) -> ArrowRenderResult:
    """Group `table` by the configured columns and aggregate.

    Returns an empty table plus an error (with a conversion quick fix) when
    a numeric aggregation targets a non-numeric column; returns the input
    unchanged when the form is still in its untouched default state.
    """
    colnames = table.column_names
    # NOTE(review): despite the name, `date_colnames` collects *timestamp*
    # columns — confirm parse_groups expects exactly that.
    date_colnames = frozenset(colname for colname in colnames
                              if pa.types.is_timestamp(table[colname].type))
    groups = parse_groups(date_colnames=date_colnames, **params["groups"])
    aggregations = parse_aggregations(params["aggregations"])

    # HACK: set the same default aggregations as we do in our JavaScript
    # component.
    if not aggregations:
        aggregations.append(
            Aggregation(Operation.SIZE, "", Operation.SIZE.default_outname("")))

    # This is a "Group By" module so we need to support the obvious operation,
    # 'SELECT COUNT(*) FROM input'. The obvious way to display that is to
    # select "Count" and not select a Group By column.
    #
    # ... and unfortunately, that form setup -- no columns selected, one
    # "Count" aggregation selected -- is exactly what the user sees by default
    # after adding the module, before step 1 of the onboarding path.
    #
    # So we get a tough choice: either make "no aggregations" a no-op to give
    # us the ideal onboarding path, _OR_ make "no aggregations" default to
    # "count", to support the obvious operation. Pick one: complete+simple, or
    # onboarding-friendly.
    #
    # For now, we're onboarding-friendly and we don't allow SELECT COUNT(*).
    # When we solve https://www.pivotaltracker.com/story/show/163264164 we
    # should change to be complete+simple (because the onboarding will have
    # another answer). That's
    # https://www.pivotaltracker.com/story/show/164375318
    if not groups and aggregations == [
            Aggregation(Operation.SIZE, "", Operation.SIZE.default_outname(""))
    ]:
        return ArrowRenderResult(
            table)  # no-op: users haven't entered any params

    # Error out with a quickfix if aggregations need number and we're not
    # number.
    non_numeric_colnames = []
    for aggregation in aggregations:
        if aggregation.operation.needs_numeric_column():
            colname = aggregation.colname
            column = table[colname]
            if (not pa.types.is_integer(column.type)
                    and not pa.types.is_floating(column.type)
                ) and colname not in non_numeric_colnames:
                non_numeric_colnames.append(colname)
    if non_numeric_colnames:
        return ArrowRenderResult(
            pa.table({}),
            errors=[
                RenderError(
                    i18n.trans(
                        "non_numeric_colnames.error",
                        "{n_columns, plural,"
                        ' one {Column "{first_colname}"}'
                        ' other {# columns (see "{first_colname}")}} '
                        "must be Numbers",
                        {
                            "n_columns": len(non_numeric_colnames),
                            "first_colname": non_numeric_colnames[0],
                        },
                    ),
                    quick_fixes=[
                        QuickFix(
                            i18n.trans("non_numeric_colnames.quick_fix.text",
                                       "Convert"),
                            action=QuickFixAction.PrependStep(
                                "converttexttonumber",
                                {"colnames": non_numeric_colnames},
                            ),
                        )
                    ],
                )
            ],
        )

    errors = _warn_if_using_deprecated_date_granularity(table, groups)
    # Only show the group-dates help when no deprecation warning fired.
    if not errors and params["groups"]["group_dates"]:
        errors = [
            _generate_group_dates_help_warning(
                table.schema, frozenset(group.colname for group in groups))
        ]
    result_table = groupby(table, groups, aggregations)
    return ArrowRenderResult(result_table, errors=errors)
def render(table, params, *, input_columns, settings: Settings):
    """Transpose `table`: the first column becomes the new header row.

    Return the transposed DataFrame, or (dataframe, warnings) when any
    warnings/quick-fixes were generated along the way.
    """
    warnings = []
    # NOTE(review): never written to below — appears vestigial.
    colnames_auto_converted_to_text = []
    # Compare *rows* against MAX_COLUMNS_PER_TABLE: after transposing, each
    # input row becomes an output column.
    if len(table) > settings.MAX_COLUMNS_PER_TABLE:
        # truncate() keeps index labels <= `after`; assumes the default
        # RangeIndex — TODO confirm callers never pass a custom index.
        table = table.truncate(after=settings.MAX_COLUMNS_PER_TABLE - 1)
        warnings.append(
            i18n.trans(
                "warnings.tooManyRows",
                "We truncated the input to {max_columns} rows so the "
                "transposed table would have a reasonable number of columns.",
                {"max_columns": settings.MAX_COLUMNS_PER_TABLE},
            )
        )
    if not len(table.columns):
        # happens if we're the first module in the module stack
        return pd.DataFrame()
    # The first column supplies the transposed table's header names.
    column = table.columns[0]
    first_column = table[column]
    table.drop(column, axis=1, inplace=True)
    if input_columns[column].type != "text":
        warnings.append(
            {
                "message": i18n.trans(
                    "warnings.headersConvertedToText.message",
                    'Headers in column "{column_name}" were auto-converted to text.',
                    {"column_name": column},
                ),
                "quickFixes": [
                    {
                        "text": i18n.trans(
                            "warnings.headersConvertedToText.quickFix.text",
                            "Convert {column_name} to text",
                            {"column_name": '"%s"' % column},
                        ),
                        "action": "prependModule",
                        "args": [
                            "converttotext",
                            {"colnames": [column]},
                        ],
                    }
                ],
            }
        )
    # Ensure headers are string. (They will become column names.)
    # * categorical => str
    # * nan => ""
    # * non-text => str
    na = first_column.isna()
    first_column = first_column.astype(str)
    first_column[na] = ""  # Empty values are all equivalent
    gen_headers_result = _gen_colnames_and_warn(
        params["firstcolname"], first_column, settings
    )
    warnings.extend(gen_headers_result.warnings)
    input_types = set(c.type for c in input_columns.values() if c.name != column)
    if len(input_types) > 1:
        # Convert everything to text before transposing. (All values must
        # have the same type.)
        to_convert = [c for c in table.columns if input_columns[c].type != "text"]
        if to_convert:
            warnings.append(
                {
                    "message": i18n.trans(
                        "warnings.differentColumnTypes.message",
                        '{n_columns, plural, other {# columns (see "{first_colname}") were} one {Column "{first_colname}" was}} '
                        "auto-converted to Text because all columns must have the same type.",
                        {"n_columns": len(to_convert), "first_colname": to_convert[0]},
                    ),
                    "quickFixes": [
                        {
                            "text": i18n.trans(
                                "warnings.differentColumnTypes.quickFix.text",
                                "Convert {n_columns, plural, other {# columns} one {# column}} to text",
                                {"n_columns": len(to_convert)},
                            ),
                            "action": "prependModule",
                            "args": [
                                "converttotext",
                                {"colnames": to_convert},
                            ],
                        }
                    ],
                }
            )
            for colname in to_convert:
                # TODO respect column formats ... and nix the quick-fix?
                na = table[colname].isnull()
                table[colname] = table[colname].astype(str)
                # NOTE(review): chained indexing; it mutates here because
                # astype(str) produced a fresh column, but pandas may warn.
                table[colname][na] = np.nan
    # The actual transpose
    table.index = gen_headers_result.names[1:]
    ret = table.T
    # Set the name of the index: it will become the name of the first column.
    ret.index.name = gen_headers_result.names[0]
    # Make the index (former colnames) a column
    ret.reset_index(inplace=True)
    if warnings:
        return (ret, warnings)
    else:
        return ret