def test_full_monty(self):
    # Exercise the complete "slug:args|title" syntax: first with plain fields,
    # then with quoted fields that themselves contain "," and "|".
    cases = (
        ("sum:c|Sum of Fun", "sum", "Sum of Fun", ["c"]),
        (
            'count:a,"Hello,world"|"Counts, are |Fun|"',
            "count",
            "Counts, are |Fun|",
            ["a", "Hello,world"],
        ),
    )
    for text, expected_slug, expected_title, expected_args in cases:
        parsed = Aggy.parse_aggy_string(text)
        self.assertEqual(parsed.slug, expected_slug)
        self.assertEqual(parsed.title, expected_title)
        self.assertEqual(parsed.agg_args, expected_args)
def test_custom_title(self):
    # Case 1: an unquoted title after "|" with no aggregate arguments.
    plain = Aggy.parse_aggy_string("count|Hello")
    self.assertEqual(plain.title, "Hello")
    self.assertEqual(plain.agg_args, [])
    self.assertEqual(plain.slug, "count")

    # Case 2: a quoted title keeps its embedded "," and "|" verbatim.
    quoted = Aggy.parse_aggy_string('count|"Hello,goodday|and bye"')
    expected_title = "Hello,goodday|and bye"
    self.assertEqual(quoted.title, expected_title)
    self.assertEqual(quoted.slug, "count")
def test_basic(self):
    # A bare slug with no arguments and no title: checks the parsed type, the
    # default title, the empty argument list and the aggregation object type.
    ag = Aggy.parse_aggy_string("count")
    # assertIsInstance instead of a bare `assert`: it is not stripped when
    # Python runs with -O and it reports the offending object on failure.
    self.assertIsInstance(ag, Aggy)
    self.assertEqual(ag.slug, "count")
    self.assertEqual(ag.title, "count_of")
    self.assertEqual(ag.agg_args, [])
    self.assertIsInstance(ag.aggregation, agate.Aggregation)
def _validate_aggy_column_arguments(self, aggy: Aggy, table: agate.Table) -> NoReturnType:
    """
    Validate the arguments of ``aggy`` against ``table`` and typecast count()'s value.

    Aggy is csvpivot/agate.Table agnostic, so this method:

    - makes sure that aggy's ostensible column_name argument is actually in the table
    - typecasts the second argument of count() to match the datatype of the column
      that it counts; the cast value replaces ``aggy._args[1]`` in place

    Does nothing when ``aggy`` has no arguments.

    Raises:
        ColumnNameError: the first argument is not in ``table.column_names``.
        ColumnIdentifierError: for a two-argument count(), no column in
            ``table.columns`` has the name given by the first argument.
        agate.CastError: for a two-argument count(), the second argument cannot
            be cast to the data type of the referenced column.
    """
    if not aggy._args:
        return

    ############################
    # checking valid column name
    # (For now, all possible Aggregates use the first argument (if it exists)
    # as the column_name to aggregate)
    first_arg = aggy._args[0]
    # the following is redundant with the filter_rows() check already done
    if first_arg not in table.column_names:
        raise ColumnNameError(
            "ColumnNameError: "
            + f"Attempted to perform `{aggy.slug}('{first_arg}', ...)`. "
            + f"But '{first_arg}' was expected to be a valid column name, i.e. from: {table.column_names}",
        )

    ################################################################
    # if the aggregation is count(), and there are 2 arguments
    # then the 2nd argument is typecasted against the table.column
    # (i.e. the column name referenced by the first arg)
    if len(aggy._args) < 2 or aggy.slug != "count":
        return

    col_name, cval = aggy.agg_args[0:2]

    # get column from first arg, which is presumably a column_name
    column = next((c for c in table.columns if c.name == col_name), None)
    if column is None:
        # The original message interpolated an undefined name (`argtext`), so
        # this path raised NameError; describe the call from known values.
        raise ColumnIdentifierError(
            f"'{col_name}' – from `{aggy.slug}('{col_name}', ...)` – is expected to be a column name, but it was not found in the table: {table.column_names}"
        )

    dtype = column.data_type
    # attempt a data_type conversion
    try:
        dval = dtype.cast(cval)
    except agate.CastError as err:
        typename: str = type(dtype).__name__
        # Chain the original error so the underlying cast failure stays visible.
        raise agate.CastError(
            f"You attempted to count '{cval}' in column '{col_name}', which has datatype {typename}. But '{cval}' could not be converted to {typename}."
        ) from err

    # modify agg_args
    aggy._args[1] = dval
def main(self):
    """
    Read the input, aggregate it, and write the resulting table as CSV.

    Reports an argparser error when input is still expected, returns ``None``
    right after reading when the input is empty, and otherwise returns ``0``
    once the table has been written to ``self.output_file``.
    """
    if self.additional_input_expected():
        self.argparser.error("You must provide an input file or piped data.")

    # UniformReader (DRY later)
    self.read_input()
    if self.is_empty:
        return

    # TODO ugly access of internal variables, DRY later:
    self._rows = self._filter_input_rows(self.i_rows, self.i_column_names)
    self._column_names = self.i_rows.column_names

    # extract aggies, falling back to a default count() when none were given
    aggies: list = (
        self.args.aggregates_list.copy()
        if self.args.aggregates_list
        else [Aggy.parse_aggy_string("count")]
    )

    for aggy in aggies:
        self._validate_aggy_column_arguments(aggy, table=self.i_rows)

    result: agate.Table = self.i_rows
    if self.pivot_column_name:
        # in this mode, only one aggregation is allowed. This is enforced inside run()
        # Also, this aggregation:
        # - is performed for each group i.e. cell of the aggregated data
        # - aggy.title is not used and thus ignored
        # - TODO: warn user that title is ignored?
        result = result.pivot(
            key=self.pivot_row_names or None,
            pivot=self.pivot_column_name or None,
            aggregation=aggies[0].aggregation,
        )
    else:
        # user wants multiple aggregations for rows, so this is essentially a group_by
        for row_name in self.pivot_row_names:
            result = result.group_by(key=row_name)
        titled_aggregations = [(aggy.title, aggy.aggregation) for aggy in aggies]
        result = result.aggregate(titled_aggregations)

    result.to_csv(self.output_file, **self.writer_kwargs)
    return 0
def __call__(self, parser, namespace, values, option_string=None):
    """
    Parse ``values`` into an Aggy and hand it to the parent ``__call__``.

    NOTE(review): the signature matches argparse's ``Action.__call__``; the
    parent class is not visible here, so confirm which action it extends.
    """
    super().__call__(
        parser,
        namespace,
        Aggy.parse_aggy_string(values),
        option_string,
    )
def test_pipes_everywhere(self):
    # Pipes inside quoted fields (both an argument and the title) are expected
    # to be kept literally instead of being treated as the title separator.
    parsed = Aggy.parse_aggy_string('count:hello,"foo|bar"|"Actual Title|really"')
    expectations = (
        ("slug", "count"),
        ("agg_args", ["hello", "foo|bar"]),
        ("title", "Actual Title|really"),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(parsed, attribute), expected)
def test_with_multiple_args(self):
    # Two arguments, the second quoted and containing ", " and "!": checks the
    # parsed argument list, the slug, and the default title derived from them.
    parsed = Aggy.parse_aggy_string(r'count:hello,"foo, Bar!t"')

    expected_args = ["hello", "foo, Bar!t"]
    self.assertEqual(parsed.agg_args, expected_args)

    expected_slug = "count"
    self.assertEqual(parsed.slug, expected_slug)

    expected_title = "count_of_hello_foo_bar_t"
    self.assertEqual(parsed.title, expected_title)
def test_with_single_arg(self):
    # A single argument after ":" should be the only agg_arg and should be
    # appended to the default "<slug>_of" title.
    parsed = Aggy.parse_aggy_string("sum:c")
    for attribute, expected in (
        ("agg_args", ["c"]),
        ("slug", "sum"),
        ("title", "sum_of_c"),
    ):
        self.assertEqual(getattr(parsed, attribute), expected)