def test_valuecounts_shortrows():
    """Count ``bar`` values when one data row is too short to carry that field.

    The expectation treats the short row as contributing ``None``.
    """
    rows = (
        ("foo", "bar"),
        ("a", True),
        ("x", True),
        ("b",),  # short row: no value for 'bar'
        ("b", True),
        ("c", False),
        ("z", False),
    )
    expected = (
        ("bar", "count", "frequency"),
        (True, 3, 3.0 / 6),
        (False, 2, 2.0 / 6),
        (None, 1, 1.0 / 6),
    )
    result = valuecounts(rows, "bar")
    # Two passes over the same result; presumably guards against a
    # result that can only be iterated once.
    for _ in range(2):
        ieq(expected, result)
def test_valuecounts():
    """Check that counting ``foo`` reports 'b' twice and 'a' once.

    The expected header here is ``('value', 'count', 'frequency')``.
    """
    data = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
        ('b', 7),
    )
    expected = (
        ('value', 'count', 'frequency'),
        ('b', 2, 2.0 / 3),
        ('a', 1, 1.0 / 3),
    )
    iassertequal(expected, valuecounts(data, 'foo'))
def test_valuecounts():
    """Check that counting ``foo`` reports 'b' twice and 'a' once.

    The expected header carries the counted field's own name (``'foo'``).
    """
    data = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
        ('b', 7),
    )
    expected = (
        ('foo', 'count', 'frequency'),
        ('b', 2, 2.0 / 3),
        ('a', 1, 1.0 / 3),
    )
    result = valuecounts(data, 'foo')
    # Compared twice against the same result object; presumably checks
    # that the result survives repeated iteration.
    for _ in range(2):
        ieq(expected, result)
def test_valuecounts():
    """Verify value counts and frequencies for the ``foo`` column.

    'b' appears in two of three rows and 'a' in one; the most frequent
    value is expected first.
    """
    source = (
        ("foo", "bar"),
        ("a", 1),
        ("b", 2),
        ("b", 7),
    )
    wanted = (
        ("foo", "count", "frequency"),
        ("b", 2, 2.0 / 3),
        ("a", 1, 1.0 / 3),
    )
    counted = valuecounts(source, "foo")
    ieq(wanted, counted)
    # Second comparison of the same object; presumably a re-iteration check.
    ieq(wanted, counted)
def test_valuecounts_shortrows():
    """Verify that a row lacking ``bar`` is tallied under ``None``."""
    source = (
        ('foo', 'bar'),
        ('a', True),
        ('x', True),
        ('b',),  # deliberately short: 'bar' is absent
        ('b', True),
        ('c', False),
        ('z', False),
    )
    wanted = (
        ('bar', 'count', 'frequency'),
        (True, 3, 3.0 / 6),
        (False, 2, 2.0 / 6),
        (None, 1, 1.0 / 6),
    )
    counted = valuecounts(source, 'bar')
    ieq(wanted, counted)
    # Second comparison of the same object; presumably a re-iteration check.
    ieq(wanted, counted)
def get_counter_table(fields, csv_name):
    """Render value counts for ``fields`` of a CSV file via ``get_html_data``.

    The CSV is read from ``CSV_DIR``; the ``frequency`` column produced by
    ``petl.valuecounts`` is dropped before rendering.

    Returns the string ``'No data to count'`` when ``fields`` is empty or
    otherwise falsy, without touching the file.
    """
    if fields:
        source = petl.fromcsv(f'{CSV_DIR}/{csv_name}')
        counts = petl.valuecounts(source, *fields)
        return get_html_data(petl.cutout(counts, 'frequency'))
    return 'No data to count'
def test_valuecounts_multifields():
    """Count combinations of ``foo`` and ``bar`` values.

    The short row ``('b',)`` is expected to appear as the combination
    ``('b', None)``.
    """
    rows = (
        ('foo', 'bar', 'baz'),
        ('a', True, .12),
        ('a', True, .17),
        ('b', False, .34),
        ('b', False, .44),
        ('b',),  # short row: only 'foo' is present
        ('b', False, .56),
    )
    expected = (
        ('foo', 'bar', 'count', 'frequency'),
        ('b', False, 3, 3.0 / 6),
        ('a', True, 2, 2.0 / 6),
        ('b', None, 1, 1.0 / 6),
    )
    result = valuecounts(rows, 'foo', 'bar')
    # Two passes over the same result; presumably a re-iteration check.
    for _ in range(2):
        ieq(expected, result)
def test_valuecounts_somefields():
    """Count values of ``foo`` and ``bar`` separately, one block per field.

    The expected output has a ``('field', 'value', 'count', 'frequency')``
    header, listing every distinct value of each requested field.

    NOTE(review): this is a per-field layout rather than a count of value
    combinations -- confirm it matches the ``valuecounts`` version under test.
    """
    rows = (
        ('foo', 'bar', 'baz'),
        ('a', True, .12),
        ('b', True, .34),
        ('b',),  # short row: only 'foo' is present
        ('b', True, .56),
        ('c', False, .86),
        ('c', False, .92),
    )
    expected = (
        ('field', 'value', 'count', 'frequency'),
        ('foo', 'b', 3, 3.0 / 6),
        ('foo', 'c', 2, 2.0 / 6),
        ('foo', 'a', 1, 1.0 / 6),
        ('bar', True, 3, 3.0 / 6),
        ('bar', False, 2, 2.0 / 6),
        ('bar', None, 1, 1.0 / 6),
    )
    result = valuecounts(rows, 'foo', 'bar')
    # Two passes over the same result; presumably a re-iteration check.
    for _ in range(2):
        ieq(expected, result)
def test_valuecounts_allfields():
    """Call ``valuecounts`` with no field arguments and check every field.

    The expectation lists counts for both ``foo`` and ``bar`` under a
    ``('field', 'value', 'count', 'frequency')`` header; the short row
    contributes ``None`` for ``bar``.
    """
    rows = (
        ('foo', 'bar'),
        ('a', True),
        ('b', True),
        ('b',),  # short row: no value for 'bar'
        ('b', True),
        ('c', False),
        ('c', False),
    )
    expected = (
        ('field', 'value', 'count', 'frequency'),
        ('foo', 'b', 3, 3.0 / 6),
        ('foo', 'c', 2, 2.0 / 6),
        ('foo', 'a', 1, 1.0 / 6),
        ('bar', True, 3, 3.0 / 6),
        ('bar', False, 2, 2.0 / 6),
        ('bar', None, 1, 1.0 / 6),
    )
    result = valuecounts(rows)
    # Two passes over the same result; presumably a re-iteration check.
    for _ in range(2):
        ieq(expected, result)
def count_fields(
    petl_view: petl.Table,
    field_names: Tuple[str, ...],
) -> List[Dict[str, Any]]:
    """Return one dict per distinct ``field_names`` combination in ``petl_view``.

    Each dict is keyed by the header of the counted table, which is cut
    down to the requested fields plus ``'count'``. An empty list is
    returned when the counted table yields no header row.
    """
    # TODO: add ``field_names`` validation
    counted = petl.valuecounts(petl_view, *field_names).cut(*field_names, 'count')
    rows = iter(counted)
    # The first row is the header; fall back to an empty tuple if there is none.
    header: Tuple[str, ...] = next(rows, ())
    return [dict(zip(header, values)) for values in rows]
def test_valuecounts_shortrows():
    """Count ``bar`` values when one row omits that field.

    The expected header here is ``('value', 'count', 'frequency')`` and the
    short row is tallied under ``None``.
    """
    data = (
        ('foo', 'bar'),
        ('a', True),
        ('x', True),
        ('b',),  # short row: no value for 'bar'
        ('b', True),
        ('c', False),
        ('z', False),
    )
    expected = (
        ('value', 'count', 'frequency'),
        (True, 3, 3.0 / 6),
        (False, 2, 2.0 / 6),
        (None, 1, 1.0 / 6),
    )
    result = valuecounts(data, 'bar')
    # Two passes over the same result; presumably a re-iteration check.
    for _ in range(2):
        ieq(expected, result)
def test_valuecounts_shortrows():
    """Check ``valuecounts`` on ``bar`` when one row is missing that field.

    The short row is expected to be counted under ``None``.
    """
    data = (
        ('foo', 'bar'),
        ('a', True),
        ('x', True),
        ('b',),  # short row: no value for 'bar'
        ('b', True),
        ('c', False),
        ('z', False),
    )
    expected = (
        ('value', 'count', 'frequency'),
        (True, 3, 3.0 / 6),
        (False, 2, 2.0 / 6),
        (None, 1, 1.0 / 6),
    )
    iassertequal(expected, valuecounts(data, 'bar'))
def test_valuecounts_multifields():
    """Verify counts of (``foo``, ``bar``) value pairs, including a short row.

    The row ``('b',)`` is expected to be counted as the pair ``('b', None)``.
    """
    source = (
        ('foo', 'bar', 'baz'),
        ('a', True, .12),
        ('a', True, .17),
        ('b', False, .34),
        ('b', False, .44),
        ('b',),  # deliberately short: 'bar' and 'baz' are absent
        ('b', False, .56),
    )
    wanted = (
        ('foo', 'bar', 'count', 'frequency'),
        ('b', False, 3, 3.0 / 6),
        ('a', True, 2, 2.0 / 6),
        ('b', None, 1, 1.0 / 6),
    )
    counted = valuecounts(source, 'foo', 'bar')
    ieq(wanted, counted)
    # Second comparison of the same object; presumably a re-iteration check.
    ieq(wanted, counted)
def valuecounts(table, col_name):
    """Summarise the values of one column of ``table`` as a dict of counts.

    Rows whose ``col_name`` value equals the empty string are left out of
    the frequency calculation and reported under the ``'<blank>'`` key.
    Among the remaining rows, each value whose ``frequency`` (as reported
    by ``petl.valuecounts``) exceeds 0.01 gets its own entry; all other
    values are pooled under ``'<other>'``.

    :param table: a petl table.
    :param col_name: name of the column to summarise.
    :returns: dict mapping each reported value to its count, plus the
        ``'<other>'`` and ``'<blank>'`` entries (always present).

    Note: ``'<other>'`` and ``'<blank>'`` are written last, so a column
    value equal to either string would be overwritten by the summary entry.
    """
    total_rows = petl.nrows(table)
    # quote_single_quote is defined elsewhere; presumably it escapes the
    # column name for use inside the petl selection expression.
    non_blanks = petl.select(
        table, '{' + quote_single_quote(col_name) + "} != ''"
    )
    num_blanks = total_rows - petl.nrows(non_blanks)

    return_dict = {}
    unreported_count = 0
    for row in petl.records(petl.valuecounts(non_blanks, col_name)):
        if row['frequency'] > 0.01:
            return_dict[row[col_name]] = row['count']
        else:
            # Too rare to list individually; fold into the '<other>' bucket.
            unreported_count += row['count']

    return_dict['<other>'] = unreported_count
    return_dict['<blank>'] = num_blanks
    return return_dict
def test_valuecounts_somefields():
    """Verify separate value counts for ``foo`` and ``bar``.

    The expectation uses a ``('field', 'value', 'count', 'frequency')``
    header with one block of rows per requested field.

    NOTE(review): this is a per-field layout rather than a count of value
    combinations -- confirm it matches the ``valuecounts`` version under test.
    """
    source = (
        ('foo', 'bar', 'baz'),
        ('a', True, .12),
        ('b', True, .34),
        ('b',),  # deliberately short: 'bar' and 'baz' are absent
        ('b', True, .56),
        ('c', False, .86),
        ('c', False, .92),
    )
    wanted = (
        ('field', 'value', 'count', 'frequency'),
        ('foo', 'b', 3, 3.0 / 6),
        ('foo', 'c', 2, 2.0 / 6),
        ('foo', 'a', 1, 1.0 / 6),
        ('bar', True, 3, 3.0 / 6),
        ('bar', False, 2, 2.0 / 6),
        ('bar', None, 1, 1.0 / 6),
    )
    counted = valuecounts(source, 'foo', 'bar')
    ieq(wanted, counted)
    # Second comparison of the same object; presumably a re-iteration check.
    ieq(wanted, counted)
def test_valuecounts_allfields():
    """Verify that ``valuecounts`` without field arguments covers all fields.

    Counts for ``foo`` and ``bar`` are both expected, with the short row
    contributing ``None`` to ``bar``.
    """
    source = (
        ('foo', 'bar'),
        ('a', True),
        ('b', True),
        ('b',),  # deliberately short: 'bar' is absent
        ('b', True),
        ('c', False),
        ('c', False),
    )
    wanted = (
        ('field', 'value', 'count', 'frequency'),
        ('foo', 'b', 3, 3.0 / 6),
        ('foo', 'c', 2, 2.0 / 6),
        ('foo', 'a', 1, 1.0 / 6),
        ('bar', True, 3, 3.0 / 6),
        ('bar', False, 2, 2.0 / 6),
        ('bar', None, 1, 1.0 / 6),
    )
    counted = valuecounts(source)
    ieq(wanted, counted)
    # Second comparison of the same object; presumably a re-iteration check.
    ieq(wanted, counted)
def test_valuecounts_multifields():
    """Count (``foo``, ``bar``) pairs; a short row pairs 'b' with ``None``."""
    header = ("foo", "bar", "baz")
    body = (
        ("a", True, 0.12),
        ("a", True, 0.17),
        ("b", False, 0.34),
        ("b", False, 0.44),
        ("b",),  # short row: only 'foo' is present
        ("b", False, 0.56),
    )
    rows = (header,) + body
    expected = (
        ("foo", "bar", "count", "frequency"),
        ("b", False, 3, 3.0 / 6),
        ("a", True, 2, 2.0 / 6),
        ("b", None, 1, 1.0 / 6),
    )
    result = valuecounts(rows, "foo", "bar")
    # Two passes over the same result; presumably a re-iteration check.
    for _ in range(2):
        ieq(expected, result)
""" Examples used in docstrings. """ # valuecounts ############## table = (('foo', 'bar', 'baz'), ('a', True, .12), ('a', True, .17), ('b', False, .34), ('b', False, .44), ('b', ), ('b', False, .56)) from petl import look, valuecounts look(table) look(valuecounts(table, 'foo')) look(valuecounts(table, 'foo', 'bar')) # facetcolumns from petl import facetcolumns table = [['foo', 'bar', 'baz'], ['a', 1, True], ['b', 2, True], ['b', 3]] fc = facetcolumns(table, 'foo') fc['a'] fc['a']['foo'] fc['a']['bar'] fc['a']['baz'] fc['b'] fc['b']['foo'] fc['b']['bar'] fc['b']['baz'] fc['c']
['b', True], ['c', False]] etl.valuecounter(table, 'foo').most_common() # valuecounts() ############### import petl as etl table = [['foo', 'bar', 'baz'], ['a', True, 0.12], ['a', True, 0.17], ['b', False, 0.34], ['b', False, 0.44], ['b']] etl.valuecounts(table, 'foo') etl.valuecounts(table, 'foo', 'bar') # parsecounter() ################ import petl as etl table = [['foo', 'bar', 'baz'], ['A', 'aaa', 2], ['B', u'2', '3.4'], [u'B', u'3', u'7.8', True], ['D', '3.7', 9.0], ['E', 42]] counter, errors = etl.parsecounter(table, 'bar') counter.most_common()
def value_count(filepath, *values):
    """Return value counts for the given fields of a CSV file as a list of dicts.

    ``values`` are passed straight through to ``etl.valuecounts`` as the
    field arguments.
    """
    counts = etl.valuecounts(etl.fromcsv(filepath), *values)
    return list(counts.dicts())
def get_value_counts_from_csv_file(collection_file_name: str, fields_to_fetch: List[str]) -> List:
    """Return value counts for ``fields_to_fetch`` from a CSV file.

    The result is a list of dicts built from ``etl.valuecounts`` output
    with its ``frequency`` column removed.
    """
    source = etl.fromcsv(collection_file_name)
    counts_only = etl.cutout(etl.valuecounts(source, *fields_to_fetch), "frequency")
    return list(counts_only.dicts())
""" # valuecounts ############## table = (('foo', 'bar', 'baz'), ('a', True, .12), ('a', True, .17), ('b', False, .34), ('b', False, .44), ('b',), ('b', False, .56)) from petl import look, valuecounts look(table) look(valuecounts(table, 'foo')) look(valuecounts(table, 'foo', 'bar')) # facetcolumns from petl import facetcolumns table = [['foo', 'bar', 'baz'], ['a', 1, True], ['b', 2, True], ['b', 3]] fc = facetcolumns(table, 'foo') fc['a'] fc['a']['foo'] fc['a']['bar'] fc['a']['baz']