def rowlengths(table):
    """
    Report on row lengths found in the table. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    Useful for finding potential problems in data files.

    The result is a two-column table (``length``, ``count``) with one row per
    distinct row length, ordered from the most to the least frequent length.

    """
    # Tally how many data rows have each length.
    length_counts = Counter(len(row) for row in data(table))
    # Header row first, then (length, count) pairs, most frequent first.
    rows = [('length', 'count')] + length_counts.most_common()
    return wrap(rows)
def _get_error_details(target, num, err, record, schema):
    '''Build a troubleshooting message showing the row that failed to write.

    :param target: destination being written to; it is only mentioned in the
        message when it is a string
    :param num: number of the failing row, formatted with ``%d``
    :param err: the error raised while writing, formatted with ``%s``
    :param record: the failing row, either a dict or a sequence of values
    :param schema: the schema in use; supplies the header names and is echoed
        in the message
    :returns: the formatted multi-line message
    '''
    headers = _get_schema_header_names(schema)
    # A dict record contributes its values in the dict's own order.
    # NOTE(review): that order is assumed to line up with the schema headers
    # -- confirm against callers.
    row = list(record.values()) if isinstance(record, dict) else record
    example = wrap([headers, row]).look()
    if isinstance(target, string_types):
        dest = " output: %s" % target
    else:
        dest = ''
    return "failed writing on row #%d: %s\n%s\n schema: %s\n%s" % (
        num, err, dest, schema, example)
def stringpatterns(table, field):
    """
    Profile string patterns in the given field, returning a table of patterns,
    counts and frequencies. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['Mr. Foo', '123-1254'],
        ...          ['Mrs. Bar', '234-1123'],
        ...          ['Mr. Spo', '123-1254'],
        ...          [u'Mr. Baz', u'321 1434'],
        ...          [u'Mrs. Baz', u'321 1434'],
        ...          ['Mr. Quux', '123-1254-XX']]
        >>> etl.stringpatterns(table, 'foo')
        +------------+-------+---------------------+
        | pattern    | count | frequency           |
        +============+=======+=====================+
        | 'Aa. Aaa'  |     3 |                 0.5 |
        +------------+-------+---------------------+
        | 'Aaa. Aaa' |     2 |  0.3333333333333333 |
        +------------+-------+---------------------+
        | 'Aa. Aaaa' |     1 | 0.16666666666666666 |
        +------------+-------+---------------------+

        >>> etl.stringpatterns(table, 'bar')
        +---------------+-------+---------------------+
        | pattern       | count | frequency           |
        +===============+=======+=====================+
        | '999-9999'    |     3 |                 0.5 |
        +---------------+-------+---------------------+
        | '999 9999'    |     2 |  0.3333333333333333 |
        +---------------+-------+---------------------+
        | '999-9999-AA' |     1 | 0.16666666666666666 |
        +---------------+-------+---------------------+

    Rows are ordered from the most to the least frequent pattern; the
    frequency column is each pattern's count divided by the sum of all counts.

    """
    # (pattern, count) pairs, most frequent first.
    ranked = stringpatterncounter(table, field).most_common()
    total = sum(count for _, count in ranked)
    rows = [('pattern', 'count', 'frequency')]
    for pattern, count in ranked:
        # float() keeps the division fractional even for integer counts.
        rows.append((pattern, count, float(count) / total))
    return wrap(rows)
def stringpatterns(table, field):
    """
    Profile string patterns in the given field, returning a table of patterns,
    counts and frequencies. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['Mr. Foo', '123-1254'],
        ...          ['Mrs. Bar', '234-1123'],
        ...          ['Mr. Spo', '123-1254'],
        ...          [u'Mr. Baz', u'321 1434'],
        ...          [u'Mrs. Baz', u'321 1434'],
        ...          ['Mr. Quux', '123-1254-XX']]
        >>> etl.stringpatterns(table, 'foo')
        +------------+-------+---------------------+
        | pattern    | count | frequency           |
        +============+=======+=====================+
        | 'Aa. Aaa'  |     3 |                 0.5 |
        +------------+-------+---------------------+
        | 'Aaa. Aaa' |     2 |  0.3333333333333333 |
        +------------+-------+---------------------+
        | 'Aa. Aaaa' |     1 | 0.16666666666666666 |
        +------------+-------+---------------------+

        >>> etl.stringpatterns(table, 'bar')
        +---------------+-------+---------------------+
        | pattern       | count | frequency           |
        +===============+=======+=====================+
        | '999-9999'    |     3 |                 0.5 |
        +---------------+-------+---------------------+
        | '999 9999'    |     2 |  0.3333333333333333 |
        +---------------+-------+---------------------+
        | '999-9999-AA' |     1 | 0.16666666666666666 |
        +---------------+-------+---------------------+

    Rows are ordered from the most to the least frequent pattern; the
    frequency column is each pattern's count divided by the sum of all counts.

    """
    # (pattern, count) pairs, most frequent first.
    ranked = stringpatterncounter(table, field).most_common()
    total = sum(count for _, count in ranked)
    rows = [('pattern', 'count', 'frequency')]
    for pattern, count in ranked:
        # float() keeps the division fractional even for integer counts.
        rows.append((pattern, count, float(count) / total))
    return wrap(rows)