예제 #1
0
파일: job.py 프로젝트: berkeman/accelerator
def show(url, job, show_output):
    """Print a summary of job to stdout.

    The summary consists of the job path (underlined), the encoded
    contents of setup.json (with timestamps converted to strings), and,
    where present, the job's datasets, subjobs and files.  A warning is
    printed if post.json is missing.  If post.json exists, url is used
    to ask whether the job is current, and a note is printed when it is
    not.

    When show_output is true the job output is printed in full,
    otherwise only a hint about its size is printed (if there is any).
    """

    def section(title, entries):
        # blank line, heading, then one indented line per entry
        print()
        print(title)
        for entry in entries:
            print('   ', entry)

    heading = job.path
    print(heading)
    print('=' * len(heading))

    setup = job.json_load('setup.json')
    setup.pop('_typing', None)
    setup.starttime = str(datetime.fromtimestamp(setup.starttime))
    if 'endtime' in setup:
        setup.endtime = str(datetime.fromtimestamp(setup.endtime))
    print(encode_setup(setup, as_str=True))

    datasets = job.datasets
    if datasets:
        section('datasets:', (ds.quoted for ds in datasets))

    try:
        post = job.json_load('post.json')
    except FileNotFoundError:
        post = None
        print(colour('WARNING: Job did not finish', 'job/warning'))

    if post:
        if post.subjobs:
            section('subjobs:', sorted(post.subjobs))
        if post.files:
            section('files:', (job.filename(fn) for fn in sorted(post.files)))
        if not call(url + '/job_is_current/' + url_quote(job)):
            print(colour('Job is not current', 'job/info'))
    print()

    out = job.output()
    if not show_output:
        if out:
            message = '%s produced %d bytes of output, use --output/-o to see it'
            print(message % (job, len(out)))
            print()
        return

    if not out:
        print(job, 'produced no output')
        print()
        return
    print('output (use --just-output/-O to see only the output):')
    print(out)
    if not out.endswith('\n'):
        # make sure the output is followed by a line break
        print()
예제 #2
0
파일: grep.py 프로젝트: eBay/accelerator
 def separate(items, lens):
     """Join items with tab-like runs of coloured spaces.

     lens holds the display length of each item; after every item
     enough spaces are added to reach the next multiple of 8.  No
     padding is emitted after the last item.
     """
     pieces = []
     for text, width in zip(items, lens):
         padding = ' ' * (8 - width % 8)
         pieces += [text, colour(padding, 'cyan', 'underline')]
     # drop the padding that followed the final item
     del pieces[-1:]
     return ''.join(pieces)
예제 #3
0
 def separate(items, lens):
     """Join items with tab-like runs of coloured spaces.

     lens holds the display length of each item; after every item
     enough spaces are added to reach the next multiple of
     args.tab_length.  No padding is emitted after the last item.
     """
     pieces = []
     for text, width in zip(items, lens):
         tab = args.tab_length
         padding = ' ' * (tab - width % tab)
         pieces += [text, colour(padding, 'grep/separator')]
     # drop the padding that followed the final item
     del pieces[-1:]
     return ''.join(pieces)
예제 #4
0
 def gen_headers(headers):
     """Build the header line for headers as UTF-8 bytes.

     The prefix headers are put first, every item is escaped when an
     escape_item function is set, and the coloured items are joined by
     separate().  The result ends with a newline.
     """
     plain = headers_prefix + headers
     if escape_item:
         plain = [escape_item(header) for header in plain]
     # lengths are taken from the uncoloured text
     widths = map(len, plain)
     painted = (colour(header, 'grep/header') for header in plain)
     line = separate(painted, widths)
     return line.encode('utf-8', 'surrogatepass') + b'\n'
예제 #5
0
 def colour_item(item):
     """Return item with every range from matching_ranges() highlighted."""
     pieces = []
     cursor = 0
     for start, stop in matching_ranges(item):
         # text before the match is kept as is, the match is coloured
         pieces.append(item[cursor:start])
         pieces.append(colour(item[start:stop], 'grep/highlight'))
         cursor = stop
     # whatever follows the last match
     pieces.append(item[cursor:])
     return ''.join(pieces)
예제 #6
0
def printdesc(items, columns, colour_prefix, full=False):
    """Print a listing of names with their descriptions to stdout.

    items is an iterable of (name, description) pairs, where description
    is a string that may span several lines.  columns is the available
    line width.  colour_prefix selects the colour used for names
    (colour_prefix + '/highlight').

    With full=False each entry is a single line: the name (padded so
    names of similar length line up) followed by the first description
    line, shortened to fit.  With full=True the name is printed on its
    own line followed by all description lines, indented.

    Entries without a description, or where too little width remains
    for one, are printed as just the name.
    """
    ddot = ' ...'

    def chopline(description, max_len):
        # Cut description at a word boundary so that it fits in max_len
        # together with a trailing (faint) ddot marker.
        if len(description) > max_len:
            max_len -= len(ddot)
            parts = description.split()
            description = ''
            for part in parts:
                if len(description) + len(part) + 1 > max_len:
                    break
                if description:
                    description = '%s %s' % (
                        description,
                        part,
                    )
                else:
                    description = part
            description += colour.faint(ddot)
        return description

    # Every description is kept as a list of lines from here on.  (Mixing
    # lists and strings made full=True call .split() on a list.)
    items = [(name, description.strip('\n').split('\n'))
             for name, description in items]
    if not full:
        # make names the same length, within same-ish length groups
        lens = set(len(name) for name, _ in items)
        len2len = {}
        group_size = 14
        spaces = ' ' * group_size
        while lens:
            this = min(lens)
            here = {l for l in lens if l < this + group_size}
            m = max(here)
            len2len.update({l: m for l in here})
            lens -= here
        # the short listing only ever shows the first line
        items = [(
            (name + spaces)[:len2len[len(name)]],
            lines[:1],
        ) for name, lines in items]
    for name, lines in items:
        max_len = columns - len(ddot) - len(name)
        preamble = colour('  ' + name, colour_prefix + '/highlight')
        # Newlines were stripped from both ends, so the first line is
        # empty only when the whole description is empty.
        if lines[0] and max_len > 10:
            if full:
                print(preamble)
                for line in lines:
                    print('    ' + line)
            else:
                print(preamble + '  ' + chopline(lines[0], max_len))
        else:
            print(preamble)
예제 #7
0
def main(argv, cfg):
	"""List the build scripts found in the packages in cfg.method_directories.

	argv[0] is used as the program name, the rest is parsed as options
	and substrings to match against "package.scriptname".  Without any
	match argument everything is listed in short format.  Scripts that
	fail to import are reported on stderr and left out of the listing.
	"""
	parser = ArgumentParser(
		prog=argv.pop(0),
		description="lists and describes build scripts",
	)
	for flags, text in (
		(('-s', '--short'), 'short listing'),
		(('-p', '--path'), 'show package paths'),
	):
		parser.add_argument(*flags, action='store_true', help=text)
	parser.add_argument('match', nargs='*', default=[], help='substring used for matching')
	args = parser.parse_intermixed_args(argv)
	columns = terminal_size().columns

	# no args => list everything in short format
	if not args.match:
		args.short = True
		args.match = ['']

	found = []
	for package in cfg.method_directories:
		path = dirname(import_module(package).__file__)
		scripts = []
		found.append((package, path, scripts))
		candidates = glob(path + '/build.py')
		candidates.extend(glob(path + '/build_*.py'))
		for filename in sorted(candidates):
			# strip the ".py" suffix to get the module name
			name = basename(filename[:-3])
			modname = package + '.' + name
			if not any(m in modname for m in args.match):
				continue
			try:
				module = import_module(modname)
			except Exception as e:
				print(colour('%s: %s' % (filename, e), 'script/warning'), file=sys.stderr)
				continue
			scripts.append((name, getattr(module, 'description', '')))

	for package, path, scripts in sorted(found):
		if not scripts:
			continue
		print(path + '/' if args.path else package)
		printdesc(sorted(scripts), columns, 'script', full=not args.short)
예제 #8
0
파일: grep.py 프로젝트: eBay/accelerator
def main(argv, cfg):
    """Search datasets for lines matching a regular expression.

    argv[0] is used as the program name, the rest is parsed as options,
    a pattern, one or more datasets and optionally column names.
    Matching lines are written to file descriptor 1, formatted according
    to --format.  Unless --ordered is given, every wanted slice except
    the first is processed in its own child process.

    Returns 1 if any dataset lacks a requested column, otherwise falls
    off the end (returning None).
    """
    parser = ArgumentParser(
        usage="%(prog)s [options] pattern ds [ds [...]] [column [column [...]]",
        prog=argv.pop(0),
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument('pattern')
    parser.add_argument(
        'dataset', help='can be specified in the same ways as for "ax ds"')
    parser.add_argument('columns', nargs='*', default=[])
    args = parser.parse_intermixed_args(argv)

    pat_s = re.compile(args.pattern, re.IGNORECASE if args.ignore_case else 0)
    datasets = [name2ds(cfg, args.dataset)]
    columns = []

    # The remaining positional arguments are datasets for as long as they
    # resolve with name2ds. The first one that does not, and everything
    # after it, is taken as column names.
    for ds_or_col in args.columns:
        if columns:
            columns.append(ds_or_col)
        else:
            try:
                datasets.append(name2ds(cfg, ds_or_col))
            except Exception:
                columns.append(ds_or_col)

    # NOTE(review): datasets always holds at least one entry here (see
    # above), so this branch looks unreachable.
    if not datasets:
        parser.print_help(file=sys.stderr)
        return 1

    # Grepping exactly the shown columns needs no separate grep iterator.
    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = None

    # NOTE(review): g is not defined in this function; presumably a
    # module-level object carrying the number of slices - confirm.
    if args.slice:
        want_slices = []
        for s in args.slice:
            assert 0 <= s < g.slices, "Slice %d not available" % (s, )
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    # Check up front that every dataset has all shown and grepped columns.
    if columns or grep_columns:
        bad = False
        need_cols = set(columns)
        if grep_columns:
            need_cols.update(grep_columns)
        for ds in datasets:
            missing = need_cols - set(ds.columns)
            if missing:
                print('ERROR: %s does not have columns %r' % (
                    ds,
                    missing,
                ),
                      file=sys.stderr)
                bad = True
        if bad:
            return 1

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.pattern == '':
        highlight_matches = False

    separator = args.separator
    if separator is None and not sys.stdout.isatty():
        separator = '\t'

    # separate(items, lens) joins the output fields of one line. lens
    # holds the display length of each item and is only used by the
    # tab-like variant.
    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        def separate(items, lens):
            things = []
            for item, item_len in zip(items, lens):
                things.append(item)
                spaces = 8 - (item_len % 8)
                things.append(colour(' ' * spaces, 'cyan', 'underline'))
            return ''.join(things[:-1])

        # escape_item (csv) still needs a separator string to look for
        separator = '\t'
    else:
        separator_coloured = colour(separator, 'cyan', 'underline')

        def separate(items, lens):
            return separator_coloured.join(items)

    # Fallback serialisation for values json can not encode itself.
    def json_default(obj):
        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
            return str(obj)
        elif isinstance(obj, complex):
            return [obj.real, obj.imag]
        else:
            return repr(obj)

    if args.format == 'csv':

        # Quote items that contain the separator or start/end with a
        # quote character, doubling any '"'. Newlines are always written
        # as a literal backslash followed by n.
        def escape_item(item):
            if item and (separator in item or item[0] in '\'"'
                         or item[-1] in '\'"'):
                return '"' + item.replace('\n', '\\n').replace('"', '""') + '"'
            else:
                return item.replace('\n', '\\n')

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    # Write all matching lines of one slice of one dataset to fd 1.
    def grep(ds, sliceno):
        def no_conv(v):
            return v

        # Returns the function used to turn a value of this column into
        # text for matching: the identity for text-like column types
        # without none_support, unicode for everything else.
        def mk_conv(col):
            if ds.columns[col].type in (
                    'bytes',
                    'unicode',
                    'ascii',
            ):
                if not ds.columns[col].none_support:
                    return no_conv
            return unicode

        chk = pat_s.search

        # Returns an iterator over the values of this column in this
        # slice. ascii columns are requested as unicode, and bytes values
        # are decoded as utf-8 (None is passed through untouched).
        def mk_iter(col):
            if ds.columns[col].type == 'ascii':
                it = ds._column_iterator(sliceno, col, _type='unicode')
            else:
                it = ds._column_iterator(sliceno, col)
            if ds.columns[col].type == 'bytes':
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        # Returns item with every match of the pattern coloured red.
        def colour_item(item):
            pos = 0
            parts = []
            for m in pat_s.finditer(item):
                a, b = m.span()
                parts.extend((item[pos:a], colour.red(item[a:b])))
                pos = b
            parts.append(item[pos:])
            return ''.join(parts)

        # show() formats the current line (it reads items and lineno from
        # the loop at the end of grep) and returns it as bytes.
        if args.format == 'json':
            prefix = {}
            dumps = json.JSONEncoder(ensure_ascii=False,
                                     default=json_default).encode
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno

            def show():
                d = dict(zip(used_columns, items))
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    # with any prefix fields the columns go under "data"
                    prefix['data'] = d
                    d = prefix
                return dumps(d).encode('utf-8', 'surrogatepass')
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)

            def show():
                data = list(prefix)
                if args.show_lineno:
                    data.append(unicode(lineno))
                if PY2:
                    show_items = (v if isinstance(v, unicode) else
                                  str(v).decode('utf-8', 'replace')
                                  for v in items)
                else:
                    show_items = map(str, items)
                show_items = list(show_items)
                # lengths are taken before colouring, so that colour
                # codes do not count towards the display length
                lens = (len(item) for item in data + show_items)
                if highlight_matches:
                    show_items = list(map(colour_item, show_items))
                if escape_item:
                    # adjust the lengths by whatever escaping added
                    lens_unesc = (len(item) for item in data + show_items)
                    show_items = list(map(escape_item, show_items))
                    lens_esc = (len(item) for item in data + show_items)
                    lens = (
                        l + esc - unesc
                        for l, unesc, esc in zip(lens, lens_unesc, lens_esc))
                data.extend(show_items)
                return separate(data, lens).encode('utf-8', errors)

        used_columns = columns or sorted(ds.columns)
        if grep_columns and grep_columns != set(used_columns):
            grep_iter = izip(*(mk_iter(col) for col in grep_columns))
            conv_items = [mk_conv(col) for col in grep_columns]
        else:
            # match against the shown columns (grep_items is None below)
            grep_iter = repeat(None)
            conv_items = [mk_conv(col) for col in used_columns]
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if any(
                    chk(conv(item))
                    for conv, item in izip(conv_items, grep_items or items)):
                # This will be atomic if the line is not too long
                # (at least up to PIPE_BUF bytes, should be at least 512).
                write(1, show() + b'\n')

    # Process target: grep all datasets for one slice. When q is set, the
    # process waits on it before starting and again before every dataset
    # in wait_for, after reporting the previous work as done.
    def one_slice(sliceno, q, wait_for):
        try:
            if q:
                q.get()
            for ds in datasets:
                if ds in wait_for:
                    q.task_done()
                    q.get()
                grep(ds, sliceno)
        except KeyboardInterrupt:
            return
        except IOError as e:
            if e.errno == errno.EPIPE:
                return
            else:
                raise
        finally:
            # Make sure we are joinable
            try:
                q.task_done()
            except Exception:
                pass

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # headers maps each dataset whose sorted column list differs from
    # that of the dataset before it to that list. It stays empty when
    # columns were specified or -H was not given.
    headers = {}
    if args.headers:
        if columns:
            current_headers = columns
        else:
            current_headers = None
            for ds in datasets:
                candidate_headers = sorted(ds.columns)
                if candidate_headers != current_headers:
                    headers[ds] = current_headers = candidate_headers
            # the first dataset's headers are printed right away below
            current_headers = headers.pop(datasets[0])

        def show_headers(headers):
            if args.format != 'json':
                show_items = headers_prefix + headers
                if escape_item:
                    show_items = list(map(escape_item, show_items))
                print(
                    separate(map(colour.blue, show_items),
                             map(len, show_items)))

        show_headers(current_headers)

    queues = []
    children = []
    if not args.ordered:
        # All wanted slices except the first get a child process each,
        # the first one is handled by this process below.
        q = None
        wait_for = set(headers)
        for sliceno in want_slices[1:]:
            if wait_for:
                # a queue is only needed when headers change along the way
                q = JoinableQueue()
                q.put(None)
                queues.append(q)
            p = Process(
                target=one_slice,
                args=(sliceno, q, wait_for),
                name='slice-%d' % (sliceno, ),
            )
            p.daemon = True
            p.start()
            children.append(p)
        want_slices = want_slices[:1]

    try:
        for ds in datasets:
            if ds in headers:
                # wait for all children to finish the previous datasets,
                # print the new headers, then let the children continue
                for q in queues:
                    q.join()
                show_headers(headers.pop(ds))
                for q in queues:
                    q.put(None)
            for sliceno in want_slices:
                grep(ds, sliceno)
        for c in children:
            c.join()
    except KeyboardInterrupt:
        print()
예제 #9
0
def main(argv, cfg):
    """Print information about the datasets named on the command line.

    argv[0] is used as the program name, the rest is parsed as options
    and dataset names.  With -l/-L only the datasets of each job are
    listed together with their line counts.  Otherwise a report with
    parent, method, columns, line count and optionally chain and slice
    balance information is printed for every dataset.

    Does not return normally: it always ends by calling finish(), which
    calls exit().
    """
    usage = "%(prog)s [options] ds [ds [...]]"
    parser = ArgumentParser(prog=argv.pop(0), usage=usage)
    parser.add_argument('-c',
                        '--chain',
                        action='store_true',
                        help='list all datasets in a chain')
    parser.add_argument('-C',
                        '--non-empty-chain',
                        action='store_true',
                        help='list all non-empty datasets in a chain')
    parser.add_argument('-l',
                        '--list',
                        action='store_true',
                        help='list all datasets in a job with number of rows')
    parser.add_argument(
        '-L',
        '--chainedlist',
        action='store_true',
        help='list all datasets in a job with number of chained rows')
    parser.add_argument('-m',
                        '--suppress-minmax',
                        action='store_true',
                        help='do not print min/max column values')
    parser.add_argument('-n',
                        '--suppress-columns',
                        action='store_true',
                        help='do not print columns')
    parser.add_argument('-q',
                        '--suppress-errors',
                        action='store_true',
                        help='silently ignores bad input datasets/jobids')
    parser.add_argument(
        '-s',
        '--slices',
        action='store_true',
        help='list relative number of lines per slice in sorted order')
    parser.add_argument('-S',
                        '--chainedslices',
                        action='store_true',
                        help='same as -s but for full chain')
    parser.add_argument('-w',
                        '--location',
                        action='store_true',
                        help='show where (ds/filename) each column is stored')
    parser.add_argument(
        "dataset",
        nargs='+',
        help=
        'the job part of the dataset name can be specified in the same ways as for "ax job". you can use ds~ or ds~N to follow the chain N steps backwards, or ^ to follow .parent. this requires specifying the ds-name, so wd-1~ will not do this, but wd-1/default~ will.'
    )
    args = parser.parse_intermixed_args(argv)
    # -C implies -c
    args.chain = args.chain or args.non_empty_chain

    # Report the names that could not be resolved (unless -q) and exit,
    # with status 1 if anything was reported.
    def finish(badinput):
        if badinput and not args.suppress_errors:
            print('Error, failed to resolve datasets:', file=sys.stderr)
            for n, e in badinput:
                print('    %r: %s' % (
                    n,
                    e,
                ), file=sys.stderr)
            exit(1)
        exit()

    # (name, exception) for every argument that could not be resolved
    badinput = []

    if args.list or args.chainedlist:
        for n in args.dataset:
            try:
                # try the name as a dataset first, then as a job
                try:
                    dsvec = name2ds(cfg, n).job.datasets
                except NoSuchWhateverError:
                    dsvec = name2job(cfg, n).datasets
            except Exception as e:
                badinput.append((n, e))
                dsvec = None
            if dsvec:
                print('%s' % (dsvec[0].job, ))
                v = []
                for ds in dsvec:
                    if args.chainedlist:
                        lines = sum(sum(x.lines) for x in ds.chain())
                    else:
                        lines = sum(ds.lines)
                    v.append((ds.name, '{:n}'.format(lines)))
                len_n, len_l = colwidth(v)
                template = "{0:%d}  ({1:>%d})" % (len_n, len_l)
                for name, numlines in sorted(v):
                    print('    ' + template.format(name, numlines))
        # finish() exits, so the report below is only reached without -l/-L
        finish(badinput)

    for n in args.dataset:
        try:
            ds = name2ds(cfg, n)
        except NoSuchWhateverError as e:
            badinput.append((n, e))
            continue

        print(ds.quoted)
        if ds.parent:
            if isinstance(ds.parent, tuple):
                print("    Parents:")
                max_n = max(len(x.quoted) for x in ds.parent)
                template = "{1:%d}" % (max_n, )
                data = tuple(
                    (None, x.quoted) for ix, x in enumerate(ds.parent))
                data = sorted(data, key=lambda x: x[1])
                printcolwise(data, template, lambda x: x, minrows=8, indent=8)
            else:
                print("    Parent:", ds.parent.quoted)
        print("    Method:", quote(ds.job.method))
        if ds.filename:
            print("    Filename:", quote(ds.filename))
        if ds.previous:
            print("    Previous:", ds.previous.quoted)
        if ds.hashlabel is not None:
            print("    Hashlabel:", quote(ds.hashlabel))

        # Format a min/max pair as "[min, max]", each value padded to
        # MINMAXWIDTH characters. Returns '' when min/max display is
        # suppressed or when minval is None.
        def prettyminmax(minval, maxval):
            if args.suppress_minmax:
                return ''
            s = '[%%%ds, %%%ds]' % (MINMAXWIDTH, MINMAXWIDTH)
            if minval is None:
                return ''
            elif isinstance(minval, float):

                # Number of digits before the decimal point of x, at most
                # MINMAXWIDTH - 2. inf and nan count as 3, and zero as
                # (MINMAXWIDTH - 2) // 2.
                def intdigits(x):
                    if isinf(x) or isnan(x):
                        return 3
                    return min(MINMAXWIDTH -
                               2, floor(log10(abs(x)) +
                                        1)) if x else (MINMAXWIDTH - 2) // 2

                ints = max(intdigits(minval), intdigits(maxval))
                if ints > 0:
                    format = "%% %d.%df" % (ints, MINMAXWIDTH - ints - 2)
                elif ints < -4:
                    format = "%% .%de" % (MINMAXWIDTH - 7, )
                else:
                    format = "%% .%df" % (MINMAXWIDTH - 3, )

                # Values that are whole numbers are shown as ints,
                # everything else (including inf and nan) using format.
                def format_or_int(v):
                    try:
                        i = int(v)
                        if v == i:
                            return i
                    except (OverflowError, ValueError):
                        pass
                    return locale.format_string(format, v)

                return s % (format_or_int(minval), format_or_int(maxval))
            elif isinstance(minval, int):
                return s % (minval, maxval)
            elif isinstance(minval, (date, time, datetime)):
                return s % (minval, maxval)
            else:
                return s % (minval, maxval)

        if not args.suppress_columns:
            print("    Columns:")
            name2typ = {
                n: c.type + '+None' if c.none_support else c.type
                for n, c in ds.columns.items()
            }
            len_n, len_t = colwidth(
                (quote(n), name2typ[n]) for n, c in ds.columns.items())
            # Template fields: 0 name, 1 type, 2 hashlabel marker,
            # 3 min/max, 4 location, 5 compression (4 and 5 only with -w).
            if args.location:
                len_l = max(
                    len(quote(c.location)) for c in ds.columns.values())
                len_c = max(len(c.compression) for c in ds.columns.values())
                template = '        {2} {0:%d}  {1:%d}  {4:%d} {5:%d}  {3}' % (
                    len_n,
                    len_t,
                    len_l,
                    len_c,
                )
            else:
                template = '        {2} {0:%d}  {1:%d}  {3}' % (
                    len_n,
                    len_t,
                )
            # with -S or -c min/max are taken over the whole chain
            chain = False
            if args.chainedslices or args.chain:
                chain = ds.chain()
            for n, c in sorted(ds.columns.items()):
                if chain:
                    minval, maxval = chain.min(n), chain.max(n)
                else:
                    minval, maxval = c.min, c.max
                # the hashlabel column is marked with a "*"
                hashdot = colour("*",
                                 "ds/highlight") if n == ds.hashlabel else " "
                print(
                    template.format(quote(n), name2typ[n], hashdot,
                                    prettyminmax(minval, maxval),
                                    quote(c.location), c.compression).rstrip())
            print("    {0:n} columns".format(len(ds.columns)))
        print("    {0:n} lines".format(sum(ds.lines)))

        if ds.previous or args.chain:
            chain = ds.chain()
            if args.non_empty_chain:
                print("    Full chain length {0:n}, from {1} to {2}".format(
                    len(chain), chain[0], chain[-1]))
                # from here on chain is a plain list of non-empty datasets
                chain = [ds for ds in chain if sum(ds.lines)]
                print("    Filtered chain length {0:n}".format(len(chain)))
            if chain:
                if not args.non_empty_chain:
                    print("    Chain length {0:n}, from {1} to {2}".format(
                        len(chain), chain[0], chain[-1]))
                if args.chain:
                    data = tuple((ix, "%s/%s" % (x.job, x.name),
                                  "{:n}".format(sum(x.lines)))
                                 for ix, x in enumerate(chain))
                    max_n, max_l = colwidth(x[1:] for x in data)
                    template = "{0:3}: {1:%d} ({2:>%d})" % (max_n, max_l)
                    printcolwise(data,
                                 template,
                                 lambda x: (x[0], x[1], x[2]),
                                 minrows=8,
                                 indent=8)

        if args.slices or args.chainedslices:
            # data holds (sliceno, formatted line count, line count)
            if args.chainedslices and ds.previous:
                data = (
                    (ix, '{:n}'.format(sum(x)), sum(x))
                    for ix, x in enumerate(zip(*(x.lines
                                                 for x in ds.chain()))))
                print('    Balance, lines per slice, full chain:')
            else:
                data = ((ix, '{:n}'.format(x), x)
                        for ix, x in enumerate(ds.lines))
                if ds.previous:
                    print('    Balance, lines per slice, tip dataset:')
                else:
                    print('    Balance, lines per slice:')
            # slices with the most lines first
            data = sorted(data, key=lambda x: -x[2])
            s = sum(x[2] for x in data)
            len_n = max(len(x[1]) for x in data)
            template = "{0:3}: {1!s}%% ({2:>%d})" % (len_n, )
            # (s or 1e20) avoids dividing by zero when there are no lines
            printcolwise(
                data,
                template,
                lambda x:
                (x[0], locale.format_string("%6.2f", (100 * x[2] /
                                                      (s or 1e20))), x[1]),
                minrows=8,
                indent=8)
            print("    Max to average ratio: " +
                  locale.format_string("%2.3f", (max(x[2] for x in data) /
                                                 ((s or 1e20) / len(data)), )))

        if ds.previous:
            # chain was assigned in the "ds.previous or args.chain" block
            print("    {0:n} total lines in chain".format(
                sum(sum(ds.lines) for ds in chain)))

    finish(badinput)
예제 #10
0
def main(argv, cfg):
    # -C overrides -A and -B (which in turn override -C)
    class ContextAction(Action):
        """argparse action that stores one value as both context sizes."""

        def __call__(self, parser, namespace, values, option_string=None):
            # The same count is used for leading and trailing context.
            for attr in ('before_context', 'after_context'):
                setattr(namespace, attr, values)

    parser = ArgumentParser(
        usage=
        "%(prog)s [options] [-e] pattern [...] [-d] ds [...] [[-n] column [...]]",
        description="""positional arguments:
  pattern               (-e, --regexp)
  dataset               (-d, --dataset) can be specified as for "ax ds"
  columns               (-n, --column)""",
        prog=argv.pop(0),
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-v',
        '--invert-match',
        action='store_true',
        help="select non-matching lines",
    )
    parser.add_argument(
        '-o',
        '--only-matching',
        action='store_true',
        help="only print matching part (or columns with -l)",
    )
    parser.add_argument(
        '-l',
        '--list-matching',
        action='store_true',
        help=
        "only print matching datasets (or slices with -S)\nwhen used with -o, only print matching columns",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-M',
        '--allow-missing-columns',
        action='store_true',
        help="datasets are allowed to not have (some) columns",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument(
        '-T',
        '--tab-length',
        type=int,
        metavar='LENGTH',
        help="field alignment, always uses spaces as separator",
    )
    parser.add_argument(
        '-B',
        '--before-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of leading context",
    )
    parser.add_argument(
        '-A',
        '--after-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of trailing context",
    )
    parser.add_argument(
        '-C',
        '--context',
        type=int,
        default=0,
        metavar='NUM',
        action=ContextAction,
        help="print NUM lines of context\n" +
        "context is only taken from the same slice of the same\n" +
        "dataset, and may intermix with output from other\n" +
        "slices. Use -O to avoid that, or -S -L to see it.",
    )
    parser.add_argument('-e',
                        '--regexp',
                        default=[],
                        action='append',
                        dest='patterns',
                        help=SUPPRESS)
    parser.add_argument('-d',
                        '--dataset',
                        default=[],
                        action='append',
                        dest='datasets',
                        help=SUPPRESS)
    parser.add_argument('-n',
                        '--column',
                        default=[],
                        action='append',
                        dest='columns',
                        help=SUPPRESS)
    parser.add_argument('words', nargs='*', help=SUPPRESS)
    args = parser.parse_intermixed_args(argv)

    if args.before_context < 0 or args.after_context < 0:
        print('Context must be >= 0', file=sys.stderr)
        return 1

    columns = args.columns

    try:
        args.datasets = [name2ds(cfg, ds) for ds in args.datasets]
    except NoSuchWhateverError as e:
        print(e, file=sys.stderr)
        return 1

    for word in args.words:
        if not args.patterns:
            args.patterns.append(word)
        elif columns and args.datasets:
            columns.append(word)
        else:
            try:
                args.datasets.append(name2ds(cfg, word))
            except NoSuchWhateverError as e:
                if not args.datasets:
                    print(e, file=sys.stderr)
                    return 1
                columns.append(word)

    if not args.patterns or not args.datasets:
        parser.print_help(file=sys.stderr)
        return 1

    datasets = args.datasets
    patterns = []
    for pattern in args.patterns:
        try:
            patterns.append(
                re.compile(pattern, re.IGNORECASE if args.ignore_case else 0))
        except re.error as e:
            print("Bad pattern %r:\n%s" % (
                pattern,
                e,
            ), file=sys.stderr)
            return 1

    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = set()

    # Work out which slices to grep: the ones given with -s (duplicates
    # removed, first occurrence keeps its position), or all of them.
    if args.slice:
        want_slices = []
        for s in args.slice:
            # This is validation of user input, so it must not be an
            # assert (those are removed when running under python -O).
            # parser.error prints usage plus the message and exits.
            if not 0 <= s < g.slices:
                parser.error("Slice %d not available" % (s, ))
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if len(want_slices) == 1:
        # it will be automatically ordered, so let's not work for it.
        args.ordered = False

    if args.only_matching:
        if args.list_matching:
            args.list_matching = False
            only_matching = 'columns'
        else:
            only_matching = 'part'
    else:
        only_matching = False

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    def columns_for_ds(ds, columns=columns):
        """Return the column names to use for ds.

        When columns is non-empty, the names from it that exist in
        ds.columns are returned, in the iteration order of columns.
        Otherwise all of ds.columns is returned, sorted.
        """
        if not columns:
            return sorted(ds.columns)
        return [name for name in columns if name in ds.columns]

    if columns or grep_columns:
        if args.allow_missing_columns:
            keep_datasets = []
            for ds in datasets:
                if not columns_for_ds(ds):
                    continue
                if grep_columns and not columns_for_ds(ds, grep_columns):
                    continue
                keep_datasets.append(ds)
            if not keep_datasets:
                return 0
            datasets = keep_datasets
        else:
            bad = False
            need_cols = set(columns)
            if grep_columns:
                need_cols.update(grep_columns)
            for ds in datasets:
                missing = need_cols - set(ds.columns)
                if missing:
                    print('ERROR: %s does not have columns %r' % (
                        ds,
                        missing,
                    ),
                          file=sys.stderr)
                    bad = True
            if bad:
                return 1

    # For the status reporting, this gives how many lines have been processed
    # when reaching each ds ix, per slice. Ends with an extra fictional ds,
    # i.e. the total number of lines for that slice. And then the same again,
    # to simplify the code in the status shower.
    total_lines_per_slice_at_ds = [[0] * g.slices]
    for ds in datasets:
        total_lines_per_slice_at_ds.append(
            [a + b for a, b in zip(total_lines_per_slice_at_ds[-1], ds.lines)])
    total_lines_per_slice_at_ds.append(total_lines_per_slice_at_ds[-1])
    status_interval = {
        # twice per percent, but not too often or too seldom
        sliceno: min(max(total_lines_per_slice_at_ds[-1][sliceno] // 200, 10),
                     5000)
        for sliceno in want_slices
    }

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        args.colour = 'auto'
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.patterns == ['']:
        highlight_matches = False
    # Don't highlight anything with -l
    if args.list_matching:
        highlight_matches = False

    if args.format == 'json':
        # headers was just a mistake, ignore it
        args.headers = False

    separator = args.separator
    if args.tab_length:
        separator = None
    elif separator is None and not sys.stdout.isatty():
        separator = '\t'

    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        # and also so you can change the length of tabs.
        if (args.tab_length or 0) < 1:
            args.tab_length = 8

        def separate(items, lens):
            """Join items, padding each one with spaces to the next tab stop.

            lens gives the length to assume for each item when working
            out how many spaces are needed to reach a multiple of
            args.tab_length.
            """
            padded = []
            for text, width in zip(items, lens):
                pad = ' ' * (args.tab_length - (width % args.tab_length))
                padded += [text, colour(pad, 'grep/separator')]
            # No padding is wanted after the final item.
            del padded[-1:]
            return ''.join(padded)

        separator = '\t'
    else:
        separator_coloured = colour(separator, 'grep/separator')

        def separate(items, lens):
            """Join items with the coloured separator (lens is not used)."""
            joined = separator_coloured.join(items)
            return joined

    def json_default(obj):
        """Fallback conversion for values the json encoder can't handle.

        datetime, date and time values become their str() form, complex
        numbers become a [real, imag] list and anything else its repr().
        """
        temporal_types = (datetime.datetime, datetime.date, datetime.time)
        if isinstance(obj, temporal_types):
            return str(obj)
        if isinstance(obj, complex):
            return [obj.real, obj.imag]
        return repr(obj)

    if args.format == 'csv':

        def escape_item(item):
            """Escape item for csv style output.

            Newlines are always written as a backslash followed by n.
            A non-empty item that contains the separator, or that starts
            or ends with a quote character, is also wrapped in double
            quotes with any embedded double quotes doubled.
            """
            escaped = item.replace('\n', '\\n')
            needs_quotes = bool(item) and (
                separator in item
                or item[0] in '\'"'
                or item[-1] in '\'"'
            )
            if not needs_quotes:
                return escaped
            return '"' + escaped.replace('"', '""') + '"'

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    # This is for the ^T handling. Each slice sends an update when finishing
    # a dataset, and every status_interval[sliceno] lines while iterating.
    # To minimise the data sent the only information sent over the queue
    # is (sliceno, finished_dataset).
    # Status printing is triggered by ^T (or SIGINFO if that is available)
    # or by SIGUSR1.
    # Pressing it again within two seconds prints stats per slice too.
    q_status = mp.LockFreeQueue()

    def status_collector():
        """Collect progress updates from q_status and print them on request.

        Reads (sliceno, finished_dataset) messages from q_status and keeps
        a per slice [ds_ix, done_lines] record up to date. The nested show
        function is installed as handler for SIGINFO and/or SIGUSR1 (the
        ones that exist) and prints a progress summary with write(2, ...).
        When SIGINFO does not exist and stdin is a tty, stdin is also
        watched so that a ^T keypress (byte 20) can trigger SIGUSR1.
        Returns when q_status.get() raises QueueEmpty. Any changed
        terminal settings are restored on the way out.
        """
        q_status.make_reader()
        status = {sliceno: [0, 0] for sliceno in want_slices}
        #            [ds_ix, done_lines]
        # NOTE(review): this sums the totals for every slice, while the
        # progress added up in show only covers want_slices. Confirm that
        # this is the intended total when -s is used.
        total_lines = sum(total_lines_per_slice_at_ds[-1])
        # Time of the previous show call, in a list so show can update it.
        previous = [0]
        # base colour conf in if stderr is a tty, not stdout.
        if args.colour == 'auto':
            colour.configure_from_environ(stdout=sys.stderr)

        def show(sig, frame):
            """Signal handler that prints the current progress.

            Always prints a SUMMARY line. When called within two seconds
            of the previous call it prints one line per slice first.
            """
            t = monotonic()
            verbose = (previous[0] + 2 > t)  # within 2 seconds of previous
            previous[0] = t
            ds_ixes = []
            progress_lines = []
            progress_fraction = []
            for sliceno in want_slices:
                ds_ix, done_lines = status[sliceno]
                ds_ixes.append(ds_ix)
                # The slice has done at least done_lines, and less than
                # one more interval (or the rest of the current dataset).
                max_possible = min(
                    done_lines + status_interval[sliceno],
                    total_lines_per_slice_at_ds[ds_ix + 1][sliceno])
                done_lines = (done_lines +
                              max_possible) / 2  # middle of the possibilities
                progress_lines.append(done_lines)
                total = total_lines_per_slice_at_ds[-1][sliceno]
                if total == 0:
                    # Nothing to do in this slice, so it counts as done.
                    progress_fraction.append(1)
                else:
                    progress_fraction.append(done_lines / total)
            progress_total = sum(progress_lines) / (total_lines or 1)
            # Slices more than ten percentage points behind the overall
            # progress are highlighted.
            bad_cutoff = progress_total - 0.1
            if verbose:
                # Only name the dataset when the slices are not all in
                # the same one.
                show_ds = (len(datasets) > 1 and min(ds_ixes) != max(ds_ixes))
                for sliceno, ds_ix, p in zip(want_slices, ds_ixes,
                                             progress_fraction):
                    if ds_ix == len(datasets):
                        msg = 'DONE'
                    else:
                        msg = '{0:d}% of {1:n} lines'.format(
                            round(p * 100),
                            total_lines_per_slice_at_ds[-1][sliceno])
                        if show_ds:
                            msg = '%s (in %s)' % (
                                msg,
                                datasets[ds_ix].quoted,
                            )
                    msg = '%9d: %s' % (
                        sliceno,
                        msg,
                    )
                    if p < bad_cutoff:
                        msg = colour(msg, 'grep/infohighlight')
                    else:
                        msg = colour(msg, 'grep/info')
                    write(2, msg.encode('utf-8') + b'\n')
            msg = '{0:d}% of {1:n} lines'.format(round(progress_total * 100),
                                                 total_lines)
            if len(datasets) > 1:
                min_ds = min(ds_ixes)
                max_ds = max(ds_ixes)
                if min_ds < len(datasets):
                    # Name the dataset the slowest slice is in, with ++
                    # added when some slices have got further.
                    ds_name = datasets[min_ds].quoted
                    extra = '' if min_ds == max_ds else ' ++'
                    msg = '%s (in %s%s)' % (
                        msg,
                        ds_name,
                        extra,
                    )
            worst = min(progress_fraction)
            if worst < bad_cutoff:
                msg = '%s, worst %d%%' % (
                    msg,
                    round(worst * 100),
                )
            msg = colour('  SUMMARY: %s' % (msg, ), 'grep/info')
            write(2, msg.encode('utf-8') + b'\n')

        # Install show for the signals that exist on this platform, and
        # unblock them where pthread_sigmask is available.
        for signame in ('SIGINFO', 'SIGUSR1'):
            if hasattr(signal, signame):
                sig = getattr(signal, signame)
                signal.signal(sig, show)
                if hasattr(signal, 'pthread_sigmask'):
                    signal.pthread_sigmask(signal.SIG_UNBLOCK, {sig})
        # tc_original holds the terminal settings to restore (if any) and
        # using_stdin says if the loop below should also watch stdin.
        tc_original = None
        using_stdin = False
        if not hasattr(signal, 'SIGINFO') and sys.stdin.isatty():
            # ^T wont work automatically on this OS, so we need to handle it as terminal input
            import termios
            from accelerator.compat import selectors
            sel = selectors.DefaultSelector()
            sel.register(0, selectors.EVENT_READ)
            sel.register(q_status.r, selectors.EVENT_READ)
            try:
                tc_original = termios.tcgetattr(0)
                tc_changed = list(tc_original)
                # Turn off ICANON and IEXTEN in the local mode flags.
                tc_changed[3] &= ~(termios.ICANON | termios.IEXTEN)
                termios.tcsetattr(0, termios.TCSADRAIN, tc_changed)
                using_stdin = True
            except Exception:
                # Best effort, without this stdin is simply not watched.
                pass
            # we can't set stdin nonblocking, because it's probably the same
            # file description as stdout, so work around that with alarms.
            def got_alarm(sig, frame):
                raise IOError()

            signal.signal(signal.SIGALRM, got_alarm)
        try:
            while True:
                if using_stdin:
                    # Wait for stdin or the queue, and only go on to read
                    # from the queue when it was one of the ready ones.
                    do_q = False
                    for key, _ in sel.select():
                        if key.fd == 0:
                            try:
                                signal.alarm(
                                    1
                                )  # in case something else read it we block for max 1 second
                                try:
                                    pressed = ord(os.read(0, 1))
                                finally:
                                    signal.alarm(0)
                                if pressed == 20:
                                    write(2,
                                          b'\n')  # "^T" shows in the terminal
                                    os.kill(os.getpid(), signal.SIGUSR1)
                            except Exception:
                                # Includes the IOError from got_alarm.
                                pass
                        elif key.fd == q_status.r:
                            do_q = True
                    if not do_q:
                        continue
                try:
                    sliceno, finished_dataset = q_status.get()
                except QueueEmpty:
                    # presumably raised once all writers are gone, confirm
                    # against the queue implementation.
                    return
                if finished_dataset:
                    # Move on to the next dataset, with done_lines set to
                    # the number of lines before it in this slice.
                    ds_ix = status[sliceno][0] + 1
                    status[sliceno] = [
                        ds_ix, total_lines_per_slice_at_ds[ds_ix][sliceno]
                    ]
                else:
                    status[sliceno][1] += status_interval[sliceno]
        finally:
            if tc_original is not None:
                try:
                    termios.tcsetattr(0, termios.TCSADRAIN, tc_original)
                except Exception:
                    pass

    status_process = mp.SimplifiedProcess(target=status_collector,
                                          name='ax grep status')
    # everything else will write, so make it a writer right away
    q_status.make_writer()

    # Output is only allowed while holding this lock, so that long lines
    # do not get intermixed. (Or when alone in producing output.)
    io_lock = Lock()

    # This contains some extra stuff to be a better base for the other
    # outputters.
    # When used directly it enforces no ordering, but merges smaller writes
    # to keep the number of syscalls down.

    class Outputter:
        """Unordered output that merges small writes into bigger ones.

        Also the base for the ordering outputters, which is why it has
        q_in, q_out and buffer without using them much itself.
        """

        def __init__(self, q_in, q_out):
            self.q_in, self.q_out = q_in, q_out
            self.buffer = []
            self.merge_buffer = b''

        def put(self, data):
            """Add data to the merge buffer, writing it out at 1024 bytes."""
            self.merge_buffer += data
            if len(self.merge_buffer) < 1024:
                return
            self.move_merge()

        def move_merge(self):
            """Write out the merge buffer (if not empty) and clear it."""
            if not self.merge_buffer:
                return
            with io_lock:
                write(1, self.merge_buffer)
            self.merge_buffer = b''

        def start(self, ds):
            """Called before each ds, nothing to do here."""

        def end(self, ds):
            """Called after each ds, writes out whatever is pending."""
            self.move_merge()

        def finish(self):
            """Called when all output is done, nothing to do here."""

        def full(self):
            """Return True when the buffer has more than 5000 entries."""
            return 5000 < len(self.buffer)

        def excite(self):
            """Write out pending data and pump (without waiting) if buffered."""
            self.move_merge()
            if self.buffer:
                self.pump(False)

    # Partially ordered output, each header change acts as a fence.
    # This is used in all slices except the first.
    #
    # The queue gets True when the previous slice is ready for the next
    # header change, and None when the header is printed (and it's ok
    # to resume output).

    class HeaderWaitOutputter(Outputter):
        """Outputter that holds back output at each header change.

        While waiting, self.buffer holds the held back output, with a None
        entry at each sync point. Messages arrive on q_in and are passed
        on to q_out: True is forwarded as is, anything else is forwarded
        as None and releases the output up to the next sync point.
        """

        def start(self, ds):
            # A ds that is in headers is a sync point, for any other ds
            # just take the chance to get some output done.
            if ds in headers:
                self.add_wait()
            else:
                self.excite()

        def add_wait(self):
            # Each sync point is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def move_merge(self):
            # Unlike the base class this takes the data out of the merge
            # buffer first, and then either buffers it (when still held
            # back after pumping) or writes it.
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            with io_lock:
                write(1, data)

        def pump(self, wait=None):
            # Handle a message from q_in. wait defaults to self.full() and
            # is passed to q_in.get (presumably it selects a blocking get,
            # confirm against the queue implementation).
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # since pump is only called when we have outputted all
                # currently allowed output or when the next message is an
                # unblock for such output we can just unconditionally send
                # the True on to the next slice here.
                self.q_out.put(True)
                self.pump(wait)
                return
            else:
                self.q_out.put(None)
                self.drain()

        def drain(self):
            # Write the buffered output up to (not including) the next
            # sync point, and remove what was written from the buffer.
            assert self.buffer[
                0] is None, 'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        break
                    elif data:
                        write(1, data)
                else:
                    # We did not reach the next fence, so last item is real data
                    # and needs to be removed. (The buffer will then be empty and
                    # output will continue directly until reaching the sync point.)
                    pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            # Keep pumping (with wait=True) until nothing is held back.
            while self.buffer:
                self.pump(True)

    # Partially ordered output, each header change acts as a fence.
    # This is used only in the first slice, and outputs the headers.
    #
    # When it is ready to output headers it sends True in the queue.
    # When the True has travelled around the queue ring all slices are
    # ready, the headers are printed, and None is sent to let the other
    # slices resume output.
    # (When the None returns it is ignored, because output is resumed
    # as soon as the headers are printed.)

    class HeaderOutputter(HeaderWaitOutputter):
        """HeaderWaitOutputter variant that also prints the headers.

        Puts True on q_out when reaching a sync point, and when True comes
        back on q_in it writes the next value from headers_iter and then
        puts None on q_out before draining its own buffer.
        """

        def add_wait(self):
            # Only announce the sync point right away when nothing is held
            # back. Otherwise .drain announces it on reaching the None.
            if not self.buffer:
                self.q_out.put(True)
            self.buffer.append(None)
            self.buffer.append(
                b'')  # Avoid need for special case in .drain/.put
            self.pump()

        def drain(self):
            # Write the buffered output up to the next sync point (if any)
            # and remove it from the buffer. Stopping at a sync point also
            # puts True on q_out for it.
            assert self.buffer[
                0] is None, 'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        self.q_out.put(True)
                        break
                    elif data:
                        write(1, data)
                else:
                    # No further sync point, so the last item is data that
                    # has been written and should be removed as well.
                    pos += 1
            self.buffer[:pos] = ()

        def pump(self, wait=None):
            # Handle a message from q_in. wait defaults to self.full() and
            # is passed to q_in.get (presumably it selects a blocking get,
            # confirm against the queue implementation).
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # The True we put in when reaching the fence has travelled
                # all the way around the queue ring, it's time to print the
                # new headers
                write(1, next(headers_iter))
                # and then unblock the other slices
                self.q_out.put(None)
                self.drain()
                # No else, when the None comes back we just drop it.
            if not wait:
                # Keep going until q_in.get raises QueueEmpty.
                self.pump(False)

    # Fully ordered output, each slice waits for the previous slice.
    # For each ds, waits for None (anything really) before starting,
    # sends None when done.

    class OrderedOutputter(Outputter):
        """Outputter that holds back each ds until a message comes on q_in.

        self.buffer holds the held back output, with a None entry at the
        start of each ds. Each message received on q_in releases the
        output for one ds, and None is put on q_out when a ds is done.
        """

        def start(self, ds):
            # Each ds is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def end(self, ds):
            self.move_merge()
            if not self.buffer:
                # We are done with this ds, so let next slice continue
                self.q_out.put(None)

        def pump(self, wait=None):
            # Take a message from q_in and drain one ds if there was one.
            # wait defaults to self.full() and is passed to q_in.get
            # (presumably it selects a blocking get, confirm against the
            # queue implementation). The message itself is not looked at.
            if wait is None:
                wait = self.full()
            try:
                self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            self.drain()

        def move_merge(self):
            # Take the data out of the merge buffer, and then either
            # buffer it (when still held back after pumping) or write it.
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            # No need for a lock, the other slices aren't writing concurrently.
            write(1, data)

        def drain(self):
            # Write the buffered output for the first ds in the buffer and
            # remove it from the buffer.
            assert self.buffer[0] is None
            for pos, data in enumerate(self.buffer[1:], 1):
                if data is None:
                    # We are done with this ds, so let next slice continue
                    self.q_out.put(None)
                    break
                elif data:
                    write(1, data)
            else:
                # We did not reach the next ds, so last item is real data and
                # needs to be removed. (The buffer will then be empty and
                # output will continue directly until reaching the next ds.)
                pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            # Pump (with wait=True) until nothing is held back. If anything
            # was held back the last ds has not been reported as done by
            # .end or .drain, so that is done here.
            not_finished = bool(self.buffer)
            while self.buffer:
                self.pump(True)
            if not_finished:
                self.q_out.put(None)

    # Same as above but for the first slice so it prints headers when needed.

    class OrderedHeaderOutputter(OrderedOutputter):
        """OrderedOutputter that puts new headers first in the ds output."""

        def start(self, ds):
            # Each ds is separated by None in the buffer. The entry after
            # the None is the new headers when ds is in headers, and
            # otherwise an empty placeholder so that .drain needs no
            # special case.
            self.buffer.append(None)
            self.buffer.append(next(headers_iter) if ds in headers else b'')
            self.pump()

    # Choose the right outputter for the kind of sync we need.
    def outputter(q_in, q_out, first_slice=False):
        """Create the outputter that gives the kind of sync needed.

        args.list_matching always gives the plain Outputter. Otherwise
        args.ordered gives the ordered variants, headers (when non-empty)
        the header fencing variants and anything else the plain Outputter.
        first_slice selects the variant that prints the headers.
        """
        if args.list_matching:
            kind = Outputter
        elif args.ordered:
            kind = OrderedHeaderOutputter if first_slice else OrderedOutputter
        elif headers:
            kind = HeaderOutputter if first_slice else HeaderWaitOutputter
        else:
            kind = Outputter
        return kind(q_in, q_out)

    # Make printer for the selected output options
    def make_show(prefix, used_columns):
        """Build the function that formats one line of output.

        prefix is what goes before the data: a dict for the json format
        and a sequence of strings for the other formats. used_columns
        names the columns, in the same order as the items given to show.
        The returned show(lineno, items) gives the encoded line as bytes,
        with a newline at the end.
        """

        def matching_ranges(item):
            # Yields (start, stop) for the parts of item matched by any of
            # the patterns, in order, with overlapping/adjacent ones merged.
            spans = sorted(
                m.span() for pat in patterns for m in pat.finditer(item))
            if not spans:
                return
            start, stop = spans[0]
            for a, b in spans[1:]:
                if a > stop:
                    yield start, stop
                    start, stop = a, b
                elif b > stop:
                    stop = b
            yield start, stop

        def filter_item(item):
            # Just the matching parts of item, put together.
            return ''.join([item[a:b] for a, b in matching_ranges(item)])

        if args.format == 'json':
            encoder = json.JSONEncoder(ensure_ascii=False,
                                       default=json_default)

            def show(lineno, items):
                if only_matching == 'columns':
                    # Keep the complete value of each column that matches.
                    record = {
                        name: value
                        for name, value in zip(used_columns, items)
                        if filter_item(unicode(value))
                    }
                else:
                    if only_matching == 'part':
                        items = [
                            filter_item(unicode(value)) for value in items
                        ]
                    record = dict(zip(used_columns, items))
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    # With a prefix the data goes inside the prefix dict.
                    prefix['data'] = record
                    record = prefix
                encoded = encoder.encode(record)
                return encoded.encode('utf-8', 'surrogatepass') + b'\n'
        else:

            def colour_item(item):
                # item with every matching part highlighted.
                pieces = []
                done = 0
                for a, b in matching_ranges(item):
                    pieces.append(item[done:a])
                    pieces.append(colour(item[a:b], 'grep/highlight'))
                    done = b
                pieces.append(item[done:])
                return ''.join(pieces)

            def show(lineno, items):
                data = list(prefix)
                if args.show_lineno:
                    data.append(unicode(lineno))
                texts = map(unicode, items)
                if only_matching == 'columns':
                    show_items = [
                        text if filter_item(text) else '' for text in texts
                    ]
                elif only_matching:
                    show_items = [filter_item(text) for text in texts]
                else:
                    show_items = list(texts)
                # The lengths are taken before highlighting, so that the
                # colour sequences don't count.
                lens = (len(item) for item in data + show_items)
                if highlight_matches:
                    show_items = [colour_item(text) for text in show_items]
                if escape_item:
                    # Escaping changes what is shown, so add the change
                    # in length from it to each length.
                    unescaped = data + show_items
                    show_items = [escape_item(text) for text in show_items]
                    escaped = data + show_items
                    lens = (
                        n + len(esc) - len(raw)
                        for n, raw, esc in zip(lens, unescaped, escaped))
                data.extend(show_items)
                return separate(data, lens).encode('utf-8', errors) + b'\n'

        return show

    # This is called for each slice in each dataset.
    # Each slice has a separate process (the same for all datasets).
    # The first slice runs in the main process (unless -l), everything
    # else runs from one_slice.

    def grep(ds, sliceno, out):
        """Search one slice of one dataset and hand the lines to show to out.

        ds is the dataset, sliceno the slice to read and out the object
        that receives the result through its start(), put(), excite() and
        end() methods.

        When q_list is set nothing is sent through out.put(); instead
        (ds, sliceno) is put on q_list at the first matching line and the
        function returns immediately (without calling out.end()).
        """
        out.start(ds)
        # chk(s) is truthy when any of the patterns is found in s.
        if len(patterns) == 1:
            chk = patterns[0].search
        else:

            def chk(s):
                return any(p.search(s) for p in patterns)

        # One element list used as a mutable flag that mk_iter can clear.
        first = [True]

        def mk_iter(col):
            # Returns an iterator over the values of column col in this slice.
            kw = {}
            if first[0]:
                # Only the first iterator created carries the status
                # callback, and only if the slice has more lines than the
                # status interval.
                first[0] = False
                lines = ds.lines[sliceno]
                if lines > status_interval[sliceno]:

                    def cb(n):
                        # False here, as opposed to the True sent after each
                        # dataset by the callers of grep - presumably
                        # "progress inside a dataset" vs "dataset finished".
                        q_status.put((sliceno, False))
                        out.excite()

                    kw['callback'] = cb
                    kw['callback_interval'] = status_interval[sliceno]
            if ds.columns[col].type == 'ascii':
                kw['_type'] = 'unicode'
            it = ds._column_iterator(sliceno, col, **kw)
            if ds.columns[col].type == 'bytes':
                # bytes values are decoded as utf-8 so they can be searched
                # and shown like the other columns.
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        used_columns = columns_for_ds(ds)
        used_grep_columns = grep_columns and columns_for_ds(ds, grep_columns)
        # A separate iterator for the searched columns is only needed when
        # they are not the same set as the shown columns. Otherwise
        # grep_iter gives None and the shown items are searched.
        if grep_columns and set(used_grep_columns) != set(used_columns):
            grep_iter = izip(*(mk_iter(col) for col in used_grep_columns))
        else:
            grep_iter = repeat(None)
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        # before holds at most args.before_context of the latest lines
        # that were not shown, or is None when no before context is wanted.
        if args.before_context:
            before = deque((), args.before_context)
        else:
            before = None
        # The prefix is a dict for json output and a tuple of strings for
        # everything else.
        if args.format == 'json':
            prefix = {}
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno
            show = make_show(prefix, used_columns)
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)
            show = make_show(prefix, used_columns)
        if args.invert_match:
            maybe_invert = operator.not_
        else:
            maybe_invert = bool
        # Number of lines still to show, counting the matching line itself
        # and the after context.
        to_show = 0
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if maybe_invert(
                    any(chk(unicode(item)) for item in grep_items or items)):
                if q_list:
                    # Only reporting which (ds, sliceno) match, so the first
                    # match is enough.
                    q_list.put((ds, sliceno))
                    return
                # Flush the saved before context (oldest first).
                while before:
                    out.put(show(*before.popleft()))
                to_show = 1 + args.after_context
            if to_show:
                out.put(show(lineno, items))
                to_show -= 1
            elif before is not None:
                # Not shown now, but may become before context later.
                before.append((lineno, items))
        out.end(ds)

    # This runs in a separate process for each slice except the first
    # one (unless -l), which is handled specially in the main process.

    def one_slice(sliceno, q_in, q_out, q_to_close):
        """Process body for one slice: run grep over every dataset.

        q_to_close is closed if set. q_in is made a reader, q_out and
        q_list writers (each only if set). A dataset already present in
        seen_list is not searched, but a (sliceno, True) status is still
        sent for every dataset. Exits with status 1 on QueueEmpty.
        """
        if q_to_close:
            q_to_close.close()
        # Pick the direction this process uses for each queue it was given.
        queue_roles = (
            (q_in, 'make_reader'),
            (q_out, 'make_writer'),
            (q_list, 'make_writer'),
        )
        for queue, role in queue_roles:
            if queue:
                getattr(queue, role)()
        try:
            out = outputter(q_in, q_out)
            for ds in datasets:
                already_listed = seen_list is not None and ds in seen_list
                if not already_listed:
                    grep(ds, sliceno, out)
                q_status.put((sliceno, True))
            out.finish()
        except QueueEmpty:
            # Some other process died, so there is nothing useful to print.
            sys.exit(1)

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # {ds: headers} for each ds where headers change (not including the first).
    # this is every ds where sync between slices has to happen when not --ordered.
    headers = OrderedDict()
    if args.headers:
        current_headers = None
        for ds in datasets:
            candidate_headers = columns_for_ds(ds)
            if candidate_headers != current_headers:
                headers[ds] = current_headers = candidate_headers

        def gen_headers(headers):
            """Return the header line for headers as utf-8 encoded bytes.

            The current value of headers_prefix is put before headers,
            every title is escaped when escape_item is set, and the titles
            are coloured as 'grep/header' and joined by separate() using
            the uncoloured lengths.
            """
            titles = headers_prefix + headers
            if escape_item:
                titles = [escape_item(title) for title in titles]
            widths = [len(title) for title in titles]
            painted = [colour(title, 'grep/header') for title in titles]
            line = separate(painted, widths)
            return line.encode('utf-8', 'surrogatepass') + b'\n'

        # remove the starting ds, so no header changes means no special handling.
        current_headers = headers.pop(datasets[0])
        if not args.list_matching:
            write(1, gen_headers(current_headers))
        headers_iter = iter(map(gen_headers, headers.values()))

    q_in = q_out = first_q_out = q_to_close = q_list = None
    children = [status_process]
    seen_list = None
    if args.list_matching:
        # in this case all slices get their own process
        # and the main process just prints the matching slices
        q_list = mp.LockFreeQueue()
        separate_process_slices = want_slices
        if not args.show_sliceno:
            seen_list = mp.MpSet()
    else:
        separate_process_slices = want_slices[1:]
        if args.ordered or headers:
            # needs to sync in some way
            q_in = first_q_out = mp.LockFreeQueue()
    for sliceno in separate_process_slices:
        if q_in:
            q_out = mp.LockFreeQueue()
        p = mp.SimplifiedProcess(
            target=one_slice,
            args=(
                sliceno,
                q_in,
                q_out,
                q_to_close,
            ),
            name='slice-%d' % (sliceno, ),
        )
        children.append(p)
        if q_in and q_in is not first_q_out:
            q_in.close()
        q_to_close = first_q_out
        q_in = q_out
    if q_in:
        q_out = first_q_out
        q_in.make_reader()
        q_out.make_writer()
        if args.ordered:
            q_in.put_local(None)
    del q_to_close
    del first_q_out

    try:
        if args.list_matching:
            if args.headers:
                headers_prefix = ['[DATASET]']
                if seen_list is None:
                    headers_prefix.append('[SLICE]')
                write(1, gen_headers([]))
            ordered_res = defaultdict(set)
            q_list.make_reader()
            if seen_list is None:
                used_columns = ['dataset', 'sliceno']
            else:
                used_columns = ['dataset']
            inner_show = make_show({} if args.format == 'json' else [],
                                   used_columns)

            def show(ds, sliceno=None):
                """Write one result line for ds (and sliceno, if given) to fd 1."""
                items = [ds] if sliceno is None else [ds, sliceno]
                write(1, inner_show(None, items))

            while True:
                try:
                    ds, sliceno = q_list.get()
                except QueueEmpty:
                    break
                if seen_list is None:
                    if args.ordered:
                        ordered_res[ds].add(sliceno)
                    else:
                        show(ds, sliceno)
                elif ds not in seen_list:
                    seen_list.add(ds)
                    if not args.ordered:
                        show(ds)
            if args.ordered:
                for ds in datasets:
                    if seen_list is None:
                        for sliceno in sorted(ordered_res[ds]):
                            show(ds, sliceno)
                    else:
                        if ds in seen_list:
                            show(ds)
        else:
            out = outputter(q_in, q_out, first_slice=True)
            sliceno = want_slices[0]
            for ds in datasets:
                grep(ds, sliceno, out)
                q_status.put((sliceno, True))
            out.finish()
    except QueueEmpty:
        # don't print an error, probably a subprocess died from EPIPE before
        # the main process. (or the subprocess already printed an error.)
        return 1

    q_status.close()
    for c in children:
        c.join()
        if c.exitcode:
            return 1
예제 #11
0
 def show(sig, frame):
     """Signal handler that writes a progress report to fd 2.

     A summary line is always written. When called again within 2 seconds
     of the previous call one line per slice is written before the summary.
     Slices more than 0.1 behind the total progress are highlighted, and
     the worst one is mentioned in the summary.
     """

     def emit(text, style):
         write(2, colour(text, style).encode('utf-8') + b'\n')

     now = monotonic()
     verbose = (now < previous[0] + 2)  # within 2 seconds of previous
     previous[0] = now
     n_datasets = len(datasets)
     ds_ixes = []
     progress_lines = []
     progress_fraction = []
     for sliceno in want_slices:
         ds_ix, reported = status[sliceno]
         ds_ixes.append(ds_ix)
         # The real position is somewhere between what was last reported
         # and one status interval later (but not past the end of the
         # dataset), so use the middle of the possibilities.
         upper = min(
             reported + status_interval[sliceno],
             total_lines_per_slice_at_ds[ds_ix + 1][sliceno],
         )
         estimate = (reported + upper) / 2
         progress_lines.append(estimate)
         slice_total = total_lines_per_slice_at_ds[-1][sliceno]
         progress_fraction.append(
             estimate / slice_total if slice_total != 0 else 1)
     progress_total = sum(progress_lines) / (total_lines or 1)
     bad_cutoff = progress_total - 0.1
     if verbose:
         # Only name the dataset when the slices are not all in the same one.
         show_ds = (n_datasets > 1 and min(ds_ixes) != max(ds_ixes))
         per_slice = zip(want_slices, ds_ixes, progress_fraction)
         for sliceno, ds_ix, fraction in per_slice:
             if ds_ix == n_datasets:
                 state = 'DONE'
             else:
                 state = '{0:d}% of {1:n} lines'.format(
                     round(fraction * 100),
                     total_lines_per_slice_at_ds[-1][sliceno])
                 if show_ds:
                     state = '%s (in %s)' % (state, datasets[ds_ix].quoted)
             if fraction < bad_cutoff:
                 style = 'grep/infohighlight'
             else:
                 style = 'grep/info'
             emit('%9d: %s' % (sliceno, state), style)
     summary = '{0:d}% of {1:n} lines'.format(
         round(progress_total * 100), total_lines)
     if n_datasets > 1:
         min_ds = min(ds_ixes)
         if min_ds < n_datasets:
             # ' ++' marks that some slices are further along than min_ds.
             extra = ' ++' if min_ds != max(ds_ixes) else ''
             summary = '%s (in %s%s)' % (
                 summary, datasets[min_ds].quoted, extra)
     worst = min(progress_fraction)
     if worst < bad_cutoff:
         summary = '%s, worst %d%%' % (summary, round(worst * 100))
     emit('  SUMMARY: %s' % (summary, ), 'grep/info')
예제 #12
0
def main():
    """Parse the command line, expand aliases and run the chosen command.

    Returns whatever the command function returns, or 1 after a UserError
    or an OSError with errno EPIPE. Exits with status 2 (after printing
    the help to stderr) when the command is missing or unknown, and with
    the result of cmd_version(()) when --version is given. On
    KeyboardInterrupt the process sends SIGINT to itself with the default
    handler restored.
    """
    # Several commands use SIGUSR1, whose default action is to kill the
    # process. Block it first thing to keep the race window small.
    if hasattr(signal, 'pthread_sigmask'):
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR1})
    else:
        # Blocking is not available, so ignore the signal instead.
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)

    # As of python 3.8 the default start_method is 'spawn' on macOS.
    # This doesn't work for us. 'fork' is fairly unsafe on macOS,
    # but it's better than not working at all. See
    # https://bugs.python.org/issue33725
    # for more information.
    import multiprocessing
    if hasattr(multiprocessing, 'set_start_method'):
        # If possible, make the forkserver (used by database updates)
        # pre-import everything.
        if hasattr(multiprocessing, 'set_forkserver_preload'):
            preload = ['accelerator', 'accelerator.server']
            multiprocessing.set_forkserver_preload(preload)
        multiprocessing.set_start_method('fork')

    from accelerator import g
    g.running = 'shell'

    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)

    # Configuration defaults, which parse_user_config is given to update.
    aliases = {'cat': 'grep -e ""'}
    colours = {
        'warning': ('RED', ),
        'highlight': ('BOLD', ),
        'grep/highlight': ('RED', ),
        'info': ('BRIGHTBLUE', ),
        'infohighlight': ('BOLD', 'BRIGHTBLUE'),
        'separator': ('CYAN', 'UNDERLINE'),
        'header': ('BRIGHTBLUE', 'BOLD'),
    }
    parse_user_config(aliases, colours)
    colour._names.update(colours)

    # Expand aliases until the first word is no longer an alias. An alias
    # named 'noalias' is never expanded (save the user from itself).
    expanded_names = []
    while argv and argv[0] in aliases and argv[0] != 'noalias':
        name = argv[0]
        definition = aliases[name]
        try:
            words = shlex.split(definition)
        except ValueError as e:
            raise ValueError('Failed to expand alias %s (%r): %s' % (
                name,
                definition,
                e,
            ))
        extra_main_argv, argv = split_args(words + argv[1:])
        main_argv.extend(extra_main_argv)
        if words and words[0] == name:
            # The alias expands to a command with its own name, so stop here.
            break
        expanded_names.append(name)
        if expanded_names.count(name) > 1:
            raise ValueError('Alias loop: %r' % (expanded_names, ))

    while argv and argv[0] == 'noalias':
        del argv[0]

    # The help epilog lists all commands and aliases.
    widest = max(len(name) for name in COMMANDS)
    row = '  %%%ds  %%s' % (widest, )
    epilog = ['commands:', '']
    epilog.extend(
        row % (name, func.help) for name, func in sorted(COMMANDS.items()))
    epilog.extend(['', 'aliases:'])
    epilog.extend('  %s = %s' % pair for pair in sorted(aliases.items()))
    epilog.append('')
    help_hint = colour('%(prog)s <command> --help', 'help/highlight')
    epilog.append('use "' + help_hint + '" for <command> usage')
    intro_hint = colour('%(prog)s intro', 'help/highlight')
    epilog.append('try "' + intro_hint + '" for an introduction')

    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--config', metavar='CONFIG_FILE', help='configuration file')
    parser.add_argument(
        '--version', action='store_true',
        help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))

    if argv:
        args.command = argv.pop(0)
    else:
        args.command = None
    if args.command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        if args.command is not None:
            print(file=sys.stderr)
            print('Unknown command "%s"' % (args.command, ), file=sys.stderr)
        sys.exit(2)

    # These commands get False instead of the configuration file name.
    if args.command in ('init', 'intro', 'version'):
        config_fn = False
    else:
        config_fn = args.config
    command_func = COMMANDS[args.command]
    is_debug = getattr(command_func, 'is_debug', False)
    try:
        setup(config_fn, is_debug)
        prog = '%s %s' % (basename(sys.argv[0]), args.command)
        argv.insert(0, prog)
        return command_func(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except OSError as e:
        if e.errno != errno.EPIPE:
            raise
        return 1
    except KeyboardInterrupt:
        # Exiting with KeyboardInterrupt causes python to print a traceback.
        # We don't want that, but we do want to exit from SIGINT (so the
        # calling process can know that happened).
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        os.kill(os.getpid(), signal.SIGINT)
        # If that didn't work let's re-raise the KeyboardInterrupt.
        raise
예제 #13
0
 def msg(txt='', c='intro/info'):
     """Print txt passed through colour() with c, or an empty line if txt is empty."""
     if not txt:
         print()
         return
     print(colour(txt, c))
예제 #14
0
 def cmd(txt, *a):
     """Print txt indented two spaces, coloured as 'intro/highlight' plus a."""
     highlighted = colour(txt, 'intro/highlight', *a)
     print('  ' + highlighted)
예제 #15
0
	def msg(txt='', *a):
		"""Print txt coloured with 'brightblue' plus a, or an empty line if txt is empty."""
		if not txt:
			print()
			return
		print(colour(txt, 'brightblue', *a))
예제 #16
0
	def cmd(txt, *a):
		"""Print txt indented two spaces, coloured with 'bold' plus a."""
		highlighted = colour(txt, 'bold', *a)
		print('  ' + highlighted)