Example 1
def cmd_abort(argv):
	parser = ArgumentParser(prog=argv.pop(0))
	parser.add_argument('-q', '--quiet', action='store_true', help="no output")
	args = parser.parse_args(argv)
	from accelerator.build import Automata
	a = Automata(cfg.url)
	res = a.abort()
	if not args.quiet:
		print("Killed %d running job%s." % (res.killed, '' if res.killed == 1 else 's'))
Example 2
def main(argv, cfg):
    descr = "lists and describes build scripts"
    parser = ArgumentParser(
        prog=argv.pop(0),
        description=descr,
    )
    parser.add_argument('-s',
                        '--short',
                        action='store_true',
                        help='short listing')
    parser.add_argument('-p',
                        '--path',
                        action='store_true',
                        help='show package paths')
    parser.add_argument('match',
                        nargs='*',
                        default=[],
                        help='substring used for matching')
    args = parser.parse_intermixed_args(argv)
    columns = terminal_size().columns

    if not args.match:
        # no args => list everything in short format
        args.match = ['']
        args.short = True

    packages = []
    for package in cfg.method_directories:
        path = dirname(import_module(package).__file__)
        scripts = []
        packages.append((package, path, scripts))
        for item in sorted(
                glob(path + '/build.py') + glob(path + '/build_*.py')):
            name = basename(item[:-3])
            modname = '.'.join((package, name))
            if any(m in modname for m in args.match):
                try:
                    module = import_module(modname)
                except Exception as e:
                    print('%s%s: %s%s' % (colour.RED, item, e, colour.RESET),
                          file=sys.stderr)
                    continue
                scripts.append((name, getattr(module, 'description', '')))

    for package, path, scripts in sorted(packages):
        if scripts:
            if args.path:
                print(path + '/')
            else:
                print(package)
            printdesc(sorted(scripts), columns, full=not args.short)
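For reference, a build script that this lister finds must be named build.py or build_*.py inside a method directory; the description it prints is just a module-level attribute. A minimal sketch, assuming the usual accelerator convention of a main(urd) entry point (the method name 'example' is illustrative):

description = 'minimal example build script'

def main(urd):
    # build one job from the (illustrative) method named 'example'
    urd.build('example')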
Example 3
def main(argv, cfg):
	usage = "%(prog)s [-a | [workdir [workdir [...]]]"
	parser = ArgumentParser(usage=usage, prog=argv.pop(0))
	parser.add_argument('-a', '--all', action='store_true', help="list all workdirs")
	parser.add_argument('workdirs', nargs='*', default=[])
	args = parser.parse_args(argv)

	if args.all:
		args.workdirs.extend(sorted(cfg.workdirs))

	if not args.workdirs:
		for wd in sorted(cfg.workdirs):
			print(wd)
		return

	for name in args.workdirs:
		if name not in cfg.workdirs:
			print("No such workdir:", name, file=sys.stderr)
			continue
		known = call(cfg.url + '/workdir/' + url_quote(name))
		for jid in workdir_jids(cfg, name):
			show_job(known, jid)
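		# the workdir may also contain a <name>-LATEST symlink pointing at its most recent job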

		try:
			latest = os.readlink(os.path.join(cfg.workdirs[name], name + '-LATEST'))
		except OSError:
			latest = None
		if latest:
			show_job(known, latest, name + '-LATEST')
Example 4
def main(argv, cfg):
    global authdict, allow_passwordless, db

    parser = ArgumentParser(prog=argv.pop(0))
    parser.add_argument(
        '--path',
        type=str,
        default='urd.db',
        help=
        'database directory (can be relative to project directory) (default: urd.db)',
    )
    parser.add_argument('--allow-passwordless',
                        action='store_true',
                        help='accept any password for users not in passwd.')
    parser.add_argument('--quiet', action='store_true', help='less chatty.')
    args = parser.parse_args(argv)
    if not args.quiet:
        print('-' * 79)
        print(args)
        print()

    auth_fn = os.path.join(args.path, 'passwd')
    authdict = readauth(auth_fn)
    allow_passwordless = args.allow_passwordless
    if not authdict and not args.allow_passwordless:
        raise Exception(
            'No users in %r and --allow-passwordless not specified.' %
            (auth_fn, ))
    db = DB(args.path, not args.quiet)

    bottle.install(jsonify)

    kw = dict(debug=False,
              reloader=False,
              quiet=args.quiet,
              server=WaitressServer)
    listen = cfg.urd_listen
    if not listen:
        raise Exception('urd not configured in this project')
    if isinstance(listen, tuple):
        kw['host'], kw['port'] = listen
    else:
        from accelerator.server import check_socket
        check_socket(listen)
        kw['host'] = listen
        kw['port'] = 0
    bottle.run(**kw)
Example 5
def main(argv, cfg):
    descr = 'show setup.json, dataset list, etc for jobs'
    parser = ArgumentParser(
        prog=argv.pop(0),
        description=descr,
        formatter_class=RawTextHelpFormatter,
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o',
                       '--output',
                       action='store_true',
                       help='show job output')
    group.add_argument('-O',
                       '--just-output',
                       action='store_true',
                       help='show only job output')
    group.add_argument('-P',
                       '--just-path',
                       action='store_true',
                       help='show only job path')
    parser.add_argument(
        'jobid',
        nargs='+',
        metavar='jobid/jobspec',
        help='jobid is just a jobid.\n' +
        'you can also use path, method or :urdlist:[entry].\n' +
        'path is to a jobdir (with setup.json in it).\n' +
        'method is the latest (current) job with that method (i.e.\n' +
        'the latest finished job with current source code).\n' +
        ':urdlist:[entry] looks up jobs in urd. details are in the\n' +
        'urd help, except here entry defaults to -1 and you can\'t\n' +
        'list things (no .../ or .../since/x).\n' +
        'you can use spec~ or spec~N to go back N current jobs\n' +
        'with that method or spec^ or spec^N to follow .previous')
    args = parser.parse_intermixed_args(argv)
    res = 0
    for path in args.jobid:
        try:
            job = name2job(cfg, path)
            if args.just_output:
                out = job.output()
                if out:
                    print(out, end='' if out.endswith('\n') else '\n')
            elif args.just_path:
                print(job.path)
            else:
                show(cfg.url, job, args.output)
        except JobNotFound as e:
            print(e)
            res = 1
        except Exception as e:
            if isinstance(e, OSError) and e.errno == errno.EPIPE:
                raise
            print_exc(file=sys.stderr)
            print("Failed to show %r" % (path, ), file=sys.stderr)
            res = 1
    return res
Example 6
def main(argv, cfg):
    descr = 'show setup.json, dataset list, etc for jobs'
    parser = ArgumentParser(prog=argv.pop(0), description=descr)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o',
                       '--output',
                       action='store_true',
                       help='show job output')
    group.add_argument('-O',
                       '--just-output',
                       action='store_true',
                       help='show only job output')
    group.add_argument('-P',
                       '--just-path',
                       action='store_true',
                       help='show only job path')
    parser.add_argument(
        'jobid',
        nargs='+',
        metavar='jobid/path/method',
        help='method shows the latest (current) job with that method\n' +
        '(i.e. the latest finished job with current source code)\n' +
        'you can use spec~ or spec~N to go back N current jobs\n' +
        'with that method or spec^ or spec^N to follow .previous')
    args = parser.parse_intermixed_args(argv)
    res = 0
    for path in args.jobid:
        try:
            job = name2job(cfg, path)
            if args.just_output:
                out = job.output()
                if out:
                    print(out, end='' if out.endswith('\n') else '\n')
            elif args.just_path:
                print(job.path)
            else:
                show(cfg.url, job, args.output)
        except JobNotFound as e:
            print(e)
            res = 1
        except Exception as e:
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                raise
            print_exc()
            print("Failed to show %r" % (path, ))
            res = 1
    return res
Example 7
def main(argv, cfg):
    usage = "%(prog)s [options] ds [ds [...]]"
    parser = ArgumentParser(prog=argv.pop(0), usage=usage)
    parser.add_argument('-c',
                        '--chain',
                        action='store_true',
                        help='list all datasets in a chain')
    parser.add_argument('-C',
                        '--non-empty-chain',
                        action='store_true',
                        help='list all non-empty datasets in a chain')
    parser.add_argument('-l',
                        '--list',
                        action='store_true',
                        help='list all datasets in a job with number of rows')
    parser.add_argument(
        '-L',
        '--chainedlist',
        action='store_true',
        help='list all datasets in a job with number of chained rows')
    parser.add_argument('-m',
                        '--suppress-minmax',
                        action='store_true',
                        help='do not print min/max column values')
    parser.add_argument('-n',
                        '--suppress-columns',
                        action='store_true',
                        help='do not print columns')
    parser.add_argument('-q',
                        '--suppress-errors',
                        action='store_true',
                        help='silently ignores bad input datasets/jobids')
    parser.add_argument(
        '-s',
        '--slices',
        action='store_true',
        help='list relative number of lines per slice in sorted order')
    parser.add_argument('-S',
                        '--chainedslices',
                        action='store_true',
                        help='same as -s but for full chain')
    parser.add_argument('-w',
                        '--location',
                        action='store_true',
                        help='show where (ds/filename) each column is stored')
    parser.add_argument(
        "dataset",
        nargs='+',
        help=
        'the job part of the dataset name can be specified in the same ways as for "ax job". you can use ds~ or ds~N to follow the chain N steps backwards, or ^ to follow .parent. this requires specifying the ds-name, so wd-1~ will not do this, but wd-1/default~ will.'
    )
    args = parser.parse_intermixed_args(argv)
    args.chain = args.chain or args.non_empty_chain

    def finish(badinput):
        if badinput and not args.suppress_errors:
            print('Error, failed to resolve datasets:', file=sys.stderr)
            for n, e in badinput:
                print('    %r: %s' % (
                    n,
                    e,
                ), file=sys.stderr)
            exit(1)
        exit()

    badinput = []

    if args.list or args.chainedlist:
        for n in args.dataset:
            try:
                try:
                    dsvec = name2ds(cfg, n).job.datasets
                except NoSuchWhateverError:
                    dsvec = name2job(cfg, n).datasets
            except Exception as e:
                badinput.append((n, e))
                dsvec = None
            if dsvec:
                print('%s' % (dsvec[0].job, ))
                v = []
                for ds in dsvec:
                    if args.chainedlist:
                        lines = sum(sum(x.lines) for x in ds.chain())
                    else:
                        lines = sum(ds.lines)
                    v.append((ds.name, '{:n}'.format(lines)))
                len_n, len_l = colwidth(v)
                template = "{0:%d}  ({1:>%d})" % (len_n, len_l)
                for name, numlines in sorted(v):
                    print('    ' + template.format(name, numlines))
        finish(badinput)

    for n in args.dataset:
        try:
            ds = name2ds(cfg, n)
        except NoSuchWhateverError as e:
            badinput.append((n, e))
            continue

        print(ds.quoted)
        if ds.parent:
            if isinstance(ds.parent, tuple):
                print("    Parents:")
                max_n = max(len(x.quoted) for x in ds.parent)
                template = "{1:%d}" % (max_n, )
                data = tuple(
                    (None, x.quoted) for ix, x in enumerate(ds.parent))
                data = sorted(data, key=lambda x: x[1])
                printcolwise(data, template, lambda x: x, minrows=8, indent=8)
            else:
                print("    Parent:", ds.parent.quoted)
        print("    Method:", quote(ds.job.method))
        if ds.filename:
            print("    Filename:", quote(ds.filename))
        if ds.previous:
            print("    Previous:", ds.previous.quoted)
        if ds.hashlabel is not None:
            print("    Hashlabel:", quote(ds.hashlabel))

        def prettyminmax(minval, maxval):
            if args.suppress_minmax:
                return ''
            s = '[%%%ds, %%%ds]' % (MINMAXWIDTH, MINMAXWIDTH)
            if minval is None:
                return ''
            elif isinstance(minval, float):

                def intdigits(x):
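                    # digits before the decimal point (negative for small fractions),
                    # capped at MINMAXWIDTH - 2; zero gets a mid-range default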
                    if isinf(x) or isnan(x):
                        return 3
                    if not x:
                        return (MINMAXWIDTH - 2) // 2
                    return min(MINMAXWIDTH - 2, floor(log10(abs(x)) + 1))

                ints = max(intdigits(minval), intdigits(maxval))
                if ints > 0:
                    format = "%% %d.%df" % (ints, MINMAXWIDTH - ints - 2)
                elif ints < -4:
                    format = "%% .%de" % (MINMAXWIDTH - 7, )
                else:
                    format = "%% .%df" % (MINMAXWIDTH - 3, )

                def format_or_int(v):
                    try:
                        i = int(v)
                        if v == i:
                            return i
                    except (OverflowError, ValueError):
                        pass
                    return locale.format_string(format, v)

                return s % (format_or_int(minval), format_or_int(maxval))
            elif isinstance(minval, int):
                return s % (minval, maxval)
            elif isinstance(minval, (date, time, datetime)):
                return s % (minval, maxval)
            else:
                return s % (minval, maxval)

        if not args.suppress_columns:
            print("    Columns:")
            name2typ = {
                n: c.type + '+None' if c.none_support else c.type
                for n, c in ds.columns.items()
            }
            len_n, len_t = colwidth(
                (quote(n), name2typ[n]) for n, c in ds.columns.items())
            if args.location:
                len_l = max(
                    len(quote(c.location)) for c in ds.columns.values())
                len_c = max(len(c.compression) for c in ds.columns.values())
                template = '        {2} {0:%d}  {1:%d}  {4:%d} {5:%d}  {3}' % (
                    len_n,
                    len_t,
                    len_l,
                    len_c,
                )
            else:
                template = '        {2} {0:%d}  {1:%d}  {3}' % (
                    len_n,
                    len_t,
                )
            chain = False
            if args.chainedslices or args.chain:
                chain = ds.chain()
            for n, c in sorted(ds.columns.items()):
                if chain:
                    minval, maxval = chain.min(n), chain.max(n)
                else:
                    minval, maxval = c.min, c.max
                hashdot = colour("*",
                                 "ds/highlight") if n == ds.hashlabel else " "
                print(
                    template.format(quote(n), name2typ[n], hashdot,
                                    prettyminmax(minval, maxval),
                                    quote(c.location), c.compression).rstrip())
            print("    {0:n} columns".format(len(ds.columns)))
        print("    {0:n} lines".format(sum(ds.lines)))

        if ds.previous or args.chain:
            chain = ds.chain()
            if args.non_empty_chain:
                print("    Full chain length {0:n}, from {1} to {2}".format(
                    len(chain), chain[0], chain[-1]))
                chain = [ds for ds in chain if sum(ds.lines)]
                print("    Filtered chain length {0:n}".format(len(chain)))
            if chain:
                if not args.non_empty_chain:
                    print("    Chain length {0:n}, from {1} to {2}".format(
                        len(chain), chain[0], chain[-1]))
                if args.chain:
                    data = tuple((ix, "%s/%s" % (x.job, x.name),
                                  "{:n}".format(sum(x.lines)))
                                 for ix, x in enumerate(chain))
                    max_n, max_l = colwidth(x[1:] for x in data)
                    template = "{0:3}: {1:%d} ({2:>%d})" % (max_n, max_l)
                    printcolwise(data,
                                 template,
                                 lambda x: (x[0], x[1], x[2]),
                                 minrows=8,
                                 indent=8)

        if args.slices or args.chainedslices:
            if args.chainedslices and ds.previous:
                data = (
                    (ix, '{:n}'.format(sum(x)), sum(x))
                    for ix, x in enumerate(zip(*(x.lines
                                                 for x in ds.chain()))))
                print('    Balance, lines per slice, full chain:')
            else:
                data = ((ix, '{:n}'.format(x), x)
                        for ix, x in enumerate(ds.lines))
                if ds.previous:
                    print('    Balance, lines per slice, tip dataset:')
                else:
                    print('    Balance, lines per slice:')
            data = sorted(data, key=lambda x: -x[2])
            s = sum(x[2] for x in data)
            len_n = max(len(x[1]) for x in data)
            template = "{0:3}: {1!s}%% ({2:>%d})" % (len_n, )
            printcolwise(
                data,
                template,
                lambda x:
                (x[0], locale.format_string("%6.2f", (100 * x[2] /
                                                      (s or 1e20))), x[1]),
                minrows=8,
                indent=8)
            print("    Max to average ratio: " +
                  locale.format_string("%2.3f", (max(x[2] for x in data) /
                                                 ((s or 1e20) / len(data)), )))

        if ds.previous:
            print("    {0:n} total lines in chain".format(
                sum(sum(ds.lines) for ds in chain)))

    finish(badinput)
Example 8
def main(argv, cfg):
    parser = ArgumentParser(
        prog=argv.pop(0),
        usage="%(prog)s [options] [script]",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        '-f',
        '--flags',
        default='',
        help="comma separated list of flags",
    )
    parser.add_argument(
        '-q',
        '--quick',
        action='store_true',
        help="skip method updates and checking workdirs for new jobs",
    )
    parser.add_argument(
        '-c',
        '--concurrency',
        action='append',
        metavar='SPEC',
        help=
        "set max concurrency for methods, either method=N\nor just N to set for all other methods",
    )
    parser.add_argument(
        '-w',
        '--workdir',
        default=None,
        help="build in this workdir\nset_workdir() and workdir= override this.",
    )
    parser.add_argument(
        '-W',
        '--just_wait',
        action='store_true',
        help="just wait for running job, don't run any build script",
    )
    parser.add_argument('-p',
                        '--full-path',
                        action='store_true',
                        help="print full path to jobdirs")
    parser.add_argument('--verbose',
                        default='status',
                        help="verbosity style {no, status, dots, log}")
    parser.add_argument('--quiet',
                        action='store_true',
                        help="same as --verbose=no")
    parser.add_argument(
        '--horizon',
        default=None,
        help="time horizon - dates after this are not visible in\nurd.latest")
    parser.add_argument(
        'script',
        default='build',
        help=
        "build script to run. default \"build\".\nsearches under all method directories in alphabetical\norder if it does not contain a dot.\nprefixes build_ to last element unless specified.\npackage name suffixes are ok.\nso for example \"test_methods.tests\" expands to\n\"accelerator.test_methods.build_tests\".",
        nargs='?')

    options = parser.parse_args(argv)

    if '.' in options.script:
        options.package, options.script = options.script.rsplit('.', 1)
    else:
        options.package = None

    options.verbose = {
        'no': False,
        'status': True,
        'dots': 'dots',
        'log': 'log'
    }[options.verbose]
    if options.quiet: options.verbose = False

    concurrency_map = {}
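    # e.g. "-c 3 -c csvimport=1" gives {'-default-': 3, 'csvimport': 1} (illustrative values)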
    for v in options.concurrency or ():
        if v.isnumeric():
            concurrency_map['-default-'] = int(v)
        else:
            try:
                method, v = v.split('=', 1)
                concurrency_map[method] = int(v)
            except ValueError:
                raise Exception('Bad concurrency spec %r' % (v, ))
    options.concurrency_map = concurrency_map

    try:
        run_automata(options, cfg)
        return 0
    except (JobError, ServerError):
        # If it's a JobError we don't care about the local traceback,
        # we want to see the job traceback, and maybe know what line
        # we built the job on.
        # If it's a ServerError we just want the line and message.
        print_minimal_traceback()
    except Exception:
        # For the rest we still don't want to see stuff from this
        # file and earlier.
        print_user_part_traceback()
    return 1
Example 9
def main():
    # As of python 3.8 the default start_method is 'spawn' on macOS.
    # This doesn't work for us. 'fork' is fairly unsafe on macOS,
    # but it's better than not working at all. See
    # https://bugs.python.org/issue33725
    # for more information.
    import multiprocessing
    if hasattr(multiprocessing, 'set_start_method'):
        # If possible, make the forkserver (used by database updates) pre-import everything
        if hasattr(multiprocessing, 'set_forkserver_preload'):
            multiprocessing.set_forkserver_preload(
                ['accelerator', 'accelerator.server'])
        multiprocessing.set_start_method('fork')

    from accelerator import g
    g.running = 'shell'

    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)

    aliases = {
        'cat': 'grep ""',
    }
    aliases.update(parse_user_config() or ())
    while argv and argv[0] in aliases:
        try:
            expanded = shlex.split(aliases[argv[0]])
        except ValueError as e:
            raise ValueError('Failed to expand alias %s (%r): %s' % (
                argv[0],
                aliases[argv[0]],
                e,
            ))
        more_main_argv, argv = split_args(expanded + argv[1:])
        main_argv.extend(more_main_argv)

    epilog = ['commands:', '']
    cmdlen = max(len(cmd) for cmd in COMMANDS)
    template = '  %%%ds  %%s' % (cmdlen, )
    for cmd, func in sorted(COMMANDS.items()):
        epilog.append(template % (
            cmd,
            func.help,
        ))
    epilog.append('')
    epilog.append('aliases:')
    epilog.extend('%s = %s' % item for item in sorted(aliases.items()))
    epilog.append('')
    epilog.append('use %(prog)s <command> --help for <command> usage')
    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument('--config',
                        metavar='CONFIG_FILE',
                        help='configuration file')
    parser.add_argument('--version',
                        action='store_true',
                        help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))
    args.command = argv.pop(0) if argv else None
    if args.command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        print(file=sys.stderr)
        if args.command is not None:
            print('Unknown command "%s"' % (args.command, ), file=sys.stderr)
        sys.exit(2)
    config_fn = args.config
    if args.command == 'init':
        config_fn = False
    cmd = COMMANDS[args.command]
    debug_cmd = getattr(cmd, 'is_debug', False)
    try:
        setup(config_fn, debug_cmd)
        argv.insert(0, '%s %s' % (
            basename(sys.argv[0]),
            args.command,
        ))
        return cmd(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except IOError as e:
        if e.errno == errno.EPIPE and debug_cmd:
            return
        else:
            raise
Example 10
def main(argv, config):
    g.running = 'server'

    parser = ArgumentParser(prog=argv.pop(0))
    parser.add_argument('--debug', action='store_true')
    parser.add_argument(
        '--debuggable',
        action='store_true',
        help=
        'make breakpoint() work in methods. note that this makes a failing method kill the whole server.'
    )
    options = parser.parse_args(argv)

    config.debuggable = options.debuggable

    # all forks belong to the same happy family
    try:
        os.setpgrp()
    except OSError:
        print(
            "Failed to create process group - there is probably already one (daemontools).",
            file=sys.stderr)

    # Set a low (but not too low) open file limit to make
    # dispatch.update_valid_fds faster.
    # The runners will set the highest limit they can
    # before actually running any methods.
    r1, r2 = resource.getrlimit(resource.RLIMIT_NOFILE)
    r1 = min(r1, r2, 1024)
    resource.setrlimit(resource.RLIMIT_NOFILE, (r1, r2))

    # Start the board-server in a separate process so it can't interfere.
    # Even if it dies we don't care.
    try:
        if not isinstance(config.board_listen, tuple):
            # Don't bother if something is already listening.
            check_socket(config.board_listen)
        Process(target=board.run, args=(config, ), name='board-server').start()
    except Exception:
        pass

    iowrapper.main()

    # setup statmsg sink and tell address using ENV
    statmsg_rd, statmsg_wr = socket.socketpair(socket.AF_UNIX,
                                               socket.SOCK_DGRAM)
    os.environ['BD_STATUS_FD'] = str(statmsg_wr.fileno())

    def buf_up(fh, opt):
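        # grow the buffer to 256 KiB so bursts of status messages are less likely to block the sender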
        sock = socket.fromfd(fh.fileno(), socket.AF_UNIX, socket.SOCK_DGRAM)
        sock.setsockopt(socket.SOL_SOCKET, opt, 256 * 1024)
        # does not close fh, because fromfd dups the fd (but not the underlying socket)
        sock.close()

    buf_up(statmsg_wr, socket.SO_SNDBUF)
    buf_up(statmsg_rd, socket.SO_RCVBUF)

    t = DeadlyThread(target=statmsg_sink,
                     args=(statmsg_rd, ),
                     name="statmsg sink")
    t.daemon = True
    t.start()

    # do all main-stuff, i.e. run server
    sys.stdout = autoflush.AutoFlush(sys.stdout)
    sys.stderr = autoflush.AutoFlush(sys.stderr)
    atexit.register(exitfunction)
    signal.signal(signal.SIGTERM, exitfunction)
    signal.signal(signal.SIGINT, exitfunction)

    signal.signal(signal.SIGUSR1, siginfo)
    signal.siginterrupt(signal.SIGUSR1, False)
    if hasattr(signal, 'pthread_sigmask'):
        signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGUSR1})
    if hasattr(signal, 'SIGINFO'):
        signal.signal(signal.SIGINFO, siginfo)
        signal.siginterrupt(signal.SIGINFO, False)

    if isinstance(config.listen, tuple):
        server = ThreadedHTTPServer(config.listen, XtdHandler)
    else:
        check_socket(config.listen)
        # We want the socket to be world writeable, protect it with dir permissions.
        u = os.umask(0)
        server = ThreadedUnixHTTPServer(config.listen, XtdHandler)
        os.umask(u)

    if config.get('urd_local'):
        from accelerator import urd
        t = DeadlyThread(target=urd.main,
                         args=(['urd', '--quiet',
                                '--allow-passwordless'], config),
                         name='urd')
        t.daemon = True
        t.start()

    ctrl = control.Main(config, options, config.url)
    print()
    ctrl.print_workdirs()
    print()

    XtdHandler.ctrl = ctrl
    job_tracking[None].workdir = ctrl.target_workdir

    for n in (
            "project_directory",
            "result_directory",
            "input_directory",
    ):
        v = config.get(n)
        n = n.replace("_", " ")
        print("%17s: %s" % (
            n,
            v,
        ))
    for n in (
            "board",
            "urd",
    ):
        v = config.get(n + '_listen')
        if v and not config.get(n + '_local', True):
            extra = ' (remote)'
        else:
            extra = ''
        print("%17s: %s%s" % (
            n,
            v,
            extra,
        ))
    print()

    print("Serving on %s\n" % (config.listen, ), file=sys.stderr)
    server.serve_forever()
Example 11
def main(argv, cfg):
    usage = "%(prog)s [options] pattern ds [ds [...]] [column [column [...]]"
    parser = ArgumentParser(usage=usage, prog=argv.pop(0))
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '-C',
        '--color',
        action='store_true',
        help="color matched text",
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-o',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument('-t',
                        '--separator',
                        help="field separator (default tab)",
                        default='\t')
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    parser.add_argument('pattern')
    parser.add_argument(
        'dataset', help='can be specified in the same ways as for "ax ds"')
    parser.add_argument('columns', nargs='*', default=[])
    args = parser.parse_intermixed_args(argv)

    pat_s = re.compile(args.pattern, re.IGNORECASE if args.ignore_case else 0)
    pat_b = re.compile(args.pattern.encode('utf-8'),
                       re.IGNORECASE if args.ignore_case else 0)
    datasets = [name2ds(cfg, args.dataset)]
    columns = []

    separator_s = args.separator
    separator_b = separator_s.encode('utf-8')

    for ds_or_col in args.columns:
        if columns:
            columns.append(ds_or_col)
        else:
            try:
                datasets.append(name2ds(cfg, ds_or_col))
            except Exception:
                columns.append(ds_or_col)

    if not datasets:
        parser.print_help(file=sys.stderr)
        return 1

    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = None

    if args.slice:
        want_slices = []
        for s in args.slice:
            assert 0 <= s < g.slices, "Slice %d not available" % (s, )
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    if columns:
        bad = False
        for ds in datasets:
            missing = set(columns) - set(ds.columns)
            if missing:
                print('ERROR: %s does not have columns %r' % (
                    ds,
                    missing,
                ),
                      file=sys.stderr)
                bad = True
        if bad:
            return 1

    def grep(ds, sliceno):
        # Use bytes for everything if anything is bytes, str otherwise. (For speed.)
        if any(ds.columns[col].backing_type == 'bytes'
               for col in (grep_columns or columns or ds.columns)):

            def strbytes(v):
                return str(v).encode('utf-8', 'replace')

            def mk_iter(col):
                if ds.columns[col].backing_type in (
                        'bytes',
                        'unicode',
                        'ascii',
                ):
                    return ds._column_iterator(sliceno, col, _type='bytes')
                else:
                    return imap(strbytes, ds._column_iterator(sliceno, col))

            chk = pat_b.search
        else:

            def mk_iter(col):
                if ds.columns[col].backing_type in (
                        'unicode',
                        'ascii',
                ):
                    return ds._column_iterator(sliceno, col, _type='unicode')
                else:
                    return imap(str, ds._column_iterator(sliceno, col))

            chk = pat_s.search

        def fmt(v):
            if not isinstance(v, (unicode, bytes)):
                v = str(v)
            if isinstance(v, unicode):
                v = v.encode('utf-8', 'replace')
            return v

        def color(item):
            pos = 0
            parts = []
            for m in pat_b.finditer(item):
                a, b = m.span()
                parts.extend((item[pos:a], b'\x1b[31m', item[a:b], b'\x1b[m'))
                pos = b
            parts.append(item[pos:])
            return b''.join(parts)

        prefix = []
        if args.show_dataset:
            prefix.append(ds.encode('utf-8'))
        if args.show_sliceno:
            prefix.append(str(sliceno).encode('utf-8'))
        prefix = tuple(prefix)

        def show(prefix, items):
            items = map(fmt, items)
            if args.color:
                items = map(color, items)
            # This will be atomic if the line is not too long
            # (at least up to PIPE_BUF bytes, should be at least 512).
            write(1, separator_b.join(prefix + tuple(items)) + b'\n')

        if grep_columns and grep_columns != set(columns or ds.columns):
            grep_iter = izip(*(mk_iter(col) for col in grep_columns))
            lines_iter = ds.iterate(sliceno, columns)
        else:
            grep_iter = repeat(None)
            lines_iter = izip(*(mk_iter(col)
                                for col in (columns or sorted(ds.columns))))
        lines = izip(grep_iter, lines_iter)
        if args.show_lineno:
            for lineno, (grep_items, items) in enumerate(lines):
                if any(imap(chk, grep_items or items)):
                    show(prefix + (str(lineno).encode('utf-8'), ), items)
        else:
            for grep_items, items in lines:
                if any(imap(chk, grep_items or items)):
                    show(prefix, items)

    def one_slice(sliceno, q, wait_for):
        try:
            if q:
                q.get()
            for ds in datasets:
                if ds in wait_for:
                    q.task_done()
                    q.get()
                grep(ds, sliceno)
        except KeyboardInterrupt:
            return
        except IOError as e:
            if e.errno == errno.EPIPE:
                return
            else:
                raise
        finally:
            # Make sure we are joinable
            try:
                q.task_done()
            except Exception:
                pass

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    headers = {}
    if args.headers:
        if columns:
            current_headers = columns
        else:
            current_headers = None
            for ds in datasets:
                candidate_headers = sorted(ds.columns)
                if candidate_headers != current_headers:
                    headers[ds] = current_headers = candidate_headers
            current_headers = headers.pop(datasets[0])

        def show_headers(headers):
            print('\x1b[34m' + separator_s.join(headers_prefix + headers) +
                  '\x1b[m')

        show_headers(current_headers)

    queues = []
    children = []
    if not args.ordered:
        q = None
        wait_for = set(headers)
        for sliceno in want_slices[1:]:
            if wait_for:
                q = JoinableQueue()
                q.put(None)
                queues.append(q)
            p = Process(
                target=one_slice,
                args=(sliceno, q, wait_for),
                name='slice-%d' % (sliceno, ),
            )
            p.daemon = True
            p.start()
            children.append(p)
        want_slices = want_slices[:1]

    try:
        for ds in datasets:
            if ds in headers:
                for q in queues:
                    q.join()
                show_headers(headers.pop(ds))
                for q in queues:
                    q.put(None)
            for sliceno in want_slices:
                grep(ds, sliceno)
        for c in children:
            c.join()
    except KeyboardInterrupt:
        print()
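Both this and the newer grep variant below fan the work out as one process per slice, and each matching line is emitted with a single os.write(), which stays atomic up to PIPE_BUF bytes. A minimal standalone sketch of that output pattern, with illustrative data and names that are not part of the accelerator API:

import os
from multiprocessing import Process

def worker(sliceno, rows):
    for row in rows:
        line = '\t'.join(str(v) for v in row) + '\n'
        # one write per line: atomic up to PIPE_BUF bytes, so lines from
        # different slice processes do not interleave mid-line
        os.write(1, line.encode('utf-8'))

if __name__ == '__main__':
    slices = {0: [(0, 'a'), (1, 'b')], 1: [(2, 'c')], 2: [(3, 'd')]}
    children = [Process(target=worker, args=item) for item in slices.items()]
    for p in children:
        p.start()
    for p in children:
        p.join()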
Example 12
def main():
    # Several commands use SIGUSR1 which (naturally...) defaults to killing the
    # process, so start by blocking that to minimise the race time.
    if hasattr(signal, 'pthread_sigmask'):
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR1})
    else:
        # Or if we can't block it, just ignore it.
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)

    # As of python 3.8 the default start_method is 'spawn' on macOS.
    # This doesn't work for us. 'fork' is fairly unsafe on macOS,
    # but it's better than not working at all. See
    # https://bugs.python.org/issue33725
    # for more information.
    import multiprocessing
    if hasattr(multiprocessing, 'set_start_method'):
        # If possible, make the forkserver (used by database updates) pre-import everything
        if hasattr(multiprocessing, 'set_forkserver_preload'):
            multiprocessing.set_forkserver_preload(
                ['accelerator', 'accelerator.server'])
        multiprocessing.set_start_method('fork')

    from accelerator import g
    g.running = 'shell'

    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)

    # configuration defaults
    aliases = {
        'cat': 'grep -e ""',
    }
    colour_d = {
        'warning': ('RED', ),
        'highlight': ('BOLD', ),
        'grep/highlight': ('RED', ),
        'info': ('BRIGHTBLUE', ),
        'infohighlight': (
            'BOLD',
            'BRIGHTBLUE',
        ),
        'separator': (
            'CYAN',
            'UNDERLINE',
        ),
        'header': (
            'BRIGHTBLUE',
            'BOLD',
        ),
    }
    parse_user_config(aliases, colour_d)
    colour._names.update(colour_d)

    used_aliases = []
    while argv and argv[0] in aliases:
        alias = argv[0]
        if alias == 'noalias':  # save the user from itself
            break
        try:
            expanded = shlex.split(aliases[alias])
        except ValueError as e:
            raise ValueError('Failed to expand alias %s (%r): %s' % (
                argv[0],
                aliases[argv[0]],
                e,
            ))
        more_main_argv, argv = split_args(expanded + argv[1:])
        main_argv.extend(more_main_argv)
        if expanded and alias == expanded[0]:
            break
        used_aliases.append(alias)
        if alias in used_aliases[:-1]:
            raise ValueError('Alias loop: %r' % (used_aliases, ))

    while argv and argv[0] == 'noalias':
        argv.pop(0)

    epilog = ['commands:', '']
    cmdlen = max(len(cmd) for cmd in COMMANDS)
    template = '  %%%ds  %%s' % (cmdlen, )
    for cmd, func in sorted(COMMANDS.items()):
        epilog.append(template % (
            cmd,
            func.help,
        ))
    epilog.append('')
    epilog.append('aliases:')
    epilog.extend('  %s = %s' % item for item in sorted(aliases.items()))
    epilog.append('')
    epilog.append('use "' +
                  colour('%(prog)s <command> --help', 'help/highlight') +
                  '" for <command> usage')
    epilog.append('try "' + colour('%(prog)s intro', 'help/highlight') +
                  '" for an introduction')
    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument('--config',
                        metavar='CONFIG_FILE',
                        help='configuration file')
    parser.add_argument('--version',
                        action='store_true',
                        help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))
    args.command = argv.pop(0) if argv else None
    if args.command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        if args.command is not None:
            print(file=sys.stderr)
            print('Unknown command "%s"' % (args.command, ), file=sys.stderr)
        sys.exit(2)
    config_fn = args.config
    if args.command in (
            'init',
            'intro',
            'version',
    ):
        config_fn = False
    cmd = COMMANDS[args.command]
    debug_cmd = getattr(cmd, 'is_debug', False)
    try:
        setup(config_fn, debug_cmd)
        argv.insert(0, '%s %s' % (
            basename(sys.argv[0]),
            args.command,
        ))
        return cmd(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except OSError as e:
        if e.errno == errno.EPIPE:
            return 1
        else:
            raise
    except KeyboardInterrupt:
        # Exiting with KeyboardInterrupt causes python to print a traceback.
        # We don't want that, but we do want to exit from SIGINT (so the
        # calling process can know that happened).
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        os.kill(os.getpid(), signal.SIGINT)
        # If that didn't work let's re-raise the KeyboardInterrupt.
        raise
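The KeyboardInterrupt handler above uses a common idiom for propagating Ctrl-C: reset the handler to the default and re-deliver SIGINT to the current process, so the caller sees a death-by-signal rather than an ordinary exit code. A standalone sketch (the function name is illustrative):

import os
import signal

def exit_via_sigint():
    # restore the default handler, then re-deliver SIGINT to ourselves so the
    # exit status reports death-by-signal instead of a normal return code
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    os.kill(os.getpid(), signal.SIGINT)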
Example 13
def main(argv, cfg):
	parser = ArgumentParser(
		prog=argv.pop(0),
		usage="%(prog)s [options] [script]",
		formatter_class=RawTextHelpFormatter,
	)
	parser.add_argument('-f', '--flags',    default='',          help="comma separated list of flags", )
	parser.add_argument('-q', '--quick',    action='store_true', help="skip method updates and checking workdirs for new jobs", )
	parser.add_argument('-w', '--workdir',  default=None,        help="build in this workdir\nset_workdir() and workdir= override this.", )
	parser.add_argument('-W', '--just_wait',action='store_true', help="just wait for running job, don't run any build script", )
	parser.add_argument('-F', '--fullpath', action='store_true', help="print full path to jobdirs")
	parser.add_argument('--verbose',        default='status',    help="verbosity style {no, status, dots, log}")
	parser.add_argument('--quiet',          action='store_true', help="same as --verbose=no")
	parser.add_argument('--horizon',        default=None,        help="time horizon - dates after this are not visible in\nurd.latest")
	parser.add_argument('script',           default='build'   ,  help="build script to run. default \"build\".\nsearches under all method directories in alphabetical\norder if it does not contain a dot.\nprefixes build_ to last element unless specified.\npackage name suffixes are ok.\nso for example \"test_methods.tests\" expands to\n\"accelerator.test_methods.build_tests\".", nargs='?')

	options = parser.parse_args(argv)

	if '.' in options.script:
		options.package, options.script = options.script.rsplit('.', 1)
	else:
		options.package = None

	options.verbose = {'no': False, 'status': True, 'dots': 'dots', 'log': 'log'}[options.verbose]
	if options.quiet: options.verbose = False

	try:
		run_automata(options, cfg)
		return 0
	except (JobError, ServerError):
		# If it's a JobError we don't care about the local traceback,
		# we want to see the job traceback, and maybe know what line
		# we built the job on.
		# If it's a ServerError we just want the line and message.
		print_minimal_traceback()
	return 1
Example 14
def main(argv):
	from os import makedirs, listdir, chdir
	from os.path import exists, join, realpath
	from sys import version_info
	from argparse import RawDescriptionHelpFormatter
	from accelerator.compat import ArgumentParser
	from accelerator.error import UserError

	parser = ArgumentParser(
		prog=argv.pop(0),
		description=r'''
			creates an accelerator project directory.
			defaults to the current directory.
			creates accelerator.conf, a method dir, a workdir and result dir.
			both the method directory and workdir will be named <NAME>,
			"dev" by default.
		'''.replace('\t', ''),
		formatter_class=RawDescriptionHelpFormatter,
	)
	parser.add_argument('--slices', default=None, type=int, help='override slice count detection')
	parser.add_argument('--name', default='dev', help='name of method dir and workdir, default "dev"')
	parser.add_argument('--input', default='# /some/path where you want import methods to look.', help='input directory')
	parser.add_argument('--force', action='store_true', help='go ahead even though directory is not empty, or workdir exists with incompatible slice count')
	parser.add_argument('directory', default='.', help='project directory to create. default "."', metavar='DIR', nargs='?')
	options = parser.parse_args(argv)

	assert options.name
	assert '/' not in options.name
	if not options.input.startswith('#'):
		options.input = quote(realpath(options.input))
	prefix = realpath(options.directory)
	workdir = join(prefix, 'workdirs', options.name)
	slices_conf = join(workdir, '.slices')
	try:
		with open(slices_conf, 'r') as fh:
			workdir_slices = int(fh.read())
	except IOError:
		workdir_slices = None
	if workdir_slices and options.slices is None:
		options.slices = workdir_slices
	if options.slices is None:
		from multiprocessing import cpu_count
		options.slices = cpu_count()
	if workdir_slices and workdir_slices != options.slices and not options.force:
		raise UserError('Workdir %r has %d slices, refusing to continue with %d slices' % (workdir, workdir_slices, options.slices,))

	if not options.force and exists(options.directory) and listdir(options.directory):
		raise UserError('Directory %r is not empty.' % (options.directory,))
	if not exists(options.directory):
		makedirs(options.directory)
	chdir(options.directory)
	for dir_to_make in ('.socket.dir', 'urd.db',):
		if not exists(dir_to_make):
			makedirs(dir_to_make, 0o750)
	for dir_to_make in (workdir, 'results',):
		if not exists(dir_to_make):
			makedirs(dir_to_make)
	with open(slices_conf, 'w') as fh:
		fh.write('%d\n' % (options.slices,))
	method_dir = options.name
	if not exists(method_dir):
		makedirs(method_dir)
	with open(join(method_dir, '__init__.py'), 'w') as fh:
		pass
	with open(join(method_dir, 'methods.conf'), 'w') as fh:
		fh.write('example\n')
	with open(join(method_dir, 'a_example.py'), 'w') as fh:
		fh.write(a_example)
	with open(join(method_dir, 'build.py'), 'w') as fh:
		fh.write(build_script)
	with open('accelerator.conf', 'w') as fh:
		fh.write(config_template.format(
			name=quote(options.name),
			slices=options.slices,
			input=options.input,
			major=version_info.major,
			minor=version_info.minor,
			micro=version_info.micro,
		))
Example 15
def main(argv):
	from os import makedirs, listdir, chdir
	from os.path import exists, join, realpath
	from sys import version_info
	from argparse import RawTextHelpFormatter
	from accelerator.compat import ArgumentParser
	from accelerator.error import UserError
	from accelerator.extras import DotDict

	parser = ArgumentParser(
		prog=argv.pop(0),
		description=r'''
			creates an accelerator project directory.
			defaults to the current directory.
			creates accelerator.conf, a method dir, a workdir and result dir.
			both the method directory and workdir will be named <NAME>,
			"dev" by default.
		'''.replace('\t', ''),
		formatter_class=RawTextHelpFormatter,
	)
	parser.add_argument('--slices', default=None, type=int, help='override slice count detection')
	parser.add_argument('--name', default='dev', help='name of method dir and workdir, default "dev"')
	parser.add_argument('--input', default='# /some/path where you want import methods to look.', help='input directory')
	parser.add_argument('--force', action='store_true', help='go ahead even though directory is not empty, or workdir\nexists with incompatible slice count')
	parser.add_argument('--tcp', default=False, metavar='HOST/PORT', nargs='?', help='listen on TCP instead of unix sockets.\nspecify HOST (can be IP) to listen on that host\nspecify PORT to use range(PORT, PORT + 3)\nspecify both as HOST:PORT')
	parser.add_argument('--no-git', action='store_true', help='don\'t create git repository')
	parser.add_argument('directory', default='.', help='project directory to create. default "."', metavar='DIR', nargs='?')
	options = parser.parse_args(argv)

	assert options.name
	assert '/' not in options.name

	if options.tcp is False:
		listen = DotDict(
			board='.socket.dir/board',
			server='.socket.dir/server',
			urd='.socket.dir/urd',
		)
	else:
		hostport = options.tcp or ''
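		# accepted forms: "8000" (port only), "host:8000", "host" or "[::1]" (host only, ports picked automatically)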
		if hostport.endswith(']'): # ipv6
			host, port = hostport, None
		elif ':' in hostport:
			host, port = hostport.rsplit(':', 1)
		elif hostport.isdigit():
			host, port = '', hostport
		else:
			host, port = hostport, None
		if port:
			port = int(port)
		else:
			port = find_free_ports(0x3000, 0x8000)
		listen = DotDict(
			server='%s:%d' % (host, port,),
			board='%s:%d' % (host, port + 1,),
			urd='%s:%d' % (host, port + 2,),
		)

	if not options.input.startswith('#'):
		options.input = quote(realpath(options.input))
	prefix = realpath(options.directory)
	workdir = join(prefix, 'workdirs', options.name)
	slices_conf = join(workdir, '.slices')
	try:
		with open(slices_conf, 'r') as fh:
			workdir_slices = int(fh.read())
	except IOError:
		workdir_slices = None
	if workdir_slices and options.slices is None:
		options.slices = workdir_slices
	if options.slices is None:
		from multiprocessing import cpu_count
		options.slices = cpu_count()
	if workdir_slices and workdir_slices != options.slices and not options.force:
		raise UserError('Workdir %r has %d slices, refusing to continue with %d slices' % (workdir, workdir_slices, options.slices,))

	if not options.force and exists(options.directory) and listdir(options.directory):
		raise UserError('Directory %r is not empty.' % (options.directory,))
	if not exists(options.directory):
		makedirs(options.directory)
	chdir(options.directory)
	for dir_to_make in ('.socket.dir', 'urd.db',):
		if not exists(dir_to_make):
			makedirs(dir_to_make, 0o750)
	for dir_to_make in (workdir, 'results',):
		if not exists(dir_to_make):
			makedirs(dir_to_make)
	with open(slices_conf, 'w') as fh:
		fh.write('%d\n' % (options.slices,))
	method_dir = options.name
	if not exists(method_dir):
		makedirs(method_dir)
	with open(join(method_dir, '__init__.py'), 'w') as fh:
		pass
	with open(join(method_dir, 'methods.conf'), 'w') as fh:
		fh.write('example\n')
	with open(join(method_dir, 'a_example.py'), 'w') as fh:
		fh.write(a_example)
	with open(join(method_dir, 'build.py'), 'w') as fh:
		fh.write(build_script)
	with open('accelerator.conf', 'w') as fh:
		fh.write(config_template.format(
			name=quote(options.name),
			slices=options.slices,
			input=options.input,
			major=version_info.major,
			minor=version_info.minor,
			micro=version_info.micro,
			listen=DotDict({k: quote(v) for k, v in listen.items()}),
		))
	if not options.no_git:
		git(method_dir)
Example 16
def main(argv, cfg):
    parser = ArgumentParser(
        usage="%(prog)s [options] pattern ds [ds [...]] [column [column [...]]",
        prog=argv.pop(0),
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument('pattern')
    parser.add_argument(
        'dataset', help='can be specified in the same ways as for "ax ds"')
    parser.add_argument('columns', nargs='*', default=[])
    args = parser.parse_intermixed_args(argv)

    pat_s = re.compile(args.pattern, re.IGNORECASE if args.ignore_case else 0)
    datasets = [name2ds(cfg, args.dataset)]
    columns = []

    for ds_or_col in args.columns:
        if columns:
            columns.append(ds_or_col)
        else:
            try:
                datasets.append(name2ds(cfg, ds_or_col))
            except Exception:
                columns.append(ds_or_col)

    if not datasets:
        parser.print_help(file=sys.stderr)
        return 1

    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = None

    if args.slice:
        want_slices = []
        for s in args.slice:
            assert 0 <= s < g.slices, "Slice %d not available" % (s, )
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    if columns or grep_columns:
        bad = False
        need_cols = set(columns)
        if grep_columns:
            need_cols.update(grep_columns)
        for ds in datasets:
            missing = need_cols - set(ds.columns)
            if missing:
                print('ERROR: %s does not have columns %r' % (
                    ds,
                    missing,
                ),
                      file=sys.stderr)
                bad = True
        if bad:
            return 1

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.pattern == '':
        highlight_matches = False

    separator = args.separator
    if separator is None and not sys.stdout.isatty():
        separator = '\t'

    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        def separate(items, lens):
            things = []
            for item, item_len in zip(items, lens):
                things.append(item)
                spaces = 8 - (item_len % 8)
                things.append(colour(' ' * spaces, 'cyan', 'underline'))
            return ''.join(things[:-1])

        separator = '\t'
    else:
        separator_coloured = colour(separator, 'cyan', 'underline')

        def separate(items, lens):
            return separator_coloured.join(items)

    def json_default(obj):
        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
            return str(obj)
        elif isinstance(obj, complex):
            return [obj.real, obj.imag]
        else:
            return repr(obj)

    if args.format == 'csv':

        def escape_item(item):
            if item and (separator in item or item[0] in '\'"'
                         or item[-1] in '\'"'):
                return '"' + item.replace('\n', '\\n').replace('"', '""') + '"'
            else:
                return item.replace('\n', '\\n')

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    def grep(ds, sliceno):
        def no_conv(v):
            return v

        def mk_conv(col):
            if ds.columns[col].type in (
                    'bytes',
                    'unicode',
                    'ascii',
            ):
                if not ds.columns[col].none_support:
                    return no_conv
            return unicode

        chk = pat_s.search

        def mk_iter(col):
            if ds.columns[col].type == 'ascii':
                it = ds._column_iterator(sliceno, col, _type='unicode')
            else:
                it = ds._column_iterator(sliceno, col)
            if ds.columns[col].type == 'bytes':
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        def colour_item(item):
            pos = 0
            parts = []
            for m in pat_s.finditer(item):
                a, b = m.span()
                parts.extend((item[pos:a], colour.red(item[a:b])))
                pos = b
            parts.append(item[pos:])
            return ''.join(parts)

        if args.format == 'json':
            prefix = {}
            dumps = json.JSONEncoder(ensure_ascii=False,
                                     default=json_default).encode
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno

            def show():
                d = dict(zip(used_columns, items))
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    prefix['data'] = d
                    d = prefix
                return dumps(d).encode('utf-8', 'surrogatepass')
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)

            def show():
                data = list(prefix)
                if args.show_lineno:
                    data.append(unicode(lineno))
                if PY2:
                    show_items = (v if isinstance(v, unicode) else
                                  str(v).decode('utf-8', 'replace')
                                  for v in items)
                else:
                    show_items = map(str, items)
                show_items = list(show_items)
                lens = (len(item) for item in data + show_items)
                if highlight_matches:
                    show_items = list(map(colour_item, show_items))
                if escape_item:
                    lens_unesc = (len(item) for item in data + show_items)
                    show_items = list(map(escape_item, show_items))
                    lens_esc = (len(item) for item in data + show_items)
                    lens = (
                        l + esc - unesc
                        for l, unesc, esc in zip(lens, lens_unesc, lens_esc))
                data.extend(show_items)
                return separate(data, lens).encode('utf-8', errors)

        used_columns = columns or sorted(ds.columns)
        if grep_columns and grep_columns != set(used_columns):
            grep_iter = izip(*(mk_iter(col) for col in grep_columns))
            conv_items = [mk_conv(col) for col in grep_columns]
        else:
            grep_iter = repeat(None)
            conv_items = [mk_conv(col) for col in used_columns]
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if any(
                    chk(conv(item))
                    for conv, item in izip(conv_items, grep_items or items)):
                # This will be atomic if the line is not too long
                # (at least up to PIPE_BUF bytes, should be at least 512).
                write(1, show() + b'\n')

    def one_slice(sliceno, q, wait_for):
        try:
            if q:
                q.get()
            for ds in datasets:
                if ds in wait_for:
                    q.task_done()
                    q.get()
                grep(ds, sliceno)
        except KeyboardInterrupt:
            return
        except IOError as e:
            if e.errno == errno.EPIPE:
                return
            else:
                raise
        finally:
            # Make sure we are joinable
            try:
                q.task_done()
            except Exception:
                pass

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    headers = {}
    if args.headers:
        if columns:
            current_headers = columns
        else:
            current_headers = None
            for ds in datasets:
                candidate_headers = sorted(ds.columns)
                if candidate_headers != current_headers:
                    headers[ds] = current_headers = candidate_headers
            current_headers = headers.pop(datasets[0])

        def show_headers(headers):
            if args.format != 'json':
                show_items = headers_prefix + headers
                if escape_item:
                    show_items = list(map(escape_item, show_items))
                print(
                    separate(map(colour.blue, show_items),
                             map(len, show_items)))

        show_headers(current_headers)

    queues = []
    children = []
    if not args.ordered:
        q = None
        wait_for = set(headers)
        for sliceno in want_slices[1:]:
            if wait_for:
                q = JoinableQueue()
                q.put(None)
                queues.append(q)
            p = Process(
                target=one_slice,
                args=(sliceno, q, wait_for),
                name='slice-%d' % (sliceno, ),
            )
            p.daemon = True
            p.start()
            children.append(p)
        want_slices = want_slices[:1]

    try:
        for ds in datasets:
            if ds in headers:
                for q in queues:
                    q.join()
                show_headers(headers.pop(ds))
                for q in queues:
                    q.put(None)
            for sliceno in want_slices:
                grep(ds, sliceno)
        for c in children:
            c.join()
    except KeyboardInterrupt:
        print()
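
In the example above, header changes are synchronised between the per-slice processes with a JoinableQueue per child (created only when headers actually change): each child holds a token, calls task_done() when it reaches such a dataset and waits for a new token, while the main process join()s every queue before printing the new headers and then hands out fresh tokens. A small stand-alone sketch of that handshake using only the standard library (worker names and the number of fences are made up):

from multiprocessing import Process, JoinableQueue

def worker(name, q, fences):
    q.get()                      # initial token from the parent
    for phase in range(fences + 1):
        print('%s working in phase %d' % (name, phase))
        if phase < fences:
            q.task_done()        # tell the parent we reached the fence
            q.get()              # wait to be released into the next phase
    q.task_done()                # stay joinable

if __name__ == '__main__':
    fences = 1
    queues, procs = [], []
    for name in ('w0', 'w1'):
        q = JoinableQueue()
        q.put(None)
        p = Process(target=worker, args=(name, q, fences))
        p.start()
        queues.append(q)
        procs.append(p)
    for _ in range(fences):
        for q in queues:
            q.join()             # every worker has parked at this fence
        print('--- all workers fenced, headers would be printed here ---')
        for q in queues:
            q.put(None)          # release them into the next phase
    for q in queues:
        q.join()
    for p in procs:
        p.join()
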
Example 17
0
def main(argv, cfg):
    # -C overrides -A and -B (which in turn override -C)
    class ContextAction(Action):
        def __call__(self, parser, namespace, values, option_string=None):
            namespace.before_context = namespace.after_context = values

    parser = ArgumentParser(
        usage=
        "%(prog)s [options] [-e] pattern [...] [-d] ds [...] [[-n] column [...]]",
        description="""positional arguments:
  pattern               (-e, --regexp)
  dataset               (-d, --dataset) can be specified as for "ax ds"
  columns               (-n, --column)""",
        prog=argv.pop(0),
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-v',
        '--invert-match',
        action='store_true',
        help="select non-matching lines",
    )
    parser.add_argument(
        '-o',
        '--only-matching',
        action='store_true',
        help="only print matching part (or columns with -l)",
    )
    parser.add_argument(
        '-l',
        '--list-matching',
        action='store_true',
        help=
        "only print matching datasets (or slices with -S)\nwhen used with -o, only print matching columns",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-M',
        '--allow-missing-columns',
        action='store_true',
        help="datasets are allowed to not have (some) columns",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument(
        '-T',
        '--tab-length',
        type=int,
        metavar='LENGTH',
        help="field alignment, always uses spaces as separator",
    )
    parser.add_argument(
        '-B',
        '--before-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of leading context",
    )
    parser.add_argument(
        '-A',
        '--after-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of trailing context",
    )
    parser.add_argument(
        '-C',
        '--context',
        type=int,
        default=0,
        metavar='NUM',
        action=ContextAction,
        help="print NUM lines of context\n" +
        "context is only taken from the same slice of the same\n" +
        "dataset, and may intermix with output from other\n" +
        "slices. Use -O to avoid that, or -S -L to see it.",
    )
    parser.add_argument('-e',
                        '--regexp',
                        default=[],
                        action='append',
                        dest='patterns',
                        help=SUPPRESS)
    parser.add_argument('-d',
                        '--dataset',
                        default=[],
                        action='append',
                        dest='datasets',
                        help=SUPPRESS)
    parser.add_argument('-n',
                        '--column',
                        default=[],
                        action='append',
                        dest='columns',
                        help=SUPPRESS)
    parser.add_argument('words', nargs='*', help=SUPPRESS)
    args = parser.parse_intermixed_args(argv)

    if args.before_context < 0 or args.after_context < 0:
        print('Context must be >= 0', file=sys.stderr)
        return 1

    columns = args.columns

    try:
        args.datasets = [name2ds(cfg, ds) for ds in args.datasets]
    except NoSuchWhateverError as e:
        print(e, file=sys.stderr)
        return 1

    for word in args.words:
        if not args.patterns:
            args.patterns.append(word)
        elif columns and args.datasets:
            columns.append(word)
        else:
            try:
                args.datasets.append(name2ds(cfg, word))
            except NoSuchWhateverError as e:
                if not args.datasets:
                    print(e, file=sys.stderr)
                    return 1
                columns.append(word)

    if not args.patterns or not args.datasets:
        parser.print_help(file=sys.stderr)
        return 1

    datasets = args.datasets
    patterns = []
    for pattern in args.patterns:
        try:
            patterns.append(
                re.compile(pattern, re.IGNORECASE if args.ignore_case else 0))
        except re.error as e:
            print("Bad pattern %r:\n%s" % (
                pattern,
                e,
            ), file=sys.stderr)
            return 1

    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = set()

    if args.slice:
        want_slices = []
        for s in args.slice:
            assert 0 <= s < g.slices, "Slice %d not available" % (s, )
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if len(want_slices) == 1:
        # a single slice is already ordered, so skip the extra work.
        args.ordered = False

    if args.only_matching:
        if args.list_matching:
            args.list_matching = False
            only_matching = 'columns'
        else:
            only_matching = 'part'
    else:
        only_matching = False

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    def columns_for_ds(ds, columns=columns):
        if columns:
            return [n for n in columns if n in ds.columns]
        else:
            return sorted(ds.columns)

    if columns or grep_columns:
        if args.allow_missing_columns:
            keep_datasets = []
            for ds in datasets:
                if not columns_for_ds(ds):
                    continue
                if grep_columns and not columns_for_ds(ds, grep_columns):
                    continue
                keep_datasets.append(ds)
            if not keep_datasets:
                return 0
            datasets = keep_datasets
        else:
            bad = False
            need_cols = set(columns)
            if grep_columns:
                need_cols.update(grep_columns)
            for ds in datasets:
                missing = need_cols - set(ds.columns)
                if missing:
                    print('ERROR: %s does not have columns %r' % (
                        ds,
                        missing,
                    ),
                          file=sys.stderr)
                    bad = True
            if bad:
                return 1

    # For the status reporting, this gives how many lines have been processed
    # when reaching each ds ix, per slice. Ends with an extra fictional ds,
    # i.e. the total number of lines for that slice. And then the same again,
    # to simplify the code in the status shower.
    total_lines_per_slice_at_ds = [[0] * g.slices]
    for ds in datasets:
        total_lines_per_slice_at_ds.append(
            [a + b for a, b in zip(total_lines_per_slice_at_ds[-1], ds.lines)])
    total_lines_per_slice_at_ds.append(total_lines_per_slice_at_ds[-1])
    status_interval = {
        # twice per percent, but not too often or too seldom
        sliceno: min(max(total_lines_per_slice_at_ds[-1][sliceno] // 200, 10),
                     5000)
        for sliceno in want_slices
    }
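    # For example, a slice with 1,000,000 lines reports every 5,000 lines
    # (1,000,000 // 200, capped at 5,000), one with 100,000 lines every 500 lines,
    # and a nearly empty slice still at least every 10 lines.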

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        args.colour = 'auto'
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.patterns == ['']:
        highlight_matches = False
    # Don't highlight anything with -l
    if args.list_matching:
        highlight_matches = False

    if args.format == 'json':
        # --headers makes no sense with json output, ignore it
        args.headers = False

    separator = args.separator
    if args.tab_length:
        separator = None
    elif separator is None and not sys.stdout.isatty():
        separator = '\t'

    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        # and also so you can change the length of tabs.
        if (args.tab_length or 0) < 1:
            args.tab_length = 8

        def separate(items, lens):
            things = []
            for item, item_len in zip(items, lens):
                things.append(item)
                spaces = args.tab_length - (item_len % args.tab_length)
                things.append(colour(' ' * spaces, 'grep/separator'))
            return ''.join(things[:-1])

        separator = '\t'
    else:
        separator_coloured = colour(separator, 'grep/separator')

        def separate(items, lens):
            return separator_coloured.join(items)

    def json_default(obj):
        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
            return str(obj)
        elif isinstance(obj, complex):
            return [obj.real, obj.imag]
        else:
            return repr(obj)

    if args.format == 'csv':

        def escape_item(item):
            if item and (separator in item or item[0] in '\'"'
                         or item[-1] in '\'"'):
                return '"' + item.replace('\n', '\\n').replace('"', '""') + '"'
            else:
                return item.replace('\n', '\\n')

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    # This is for the ^T handling. Each slice sends an update when finishing
    # a dataset, and every status_interval[sliceno] lines while iterating.
    # To minimise the data sent the only information sent over the queue
    # is (sliceno, finished_dataset).
    # Status printing is triggered by ^T (or SIGINFO if that is available)
    # or by SIGUSR1.
    # Pressing it again within two seconds prints stats per slice too.
    q_status = mp.LockFreeQueue()

    def status_collector():
        q_status.make_reader()
        status = {sliceno: [0, 0] for sliceno in want_slices}
        #            [ds_ix, done_lines]
        total_lines = sum(total_lines_per_slice_at_ds[-1])
        previous = [0]
        # base the colour conf on whether stderr is a tty, not stdout.
        if args.colour == 'auto':
            colour.configure_from_environ(stdout=sys.stderr)

        def show(sig, frame):
            t = monotonic()
            verbose = (previous[0] + 2 > t)  # within 2 seconds of previous
            previous[0] = t
            ds_ixes = []
            progress_lines = []
            progress_fraction = []
            for sliceno in want_slices:
                ds_ix, done_lines = status[sliceno]
                ds_ixes.append(ds_ix)
                max_possible = min(
                    done_lines + status_interval[sliceno],
                    total_lines_per_slice_at_ds[ds_ix + 1][sliceno])
                done_lines = (done_lines +
                              max_possible) / 2  # middle of the possibilities
                progress_lines.append(done_lines)
                total = total_lines_per_slice_at_ds[-1][sliceno]
                if total == 0:
                    progress_fraction.append(1)
                else:
                    progress_fraction.append(done_lines / total)
            progress_total = sum(progress_lines) / (total_lines or 1)
            bad_cutoff = progress_total - 0.1
            if verbose:
                show_ds = (len(datasets) > 1 and min(ds_ixes) != max(ds_ixes))
                for sliceno, ds_ix, p in zip(want_slices, ds_ixes,
                                             progress_fraction):
                    if ds_ix == len(datasets):
                        msg = 'DONE'
                    else:
                        msg = '{0:d}% of {1:n} lines'.format(
                            round(p * 100),
                            total_lines_per_slice_at_ds[-1][sliceno])
                        if show_ds:
                            msg = '%s (in %s)' % (
                                msg,
                                datasets[ds_ix].quoted,
                            )
                    msg = '%9d: %s' % (
                        sliceno,
                        msg,
                    )
                    if p < bad_cutoff:
                        msg = colour(msg, 'grep/infohighlight')
                    else:
                        msg = colour(msg, 'grep/info')
                    write(2, msg.encode('utf-8') + b'\n')
            msg = '{0:d}% of {1:n} lines'.format(round(progress_total * 100),
                                                 total_lines)
            if len(datasets) > 1:
                min_ds = min(ds_ixes)
                max_ds = max(ds_ixes)
                if min_ds < len(datasets):
                    ds_name = datasets[min_ds].quoted
                    extra = '' if min_ds == max_ds else ' ++'
                    msg = '%s (in %s%s)' % (
                        msg,
                        ds_name,
                        extra,
                    )
            worst = min(progress_fraction)
            if worst < bad_cutoff:
                msg = '%s, worst %d%%' % (
                    msg,
                    round(worst * 100),
                )
            msg = colour('  SUMMARY: %s' % (msg, ), 'grep/info')
            write(2, msg.encode('utf-8') + b'\n')

        for signame in ('SIGINFO', 'SIGUSR1'):
            if hasattr(signal, signame):
                sig = getattr(signal, signame)
                signal.signal(sig, show)
                if hasattr(signal, 'pthread_sigmask'):
                    signal.pthread_sigmask(signal.SIG_UNBLOCK, {sig})
        tc_original = None
        using_stdin = False
        if not hasattr(signal, 'SIGINFO') and sys.stdin.isatty():
            # ^T won't work automatically on this OS, so we need to handle it as terminal input
            import termios
            from accelerator.compat import selectors
            sel = selectors.DefaultSelector()
            sel.register(0, selectors.EVENT_READ)
            sel.register(q_status.r, selectors.EVENT_READ)
            try:
                tc_original = termios.tcgetattr(0)
                tc_changed = list(tc_original)
                tc_changed[3] &= ~(termios.ICANON | termios.IEXTEN)
                termios.tcsetattr(0, termios.TCSADRAIN, tc_changed)
                using_stdin = True
            except Exception:
                pass
            # we can't set stdin nonblocking, because it's probably the same
            # file description as stdout, so work around that with alarms.
            def got_alarm(sig, frame):
                raise IOError()

            signal.signal(signal.SIGALRM, got_alarm)
        try:
            while True:
                if using_stdin:
                    do_q = False
                    for key, _ in sel.select():
                        if key.fd == 0:
                            try:
                                # in case something else read it, block for at most 1 second
                                signal.alarm(1)
                                try:
                                    pressed = ord(os.read(0, 1))
                                finally:
                                    signal.alarm(0)
                                if pressed == 20:
                                    write(2, b'\n')  # "^T" shows in the terminal
                                    os.kill(os.getpid(), signal.SIGUSR1)
                            except Exception:
                                pass
                        elif key.fd == q_status.r:
                            do_q = True
                    if not do_q:
                        continue
                try:
                    sliceno, finished_dataset = q_status.get()
                except QueueEmpty:
                    return
                if finished_dataset:
                    ds_ix = status[sliceno][0] + 1
                    status[sliceno] = [
                        ds_ix, total_lines_per_slice_at_ds[ds_ix][sliceno]
                    ]
                else:
                    status[sliceno][1] += status_interval[sliceno]
        finally:
            if tc_original is not None:
                try:
                    termios.tcsetattr(0, termios.TCSADRAIN, tc_original)
                except Exception:
                    pass

    status_process = mp.SimplifiedProcess(target=status_collector,
                                          name='ax grep status')
    # everything else will write, so make it a writer right away
    q_status.make_writer()

    # Output is only allowed while holding this lock, so that long lines
    # do not get intermixed. (Or when alone in producing output.)
    io_lock = Lock()

    # This contains some extra stuff to be a better base for the other
    # outputters.
    # When used directly it enforces no ordering, but merges smaller writes
    # to keep the number of syscalls down.

    class Outputter:
        def __init__(self, q_in, q_out):
            self.q_in = q_in
            self.q_out = q_out
            self.buffer = []
            self.merge_buffer = b''

        def put(self, data):
            self.merge_buffer += data
            if len(self.merge_buffer) >= 1024:
                self.move_merge()

        def move_merge(self):
            if self.merge_buffer:
                with io_lock:
                    write(1, self.merge_buffer)
                self.merge_buffer = b''

        def start(self, ds):
            pass

        def end(self, ds):
            self.move_merge()

        def finish(self):
            pass

        def full(self):
            return len(self.buffer) > 5000

        def excite(self):
            self.move_merge()
            if self.buffer:
                self.pump(False)

    # Partially ordered output, each header change acts as a fence.
    # This is used in all slices except the first.
    #
    # The queue gets True when the previous slice is ready for the next
    # header change, and None when the header is printed (and it's ok
    # to resume output).

    class HeaderWaitOutputter(Outputter):
        def start(self, ds):
            if ds in headers:
                self.add_wait()
            else:
                self.excite()

        def add_wait(self):
            # Each sync point is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def move_merge(self):
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            with io_lock:
                write(1, data)

        def pump(self, wait=None):
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # since pump is only called when we have outputted all
                # currently allowed output or when the next message is an
                # unblock for such output we can just unconditionally send
                # the True on to the next slice here.
                self.q_out.put(True)
                self.pump(wait)
                return
            else:
                self.q_out.put(None)
                self.drain()

        def drain(self):
            assert self.buffer[0] is None, \
                'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        break
                    elif data:
                        write(1, data)
                else:
                    # We did not reach the next fence, so last item is real data
                    # and needs to be removed. (The buffer will then be empty and
                    # output will continue directly until reaching the sync point.)
                    pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            while self.buffer:
                self.pump(True)

    # Partially ordered output, each header change acts as a fence.
    # This is used only in the first slice, and outputs the headers.
    #
    # When it is ready to output headers it sends True in the queue.
    # When the True has travelled around the queue ring all slices are
    # ready, the headers are printed, and None is sent to let the other
    # slices resume output.
    # (When the None returns it is ignored, because output is resumed
    # as soon as the headers are printed.)

    class HeaderOutputter(HeaderWaitOutputter):
        def add_wait(self):
            if not self.buffer:
                self.q_out.put(True)
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain/.put
            self.pump()

        def drain(self):
            assert self.buffer[0] is None, \
                'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        self.q_out.put(True)
                        break
                    elif data:
                        write(1, data)
                else:
                    pos += 1
            self.buffer[:pos] = ()

        def pump(self, wait=None):
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # The True we put in when reaching the fence has travelled
                # all the way around the queue ring, it's time to print the
                # new headers
                write(1, next(headers_iter))
                # and then unblock the other slices
                self.q_out.put(None)
                self.drain()
                # No else, when the None comes back we just drop it.
            if not wait:
                self.pump(False)

    # Fully ordered output, each slice waits for the previous slice.
    # For each ds, waits for None (anything really) before starting,
    # sends None when done.

    class OrderedOutputter(Outputter):
        def start(self, ds):
            # Each ds is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def end(self, ds):
            self.move_merge()
            if not self.buffer:
                # We are done with this ds, so let next slice continue
                self.q_out.put(None)

        def pump(self, wait=None):
            if wait is None:
                wait = self.full()
            try:
                self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            self.drain()

        def move_merge(self):
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            # No need for a lock, the other slices aren't writing concurrently.
            write(1, data)

        def drain(self):
            assert self.buffer[0] is None
            for pos, data in enumerate(self.buffer[1:], 1):
                if data is None:
                    # We are done with this ds, so let next slice continue
                    self.q_out.put(None)
                    break
                elif data:
                    write(1, data)
            else:
                # We did not reach the next ds, so last item is real data and
                # needs to be removed. (The buffer will then be empty and
                # output will continue directly until reaching the next ds.)
                pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            not_finished = bool(self.buffer)
            while self.buffer:
                self.pump(True)
            if not_finished:
                self.q_out.put(None)

    # Same as above but for the first slice so it prints headers when needed.

    class OrderedHeaderOutputter(OrderedOutputter):
        def start(self, ds):
            # Each ds is separated by None in the buffer
            self.buffer.append(None)
            if ds in headers:
                # Headers changed, start with those.
                self.buffer.append(next(headers_iter))
            else:
                self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

    # Choose the right outputter for the kind of sync we need.
    def outputter(q_in, q_out, first_slice=False):
        if args.list_matching:
            cls = Outputter
        elif args.ordered:
            if first_slice:
                cls = OrderedHeaderOutputter
            else:
                cls = OrderedOutputter
        elif headers:
            if first_slice:
                cls = HeaderOutputter
            else:
                cls = HeaderWaitOutputter
        else:
            cls = Outputter
        return cls(q_in, q_out)

    # Make printer for the selected output options
    def make_show(prefix, used_columns):
        def matching_ranges(item):
            ranges = []
            for p in patterns:
                ranges.extend(m.span() for m in p.finditer(item))
            if not ranges:
                return
            # merge overlapping/adjacent ranges
            ranges.sort()
            ranges = iter(ranges)
            start, stop = next(ranges)
            for a, b in ranges:
                if a <= stop:
                    stop = max(stop, b)
                else:
                    yield start, stop
                    start, stop = a, b
            yield start, stop

        def filter_item(item):
            return ''.join(item[a:b] for a, b in matching_ranges(item))

        if args.format == 'json':
            dumps = json.JSONEncoder(ensure_ascii=False,
                                     default=json_default).encode

            def show(lineno, items):
                if only_matching == 'part':
                    items = [filter_item(unicode(item)) for item in items]
                if only_matching == 'columns':
                    d = {
                        k: v
                        for k, v in zip(used_columns, items)
                        if filter_item(unicode(v))
                    }
                else:
                    d = dict(zip(used_columns, items))
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    prefix['data'] = d
                    d = prefix
                return dumps(d).encode('utf-8', 'surrogatepass') + b'\n'
        else:

            def colour_item(item):
                pos = 0
                parts = []
                for a, b in matching_ranges(item):
                    parts.extend(
                        (item[pos:a], colour(item[a:b], 'grep/highlight')))
                    pos = b
                parts.append(item[pos:])
                return ''.join(parts)

            def show(lineno, items):
                data = list(prefix)
                if args.show_lineno:
                    data.append(unicode(lineno))
                show_items = map(unicode, items)
                if only_matching:
                    if only_matching == 'columns':
                        show_items = (item if filter_item(item) else ''
                                      for item in show_items)
                    else:
                        show_items = map(filter_item, show_items)
                show_items = list(show_items)
                lens = (len(item) for item in data + show_items)
                if highlight_matches:
                    show_items = list(map(colour_item, show_items))
                if escape_item:
                    lens_unesc = (len(item) for item in data + show_items)
                    show_items = list(map(escape_item, show_items))
                    lens_esc = (len(item) for item in data + show_items)
                    lens = (
                        l + esc - unesc
                        for l, unesc, esc in zip(lens, lens_unesc, lens_esc))
                data.extend(show_items)
                return separate(data, lens).encode('utf-8', errors) + b'\n'

        return show

    # This is called for each slice in each dataset.
    # Each slice has a separate process (the same for all datasets).
    # The first slice runs in the main process (unless -l), everything
    # else runs from one_slice.

    def grep(ds, sliceno, out):
        out.start(ds)
        if len(patterns) == 1:
            chk = patterns[0].search
        else:

            def chk(s):
                return any(p.search(s) for p in patterns)

        first = [True]

        def mk_iter(col):
            kw = {}
            if first[0]:
                first[0] = False
                lines = ds.lines[sliceno]
                if lines > status_interval[sliceno]:

                    def cb(n):
                        q_status.put((sliceno, False))
                        out.excite()

                    kw['callback'] = cb
                    kw['callback_interval'] = status_interval[sliceno]
            if ds.columns[col].type == 'ascii':
                kw['_type'] = 'unicode'
            it = ds._column_iterator(sliceno, col, **kw)
            if ds.columns[col].type == 'bytes':
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        used_columns = columns_for_ds(ds)
        used_grep_columns = grep_columns and columns_for_ds(ds, grep_columns)
        if grep_columns and set(used_grep_columns) != set(used_columns):
            grep_iter = izip(*(mk_iter(col) for col in used_grep_columns))
        else:
            grep_iter = repeat(None)
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        if args.before_context:
            before = deque((), args.before_context)
        else:
            before = None
        if args.format == 'json':
            prefix = {}
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno
            show = make_show(prefix, used_columns)
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)
            show = make_show(prefix, used_columns)
        if args.invert_match:
            maybe_invert = operator.not_
        else:
            maybe_invert = bool
        to_show = 0
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if maybe_invert(
                    any(chk(unicode(item)) for item in grep_items or items)):
                if q_list:
                    q_list.put((ds, sliceno))
                    return
                while before:
                    out.put(show(*before.popleft()))
                to_show = 1 + args.after_context
            if to_show:
                out.put(show(lineno, items))
                to_show -= 1
            elif before is not None:
                before.append((lineno, items))
        out.end(ds)

    # This runs in a separate process for each slice except the first
    # one (unless -l), which is handled specially in the main process.

    def one_slice(sliceno, q_in, q_out, q_to_close):
        if q_to_close:
            q_to_close.close()
        if q_in:
            q_in.make_reader()
        if q_out:
            q_out.make_writer()
        if q_list:
            q_list.make_writer()
        try:
            out = outputter(q_in, q_out)
            for ds in datasets:
                if seen_list is None or ds not in seen_list:
                    grep(ds, sliceno, out)
                q_status.put((sliceno, True))
            out.finish()
        except QueueEmpty:
            # some other process died, no need to print an error here
            sys.exit(1)

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # {ds: headers} for each ds where headers change (not including the first).
    # this is every ds where sync between slices has to happen when not --ordered.
    headers = OrderedDict()
    if args.headers:
        current_headers = None
        for ds in datasets:
            candidate_headers = columns_for_ds(ds)
            if candidate_headers != current_headers:
                headers[ds] = current_headers = candidate_headers

        def gen_headers(headers):
            show_items = headers_prefix + headers
            if escape_item:
                show_items = list(map(escape_item, show_items))
            coloured = (colour(item, 'grep/header') for item in show_items)
            txt = separate(coloured, map(len, show_items))
            return txt.encode('utf-8', 'surrogatepass') + b'\n'

        # remove the starting ds, so no header changes means no special handling.
        current_headers = headers.pop(datasets[0])
        if not args.list_matching:
            write(1, gen_headers(current_headers))
        headers_iter = iter(map(gen_headers, headers.values()))

    q_in = q_out = first_q_out = q_to_close = q_list = None
    children = [status_process]
    seen_list = None
    if args.list_matching:
        # in this case all slices get their own process
        # and the main process just prints the matching slices
        q_list = mp.LockFreeQueue()
        separate_process_slices = want_slices
        if not args.show_sliceno:
            seen_list = mp.MpSet()
    else:
        separate_process_slices = want_slices[1:]
        if args.ordered or headers:
            # needs to sync in some way
            q_in = first_q_out = mp.LockFreeQueue()
    for sliceno in separate_process_slices:
        if q_in:
            q_out = mp.LockFreeQueue()
        p = mp.SimplifiedProcess(
            target=one_slice,
            args=(
                sliceno,
                q_in,
                q_out,
                q_to_close,
            ),
            name='slice-%d' % (sliceno, ),
        )
        children.append(p)
        if q_in and q_in is not first_q_out:
            q_in.close()
        q_to_close = first_q_out
        q_in = q_out
    if q_in:
        q_out = first_q_out
        q_in.make_reader()
        q_out.make_writer()
        if args.ordered:
            q_in.put_local(None)
    del q_to_close
    del first_q_out

    try:
        if args.list_matching:
            if args.headers:
                headers_prefix = ['[DATASET]']
                if seen_list is None:
                    headers_prefix.append('[SLICE]')
                write(1, gen_headers([]))
            ordered_res = defaultdict(set)
            q_list.make_reader()
            if seen_list is None:
                used_columns = ['dataset', 'sliceno']
            else:
                used_columns = ['dataset']
            inner_show = make_show({} if args.format == 'json' else [],
                                   used_columns)

            def show(ds, sliceno=None):
                if sliceno is None:
                    items = [ds]
                else:
                    items = [ds, sliceno]
                write(1, inner_show(None, items))

            while True:
                try:
                    ds, sliceno = q_list.get()
                except QueueEmpty:
                    break
                if seen_list is None:
                    if args.ordered:
                        ordered_res[ds].add(sliceno)
                    else:
                        show(ds, sliceno)
                elif ds not in seen_list:
                    seen_list.add(ds)
                    if not args.ordered:
                        show(ds)
            if args.ordered:
                for ds in datasets:
                    if seen_list is None:
                        for sliceno in sorted(ordered_res[ds]):
                            show(ds, sliceno)
                    else:
                        if ds in seen_list:
                            show(ds)
        else:
            out = outputter(q_in, q_out, first_slice=True)
            sliceno = want_slices[0]
            for ds in datasets:
                grep(ds, sliceno, out)
                q_status.put((sliceno, True))
            out.finish()
    except QueueEmpty:
        # don't print an error, probably a subprocess died from EPIPE before
        # the main process. (or the subprocess already printed an error.)
        return 1

    q_status.close()
    for c in children:
        c.join()
        if c.exitcode:
            return 1
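
The -C option above uses a small custom argparse Action so that a single flag fills in both context attributes, while a later -A or -B still overrides one of them. A stand-alone sketch of that pattern (prog name, defaults and the example command line are illustrative):

from argparse import Action, ArgumentParser

class ContextAction(Action):
    # set both context attributes from one option; a later -A/-B simply overwrites one of them
    def __call__(self, parser, namespace, values, option_string=None):
        namespace.before_context = namespace.after_context = values

parser = ArgumentParser(prog='demo')
parser.add_argument('-B', '--before-context', type=int, default=0, metavar='NUM')
parser.add_argument('-A', '--after-context', type=int, default=0, metavar='NUM')
parser.add_argument('-C', '--context', type=int, default=0, metavar='NUM', action=ContextAction)

print(parser.parse_args(['-C', '3', '-A', '1']))
# -> before_context stays 3 (from -C), after_context becomes 1 (from the later -A)
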