Example #1
def launch(workdir, setup, config, Methods, active_workdirs, slices, concurrency, debug, server_url, subjob_cookie, parent_pid):
	starttime = monotonic()
	jobid = setup.jobid
	method = setup.method
	if subjob_cookie:
		print_prefix = '    '
	else:
		print_prefix = ''
	print('%s| %s [%s] |' % (print_prefix, jobid, method,))
	args = dict(
		workdir=workdir,
		slices=slices,
		concurrency=concurrency,
		jobid=jobid,
		result_directory=config.get('result_directory', ''),
		common_directory=config.get('common_directory', ''),
		input_directory=config.get('input_directory', ''),
		workdirs=active_workdirs,
		server_url=server_url,
		subjob_cookie=subjob_cookie,
		parent_pid=parent_pid,
		debuggable=config.debuggable,
	)
	from accelerator.runner import runners
	runner = runners[Methods.db[method].version]
	child, prof_r = runner.launch_start(args)
	# There's a race here: if we get interrupted right after the fork this is
	# not recorded (so the launched job could keep running).
	try:
		children.add(child)
		status, data = runner.launch_finish(child, prof_r, workdir, jobid, method)
		if status:
			os.killpg(child, SIGTERM) # give it a chance to exit gracefully
			# The dying process won't have sent an end message, so it has
			# the endwait time until we SIGKILL it.
			print('%s| %s [%s]  failed!    (%5.1fs) |' % (print_prefix, jobid, method, monotonic() - starttime))
		# There is a race where stuff on the status socket has not arrived when
		# the sending process exits. This is basically benign, but let's give
		# it a chance to arrive to cut down on confusing warnings.
		statmsg_endwait(child, 0.1)
	finally:
		try:
			os.killpg(child, SIGKILL) # this should normally be a no-op, but in case it left anything.
		except Exception:
			pass
		try:
			children.remove(child)
		except Exception:
			pass
		try:
			# won't block long (we just killed it, plus it had probably already exited)
			runner.launch_waitpid(child)
		except Exception:
			pass
	if status:
		raise JobError(jobid, method, status)
	print('%s| %s [%s]  completed. (%5.1fs) |' % (print_prefix, jobid, method, monotonic() - starttime))
	return data
Example #2
	def __init__(self, package_list, configfilename, server_config):
		super(SubMethods, self).__init__(package_list, configfilename)
		t0 = monotonic()
		per_runner = defaultdict(list)
		for key, val in iteritems(self.db):
			package = val['package']
			per_runner[val['version']].append((package, key))
		self.runners = new_runners(server_config, set(per_runner))
		warnings = []
		failed = []
		self.hash = {}
		self.params = {}
		self.descriptions = {}
		self.typing = {}
		for version, data in iteritems(per_runner):
			runner = self.runners.get(version)
			if not runner:
				msg = '%%s.%%s (unconfigured interpreter %s)' % (version)
				failed.extend(msg % t for t in sorted(data))
				continue
			v = runner.get_ax_version()
			if v != ax_version:
				if runner.python == sys.executable:
					raise Exception("Server is using accelerator %s but %s is currently installed, please restart server." % (ax_version, v,))
				else:
					print("WARNING: Server is using accelerator %s but runner %r is using accelerator %s." % (ax_version, version, v,))
			w, f, h, p, d = runner.load_methods(package_list, data)
			warnings.extend(w)
			failed.extend(f)
			self.hash.update(h)
			self.params.update(p)
			self.descriptions.update(d)
		for key, params in iteritems(self.params):
			self.typing[key] = options2typing(key, params.options)
			params.defaults = params2defaults(params)
			params.required = options2required(params.options)
		def prt(a, prefix):
			maxlen = (max(len(e) for e in a) + len(prefix))
			line = '=' * maxlen
			print()
			print(line)
			for e in sorted(a):
				msg = prefix + e
				print(msg + ' ' * (maxlen - len(msg)))
			print(line)
			print()
		if warnings:
			prt(warnings, 'WARNING: ')
		if failed:
			print('\033[47;31;1m')
			prt(failed, 'FAILED to import ')
			print('\033[m')
			raise MethodLoadException(failed)
		print("Updated %d methods on %d runners in %.1f seconds" % (
		      len(self.hash), len(per_runner), monotonic() - t0,
		     ))
Example #3
 def get(self, block=True, timeout=0):
     assert self.w == -1, "call make_reader first"
     if self._local:
         return self._local.pop(0)
     self._late_setup()
     if timeout:
         deadline = monotonic() + timeout
     need_data = False
     eof = (self.r == -1)
     while True:
         if not eof:
             try:
                 data = os.read(self.r, PIPE_BUF)
                 if not data:
                     eof = True
                 self._buf += data
                 need_data = False
             except OSError:
                 pass
         if len(self._buf) >= 6:
             z, pid = struct.unpack('<HI', self._buf[:6])
             assert pid, "all is lost"
             if len(self._buf) < 6 + z:
                 need_data = True
             else:
                 data = self._buf[6:6 + z]
                 self._buf = self._buf[6 + z:]
                 if pid not in self._partial:
                     want_len = struct.unpack("<I", data[:4])[0]
                     have = [data[4:]]
                     have_len = len(have[0])
                 else:
                     want_len, have, have_len = self._partial[pid]
                     have.append(data)
                     have_len += len(data)
                 if have_len == want_len:
                     self._partial.pop(pid, None)
                     data = b''.join(have)
                     return pickle.loads(data)
                 else:
                     self._partial[pid] = (want_len, have, have_len)
         if len(self._buf) < 6 or need_data:
             if eof:
                 raise QueueEmpty()
             if not block:
                 if not self._r_sel.select(0):
                     raise QueueEmpty()
             elif timeout:
                 time_left = deadline - monotonic()
                 if time_left <= 0:
                     raise QueueEmpty()
                 self._r_sel.select(time_left)
             else:
                 self._r_sel.select()
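
The get() above implies a simple wire format: every pipe write is one frame with a 6-byte '<HI' header (chunk length, sender pid), and the first frame of a message carries a 4-byte '<I' total length in front of the pickled payload, with any remainder continued in later frames from the same pid. Below is a minimal writer-side sketch that produces frames this reader can parse; it only illustrates the framing (put_sketch is not the project's real make_writer()/put()), and it assumes w is the write end of the same pipe.

import os
import pickle
import struct
from select import PIPE_BUF

def put_sketch(w, obj, pid=None):
    # The reader asserts pid != 0, so default to our own pid.
    pid = pid or os.getpid()
    blob = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
    # 4-byte total length, then the pickled payload.
    msg = struct.pack('<I', len(blob)) + blob
    # Keep each frame (6-byte header + chunk) within PIPE_BUF so writes stay atomic.
    limit = PIPE_BUF - 6
    for ix in range(0, len(msg), limit):
        chunk = msg[ix:ix + limit]
        os.write(w, struct.pack('<HI', len(chunk), pid) + chunk)

# e.g. r, w = os.pipe(); put_sketch(w, {'sliceno': 3})
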
Example #4
def status_stacks_export():
	res = []
	last = [None]
	current = None
	def fmt(tree, start_indent=0):
		for pid, d in sorted(iteritems(tree), key=lambda i: (i[1].stack or ((0,),))[0][0]):
			last[0] = d
			indent = start_indent
			for msg, t, _ in d.stack:
				res.append((pid, indent, msg, t))
				indent += 1
			fmt(d.children, indent)
			if d.output:
				res.append((pid, -1,) + d.output)
	try:
		with status_stacks_lock:
			fmt(status_tree)
		if last[0]:
			current = last[0].summary
			if len(last[0].stack) > 1 and not current[1].endswith('analysis'):
				msg, t, _ = last[0].stack[1]
				current = (current[0], '%s %s' % (current[1], msg,), t,)
	except Exception:
		print_exc(file=sys.stderr)
		res.append((0, 0, 'ERROR', monotonic()))
	return res, current
Example #5
def status(msg):
	if g.running in ('server', 'build', 'shell',):
		yield lambda _: None
		return
	global _cookie
	_cookie += 1
	cookie = str(_cookie)
	t = str(monotonic())
	typ = 'push'
	# capture the PID here, because update might be called in a different process
	pid = os.getpid()
	update_local = _local_status.append
	def update(msg):
		assert msg and isinstance(msg, str_types) and '\0' not in msg
		update_local(msg)
		_send(typ, '\0'.join((msg, t, cookie)), pid=pid)
	update(msg)
	update_local = partial(_local_status.__setitem__, len(_local_status) - 1)
	typ = 'update'
	try:
		yield update
	except Exception as e:
		global _exc_status
		if _exc_status[0] != e:
			_exc_status = (e, list(_local_status))
		raise
	finally:
		_local_status.pop()
		_send('pop', cookie)
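
The generator above is evidently consumed as a context manager: it yields an update callable and cleans up in the finally block. A minimal usage sketch follows, assuming the generator gets wrapped with contextlib.contextmanager; status_cm and do_work are illustrative names, and the module globals the generator relies on (such as _send, _local_status and g) must be in place.

from contextlib import contextmanager

# Wrap the generator shown above; where this wrapping happens in the real
# package is assumed, not shown in the snippet.
status_cm = contextmanager(status)

def do_work():
    with status_cm('crunching numbers') as update:
        for n in range(100):
            update('crunching numbers: %d%%' % (n,))
            # ... the actual per-step work goes here ...
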
Example #6
def _start(msg, parent_pid, is_analysis=False):
	global _cookie
	if is_analysis:
		_cookie += 1
		analysis_cookie = str(_cookie)
	else:
		analysis_cookie = ''
	_send('start', '%d\0%s\0%s\0%f' % (parent_pid, analysis_cookie, msg, monotonic(),))
	def update(msg):
		_send('update', '%s\0\0%s' % (msg, analysis_cookie,))
	return update
Example #7
def print_status_stacks(stacks=None):
	if stacks is None:
		stacks, _ = status_stacks_export()
	report_t = monotonic()
	for pid, indent, msg, t in stacks:
		if indent < 0:
			print("%6d TAIL OF OUTPUT: (%.1f seconds ago)" % (pid, report_t - t,))
			msgs = list(filter(None, msg.split('\n')))[-3:]
			for msg in msgs:
				print("       " + colour.green(msg))
		else:
			print("%6d STATUS: %s%s (%.1f seconds)" % (pid, "    " * indent, msg, report_t - t))
Example #8
def main(urd):
    assert urd.info.slices >= 3, "The tests don't work with less than 3 slices (you have %d)." % (
        urd.info.slices, )

    print()
    print("Testing urd.build and job.load")
    want = ({
        'foo': 'foo',
        'a': 'a'
    }, {
        'foo': None,
        'b': None
    }, {
        'foo': None,
        'c': None
    })
    job = urd.build("test_build_kws")
    assert job.load() == want
    bad = None
    try:
        urd.build("test_build_kws", options=dict(foo='bar'), foo='baz')
        bad = 'Allowed ambiguous keyword "foo"'
    except Exception:
        pass
    assert not bad, bad
    want[0]['foo'] = 'bar'
    want[0]['a'] = 'A'
    job = urd.build("test_build_kws", options=dict(foo='bar'), a='A')
    assert job.load() == want
    assert urd.build("test_build_kws",
                     options=dict(foo='bar'),
                     a='A',
                     b=None,
                     c=None) == job
    want[2]['c'] = job
    job = urd.build("test_build_kws",
                    options=dict(foo='bar', a='override this from kw'),
                    a='A',
                    c=job)
    assert job.load() == want
    want[0]['foo'] = 'foo'
    want[2]['c'] = job
    job = urd.build("test_build_kws",
                    a='A',
                    b=None,
                    c=job,
                    datasets=dict(b='overridden'))
    assert job.load() == want

    print()
    print("Testing urd.begin/end/truncate/get/peek/latest/first/since")
    urd.truncate("tests_urd", 0)
    assert not urd.peek_latest("tests_urd").joblist
    urd.begin("tests_urd", 1, caption="first")
    urd.build("test_build_kws")
    fin = urd.finish("tests_urd")
    assert fin == {'new': True, 'changed': False, 'is_ghost': False}, fin
    urd.begin("tests_urd", 1)
    job = urd.build("test_build_kws")
    fin = urd.finish("tests_urd", caption="first")
    assert fin == {'new': False, 'changed': False, 'is_ghost': False}, fin
    urd.begin("tests_urd", 1)  # will be overridden to 2 in finish
    jl = urd.latest("tests_urd").joblist
    assert jl == [job], '%r != [%r]' % (
        jl,
        job,
    )
    urd.build("test_build_kws", options=dict(foo='bar', a='A'))
    urd.finish("tests_urd", 2, caption="second")
    u = urd.peek_latest("tests_urd")
    assert u.caption == "second"
    dep0 = list(u.deps.values())[0]
    assert dep0.caption == "first", dep0.caption
    assert dep0.joblist == jl, '%r != %r' % (
        dep0.joblist,
        jl,
    )
    assert urd.since("tests_urd", 0) == ['1', '2']
    urd.truncate("tests_urd", 2)
    assert urd.since("tests_urd", 0) == ['1']
    urd.truncate("tests_urd", 0)
    assert urd.since("tests_urd", 0) == []
    ordered_ts = [
        1, 2, 1000000000, '1978-01-01', '1978-01-01+0', '1978-01-01+2',
        '1978-01-01 00:00', '1978-01-01T00:00+42', '2017-06-27',
        '2017-06-27T17:00:00', '2017-06-27 17:00:00+42'
    ]
    for ts in ordered_ts:
        urd.begin("tests_urd")
        if ts == 1000000000:
            urd.get("tests_urd", '1')
        urd.build("test_build_kws")
        urd.finish("tests_urd", ts)
    urd.begin("tests_urd")
    urd.build("test_build_kws")
    urd.finish("tests_urd", ('2019-12', 3))
    ordered_ts.append('2019-12+3')
    ordered_ts = [str(v).replace(' ', 'T') for v in ordered_ts]
    assert urd.since("tests_urd", 0) == ordered_ts
    assert urd.since("tests_urd", '1978-01-01') == ordered_ts[4:]
    assert urd.peek_first("tests_urd").timestamp == '1'
    assert not urd.peek("tests_urd", 2).deps
    dep_jl = list(urd.peek("tests_urd", 1000000000).deps.values())[0].joblist
    assert dep_jl == [job]
    assert urd.peek(
        "tests_urd",
        ('2017-06-27 17:00:00', 42)).timestamp == '2017-06-27T17:00:00+42'
    while ordered_ts:
        urd.truncate("tests_urd", ordered_ts.pop())
        assert urd.since("tests_urd", 0) == ordered_ts, ordered_ts
    want = [date.today() - timedelta(10), datetime.utcnow()]
    for ts in want:
        urd.begin("tests_urd", ts)
        urd.build("test_build_kws")
        urd.finish("tests_urd")
    assert urd.since("tests_urd",
                     0) == [str(ts).replace(' ', 'T') for ts in want]
    urd.truncate("tests_urd", 0)

    for how in (
            "exiting",
            "dying",
    ):
        print()
        print("Verifying that an analysis process %s kills the job" % (how, ))
        time_before = monotonic()
        try:
            job = urd.build("test_analysis_died", how=how)
            print(
                "test_analysis_died completed successfully (%s), that shouldn't happen"
                % (job, ))
            exit(1)
        except JobError:
            time_after = monotonic()
        time_to_die = time_after - time_before
        if time_to_die > 13:
            print(
                "test_analysis_died took %d seconds to die, it should be faster"
                % (time_to_die, ))
            exit(1)
        elif time_to_die > 2:
            print(
                "test_analysis_died took %d seconds to die, so death detection is slow, but works"
                % (time_to_die, ))
        else:
            print(
                "test_analysis_died took %.1f seconds to die, so death detection works"
                % (time_to_die, ))

    print()
    print("Testing dataset creation, export, import")
    source = urd.build("test_datasetwriter")
    urd.build("test_datasetwriter_verify", source=source)
    source = urd.build("test_datasetwriter_copy", source=source)
    urd.build("test_datasetwriter_verify", source=source)
    urd.build("test_datasetwriter_parent")
    urd.build("test_dataset_in_prepare")
    ds = Dataset(source, "passed")
    csvname = "out.csv.gz"
    csvname_uncompressed = "out.csv"
    csv = urd.build("csvexport", filename=csvname, separator="\t", source=ds)
    csv_uncompressed = urd.build("csvexport",
                                 filename=csvname_uncompressed,
                                 separator="\t",
                                 source=ds)
    csv_quoted = urd.build("csvexport",
                           filename=csvname,
                           quote_fields='"',
                           source=ds)
    urd.build("csvexport", filename='slice%d.csv', sliced=True,
              source=ds)  # unused
    reimp_csv = urd.build("csvimport",
                          filename=csv.filename(csvname),
                          separator="\t")
    reimp_csv_uncompressed = urd.build(
        "csvimport",
        filename=csv_uncompressed.filename(csvname_uncompressed),
        separator="\t")
    reimp_csv_quoted = urd.build("csvimport",
                                 filename=csv_quoted.filename(csvname),
                                 quotes=True)
    urd.build("test_compare_datasets", a=reimp_csv, b=reimp_csv_uncompressed)
    urd.build("test_compare_datasets", a=reimp_csv, b=reimp_csv_quoted)

    print()
    print("Testing subjobs")
    urd.build("test_subjobs_type", typed=ds, untyped=reimp_csv)
    urd.build("test_subjobs_nesting")

    print()
    print("Testing datasets more")
    urd.build("test_dataset_column_names")
    urd.build("test_dataset_merge")
    urd.build("test_dataset_filter_columns")
    urd.build("test_dataset_empty_colname")
    urd.build("test_dataset_nan")

    print()
    print("Testing csvimport with more difficult files")
    urd.build("test_csvimport_corner_cases")
    urd.build("test_csvimport_separators")

    print()
    print("Testing csvexport with all column types, strange separators, ...")
    urd.build("test_csvexport_naming")
    urd.build("test_csvexport_all_coltypes")
    urd.build("test_csvexport_separators")
    urd.build("test_csvexport_chains")
    urd.build("test_csvexport_quoting")

    print()
    print("Testing dataset typing")
    try:
        # Test if numeric_comma is broken (presumably because no suitable locale
        # was found, since there are not actually any commas in the source dataset.)
        urd.build("dataset_type",
                  source=source,
                  numeric_comma=True,
                  column2type=dict(b="float64"),
                  defaults=dict(b="0"))
        comma_broken = False
    except JobError as e:
        comma_broken = True
        urd.warn()
        urd.warn('SKIPPED NUMERIC COMMA TESTS')
        urd.warn(
            'Follow the instructions in this error to enable numeric comma:')
        urd.warn()
        urd.warn(e.format_msg())
    urd.build("test_dataset_type_corner_cases", numeric_comma=not comma_broken)

    print()
    print("Testing dataset chaining, filtering, callbacks and rechaining")
    selfchain = urd.build("test_selfchain")
    urd.build("test_rechain", jobs=dict(selfchain=selfchain))
    urd.build("test_dataset_callbacks")

    print()
    print("Testing dataset sorting and rehashing (with subjobs again)")
    urd.build("test_sorting")
    urd.build("test_sort_stability")
    urd.build("test_sort_chaining")
    urd.build("test_sort_trigger")
    urd.build("test_hashpart")
    urd.build("test_dataset_type_hashing")
    urd.build("test_dataset_type_chaining")

    print()
    print("Test hashlabels")
    urd.build("test_hashlabel")

    print()
    print("Test dataset roundrobin iteration and slicing")
    urd.build("test_dataset_roundrobin")
    urd.build("test_dataset_slice")
    urd.build("test_dataset_unroundrobin")
    urd.build("test_dataset_unroundrobin_trigger")

    print()
    print("Test dataset_checksum")
    urd.build("test_dataset_checksum")

    print()
    print("Test csvimport_zip")
    urd.build("test_csvimport_zip")

    print()
    print("Test output handling")
    urd.build("test_output")
    urd.build("test_output_on_error")

    print()
    print("Test datetime types in options")
    urd.build("test_datetime")

    print()
    print("Test various utility functions")
    urd.build("test_optionenum")
    urd.build("test_json")
    urd.build("test_jobwithfile")
    urd.build("test_jobchain")
    summary = urd.build("test_summary", joblist=urd.joblist)
    summary.link_result('summary.html')
Example #9
    def __init__(self, server_config):
        package_list = server_config['method_directories']
        # read all methods
        self.db = {}
        for package, autodiscover in package_list.items():
            package_dir = self._check_package(package)
            db_ = read_methods_conf(package_dir, autodiscover)
            for method, meta in db_.items():
                if method in self.db:
                    raise AcceleratorError(
                        "Method \"%s\" defined both in \"%s\" and \"%s\"!" % (
                            method,
                            package,
                            self.db[method]['package'],
                        ))
                self.db[method] = DotDict(package=package, **meta)
        t0 = monotonic()
        per_runner = defaultdict(list)
        for key, val in iteritems(self.db):
            package = val['package']
            per_runner[val['version']].append((package, key))
        self.runners = new_runners(server_config, set(per_runner))
        warnings = []
        failed = []
        self.hash = {}
        self.params = {}
        self.descriptions = {}
        self.typing = {}
        for version, data in iteritems(per_runner):
            runner = self.runners.get(version)
            if not runner:
                msg = '%%s.%%s (unconfigured interpreter %s)' % (version)
                failed.extend(msg % t for t in sorted(data))
                continue
            v = runner.get_ax_version()
            if v != ax_version:
                if runner.python == sys.executable:
                    raise AcceleratorError(
                        "Server is using accelerator %s but %s is currently installed, please restart server."
                        % (
                            ax_version,
                            v,
                        ))
                else:
                    print(
                        "WARNING: Server is using accelerator %s but runner %r is using accelerator %s."
                        % (
                            ax_version,
                            version,
                            v,
                        ))
            w, f, h, p, d = runner.load_methods(package_list, data)
            warnings.extend(w)
            failed.extend(f)
            self.hash.update(h)
            self.params.update(p)
            self.descriptions.update(d)
        for key, params in iteritems(self.params):
            self.typing[key] = options2typing(key, params.options)
            params.defaults = params2defaults(params)
            params.required = options2required(params.options)

        def prt(a, prefix):
            maxlen = (max(len(e) for e in a) + len(prefix))
            line = '=' * maxlen
            print()
            print(line)
            for e in sorted(a):
                msg = prefix + e
                print(msg + ' ' * (maxlen - len(msg)))
            print(line)
            print()

        if warnings:
            prt(warnings, 'WARNING: ')
        if failed:
            print(colour.WHITEBG + colour.RED + colour.BOLD)
            prt(failed, 'FAILED to import ')
            print(colour.RESET)
            raise MethodLoadException(failed)
        print("Updated %d methods on %d runners in %.1f seconds" % (
            len(self.hash),
            len(per_runner),
            monotonic() - t0,
        ))
Example #10
def main(urd):
	assert urd.info.slices >= 3, "The tests don't work with less than 3 slices (you have %d)." % (urd.info.slices,)

	print()
	print("Testing urd.build and job.load")
	want = ({'foo': 'foo', 'a': 'a'}, {'foo': None, 'b': None}, {'foo': None, 'c': None})
	job = urd.build("test_build_kws")
	assert job.load() == want
	bad = None
	try:
		urd.build("test_build_kws", options=dict(foo='bar'), foo='baz')
		bad = 'Allowed ambiguous keyword "foo"'
	except Exception:
		pass
	assert not bad, bad
	want[0]['foo'] = 'bar'
	want[0]['a'] = 'A'
	job = urd.build("test_build_kws", options=dict(foo='bar'), a='A')
	assert job.load() == want
	assert urd.build("test_build_kws", options=dict(foo='bar'), a='A', b=None, c=None) == job
	want[2]['c'] = job
	job = urd.build("test_build_kws", options=dict(foo='bar', a='override this from kw'), a='A', c=job)
	assert job.load() == want
	want[0]['foo'] = 'foo'
	want[2]['c'] = job
	job = urd.build("test_build_kws", a='A', b=None, c=job, datasets=dict(b='overridden'))
	assert job.load() == want

	print()
	print("Testing urd.begin/end/truncate/get/peek/latest/first/since")
	urd.truncate("tests_urd", 0)
	assert not urd.peek_latest("tests_urd").joblist
	urd.begin("tests_urd", 1, caption="first")
	urd.build("test_build_kws")
	fin = urd.finish("tests_urd")
	assert fin == {'new': True, 'changed': False, 'is_ghost': False}, fin
	urd.begin("tests_urd", 1)
	job = urd.build("test_build_kws")
	fin = urd.finish("tests_urd", caption="first")
	assert fin == {'new': False, 'changed': False, 'is_ghost': False}, fin
	urd.begin("tests_urd", 1) # will be overridden to 2 in finish
	jl = urd.latest("tests_urd").joblist
	assert jl == [job], '%r != [%r]' % (jl, job,)
	urd.build("test_build_kws", options=dict(foo='bar', a='A'))
	urd.finish("tests_urd", 2, caption="second")
	u = urd.peek_latest("tests_urd")
	assert u.caption == "second"
	dep0 = list(u.deps.values())[0]
	assert dep0.caption == "first", dep0.caption
	assert dep0.joblist == jl, '%r != %r' % (dep0.joblist, jl,)
	assert urd.since("tests_urd", 0) == ['1', '2']
	urd.truncate("tests_urd", 2)
	assert urd.since("tests_urd", 0) == ['1']
	urd.truncate("tests_urd", 0)
	assert urd.since("tests_urd", 0) == []
	ordered_ts = [1, 2, 1000000000, '1978-01-01', '1978-01-01+0', '1978-01-01+2', '1978-01-01 00:00', '1978-01-01T00:00+42', '2017-06-27', '2017-06-27T17:00:00', '2017-06-27 17:00:00+42']
	for ts in ordered_ts:
		urd.begin("tests_urd")
		if ts == 1000000000:
			urd.get("tests_urd", '1')
		urd.build("test_build_kws")
		urd.finish("tests_urd", ts)
	urd.begin("tests_urd")
	urd.build("test_build_kws")
	urd.finish("tests_urd", ('2019-12', 3))
	ordered_ts.append('2019-12+3')
	ordered_ts = [str(v).replace(' ', 'T') for v in ordered_ts]
	assert urd.since("tests_urd", 0) == ordered_ts
	assert urd.since("tests_urd", '1978-01-01') == ordered_ts[4:]
	assert urd.peek_first("tests_urd").timestamp == '1'
	assert not urd.peek("tests_urd", 2).deps
	dep_jl = list(urd.peek("tests_urd", 1000000000).deps.values())[0].joblist
	assert dep_jl == [job]
	assert urd.peek("tests_urd", ('2017-06-27 17:00:00', 42)).timestamp == '2017-06-27T17:00:00+42'
	while ordered_ts:
		urd.truncate("tests_urd", ordered_ts.pop())
		assert urd.since("tests_urd", 0) == ordered_ts, ordered_ts
	want = [date.today() - timedelta(10), datetime.utcnow()]
	for ts in want:
		urd.begin("tests_urd", ts)
		urd.build("test_build_kws")
		urd.finish("tests_urd")
	assert urd.since("tests_urd", 0) == [str(ts).replace(' ', 'T') for ts in want]
	urd.truncate("tests_urd", 0)

	for how in ("exiting", "dying",):
		print()
		print("Verifying that an analysis process %s kills the job" % (how,))
		time_before = monotonic()
		try:
			job = urd.build("test_analysis_died", how=how)
			print("test_analysis_died completed successfully (%s), that shouldn't happen" % (job,))
			exit(1)
		except JobError:
			time_after = monotonic()
		time_to_die = time_after - time_before
		if time_to_die > 13:
			print("test_analysis_died took %d seconds to die, it should be faster" % (time_to_die,))
			exit(1)
		elif time_to_die > 2:
			print("test_analysis_died took %d seconds to die, so death detection is slow, but works" % (time_to_die,))
		else:
			print("test_analysis_died took %.1f seconds to die, so death detection works" % (time_to_die,))

	print()
	print("Testing dataset creation, export, import")
	source = urd.build("test_datasetwriter")
	urd.build("test_datasetwriter_verify", source=source)
	source = urd.build("test_datasetwriter_copy", source=source)
	urd.build("test_datasetwriter_verify", source=source)
	urd.build("test_datasetwriter_parent")
	urd.build("test_datasetwriter_missing_slices")
	urd.build("test_dataset_in_prepare")
	ds = Dataset(source, "passed")
	csvname = "out.csv.gz"
	csvname_uncompressed = "out.csv"
	csv = urd.build("csvexport", filename=csvname, separator="\t", source=ds)
	csv_uncompressed = urd.build("csvexport", filename=csvname_uncompressed, separator="\t", source=ds)
	csv_quoted = urd.build("csvexport", filename=csvname, quote_fields='"', source=ds)
	urd.build("csvexport", filename='slice%d.csv', sliced=True, source=ds) # unused
	reimp_csv = urd.build("csvimport", filename=csv.filename(csvname), separator="\t")
	reimp_csv_uncompressed = urd.build("csvimport", filename=csv_uncompressed.filename(csvname_uncompressed), separator="\t")
	reimp_csv_quoted = urd.build("csvimport", filename=csv_quoted.filename(csvname), quotes=True)
	urd.build("test_compare_datasets", a=reimp_csv, b=reimp_csv_uncompressed)
	urd.build("test_compare_datasets", a=reimp_csv, b=reimp_csv_quoted)

	print()
	print("Testing subjobs")
	urd.build("test_subjobs_type", typed=ds, untyped=reimp_csv)
	urd.build("test_subjobs_nesting")

	print()
	print("Testing datasets more")
	dsnamejob = urd.build("test_dataset_names")
	# make sure .datasets works with these names (only possible after job finishes)
	assert [ds.name for ds in dsnamejob.datasets] == dsnamejob.load()
	urd.build("test_dataset_column_names")
	urd.build("test_dataset_merge")
	urd.build("test_dataset_filter_columns")
	urd.build("test_dataset_empty_colname")
	urd.build("test_dataset_nan")
	urd.build('test_dataset_parsing_writer')

	print()
	print("Testing csvimport with more difficult files")
	urd.build("test_csvimport_corner_cases")
	urd.build("test_csvimport_separators")

	print()
	print("Testing csvexport with all column types, strange separators, ...")
	urd.build("test_csvexport_naming")
	urd.build("test_csvexport_all_coltypes")
	urd.build("test_csvexport_separators")
	urd.build("test_csvexport_chains")
	urd.build("test_csvexport_quoting")

	print()
	print("Testing dataset typing")
	try:
		# Test if numeric_comma is broken (presumably because no suitable locale
		# was found, since there are not actually any commas in the source dataset.)
		urd.build("dataset_type", source=source, numeric_comma=True, column2type=dict(b="float64"), defaults=dict(b="0"))
		comma_broken = False
	except JobError as e:
		comma_broken = True
		urd.warn()
		urd.warn('SKIPPED NUMERIC COMMA TESTS')
		urd.warn('Follow the instructions in this error to enable numeric comma:')
		urd.warn()
		urd.warn(e.format_msg())
	urd.build("test_dataset_type_corner_cases", numeric_comma=not comma_broken)
	urd.build("test_dataset_type_minmax")

	print()
	print("Testing dataset chaining, filtering, callbacks and rechaining")
	selfchain = urd.build("test_selfchain")
	urd.build("test_rechain", jobs=dict(selfchain=selfchain))
	urd.build("test_dataset_callbacks")

	print()
	print("Testing dataset sorting and rehashing (with subjobs again)")
	urd.build("test_sorting")
	urd.build("test_sort_stability")
	urd.build("test_sort_chaining")
	urd.build("test_sort_trigger")
	urd.build("test_hashpart")
	urd.build("test_dataset_type_hashing")
	urd.build("test_dataset_type_chaining")

	print()
	print("Test hashlabels")
	urd.build("test_hashlabel")

	print()
	print("Test dataset roundrobin iteration and slicing")
	urd.build("test_dataset_roundrobin")
	urd.build("test_dataset_slice")
	urd.build("test_dataset_unroundrobin")
	urd.build("test_dataset_unroundrobin_trigger")
	urd.build("test_number")

	print()
	print("Test dataset_checksum")
	urd.build("test_dataset_checksum")

	print()
	print("Test csvimport_zip")
	urd.build("test_csvimport_zip")

	print()
	print("Test output handling")
	urd.build("test_output")
	urd.build("test_output_on_error")

	print()
	print("Test datetime types in options")
	urd.build("test_datetime")

	print()
	print("Test various utility functions")
	urd.build("test_optionenum")
	urd.build("test_json")
	urd.build("test_jobwithfile")
	urd.build("test_jobchain")

	print()
	print("Test shell commands")
	from sys import argv
	from accelerator.shell import cfg
	command_prefix = [argv[0], '--config', cfg.config_filename]
	urd.truncate("tests_urd", 0)
	# These have to be rebuilt every time, or the resolving might give other jobs.
	urd.begin("tests_urd", 1)
	a = urd.build('test_shell_data', force_build=True)
	b = urd.build('test_shell_data', force_build=True)
	c = urd.build('test_shell_data', datasets={'previous': a})
	urd.finish("tests_urd")
	urd.begin("tests_urd", "2021-09-27T03:14")
	d = urd.build('test_shell_data', datasets={'previous': c, 'parent': a + '/j'}, jobs={'previous': b})
	urd.finish("tests_urd")
	urd.begin("tests_urd", "2021-09-27T03:14+1")
	e = urd.build('test_shell_data', jobs={'previous': d})
	urd.finish("tests_urd")
	# ~ finds earlier jobs with that method, ^ follows jobs.previous falling back to datasets.previous.
	want = {
		'test_shell_data': e, # just the plain method -> job resolution.
		c + '~~': a, # not using .previous, just going back jobs
		'test_shell_data~3': b, # numbered tildes
		'test_shell_data~2^': a, # ~~ goes to c, ^ follows .previous to a.
		d + '^': b, # prefers jobs.previous to .datasets.previous
		':tests_urd:': e,
		':tests_urd/2021-09-27T03:14:': d,
		':tests_urd/1:1': b, # 1 is the second entry
		':tests_urd/1:-3': a, # third entry from the end
		':tests_urd:^': d,
		':tests_urd/2021-09-27T03:14+1^^:0': a, # ^ in :: goes to earlier entries
		':tests_urd/1~:': d, # ~ in :: goes to later entries
	}
	urd.build('test_shell_job', command_prefix=command_prefix, want=want)
	# the job is resolved first, so the old specs give the same results
	want = {spec: job + '/default' for spec, job in want.items()}
	want.update({
		d + '/j^': a + '/j', # .parent
		d + '/j~': b + '/default', # .previous
		'test_shell_data~/j^': a + '/j', # both job and ds movement
		e + '/j~^': a + '/j', # .previous.parent
		# some urdlist ones with datasets on
		':tests_urd:/j': e + '/j',
		':tests_urd/1:1/j': b + '/j',
		':tests_urd:^/j': d + '/j',
		':tests_urd/2021-09-27T03:14:/j': d + '/j',
		# finally one with : in the list and / in the ds name
		':tests_urd/2021-09-27T03:14+1:0/name/with/slash': e + '/name/with/slash',
	})
	urd.build('test_shell_ds', command_prefix=command_prefix, want=want)
	urd.truncate("tests_urd", 0)
	urd.build('test_shell_grep', command_prefix=command_prefix)

	summary = urd.build("test_summary", joblist=urd.joblist)
	summary.link_result('summary.html')
Example #11
def _output(pid, msg):
	_send('output', '%f\0%s' % (monotonic(), msg,), pid=pid)
Example #12
def fork_analysis(slices, concurrency, analysis_func, kw, preserve_result,
                  output_fds):
    from multiprocessing import Process, Queue
    import gc
    q = Queue()
    children = []
    t = monotonic()
    pid = os.getpid()
    if hasattr(gc, 'freeze'):
        # See https://bugs.python.org/issue31558
        # (Though we keep the gc disabled by default.)
        gc.freeze()
    delayed_start = False
    for i in range(slices):
        if i == concurrency:
            assert concurrency != 0
            # The rest will wait on this queue
            delayed_start = Queue()
        p = Process(target=call_analysis,
                    args=(analysis_func, i, delayed_start, q, preserve_result,
                          pid, output_fds),
                    kwargs=kw,
                    name='analysis-%d' % (i, ))
        p.start()
        children.append(p)
    for fd in output_fds:
        os.close(fd)
    per_slice = []
    temp_files = {}
    no_children_no_messages = False
    while len(per_slice) < slices:
        still_alive = []
        for p in children:
            if p.is_alive():
                still_alive.append(p)
            else:
                p.join()
                if p.exitcode:
                    raise Exception("%s terminated with exitcode %d" % (
                        p.name,
                        p.exitcode,
                    ))
        children = still_alive
        # If a process dies badly we may never get a message here.
        # No need to handle that very quickly though, 10 seconds is fine.
        # (Typically this is caused by running out of memory.)
        try:
            s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_tb = q.get(
                timeout=10)
        except QueueEmpty:
            if not children:
                # No children left, so they must have all sent their messages.
                # Still, just to be sure there isn't a race, wait one iteration more.
                if no_children_no_messages:
                    raise Exception(
                        "All analysis processes exited cleanly, but not all returned a result."
                    )
                else:
                    no_children_no_messages = True
            continue
        if s_tb:
            data = [{'analysis(%d)' % (s_no, ): s_tb}, None]
            writeall(_prof_fd, json.dumps(data).encode('utf-8'))
            exitfunction()
        if delayed_start:
            # Another analysis is allowed to run now
            delayed_start.put(None)
        per_slice.append((s_no, s_t))
        temp_files.update(s_temp_files)
        for name, lens in s_dw_lens.items():
            dataset._datasetwriters[name]._lens.update(lens)
        for name, minmax in s_dw_minmax.items():
            dataset._datasetwriters[name]._minmax.update(minmax)
    g.update_top_status("Waiting for all slices to finish cleanup")
    for p in children:
        p.join()
    if preserve_result:
        res_seq = ResultIterMagic(
            slices, reuse_msg="analysis_res is an iterator, don't re-use it")
    else:
        res_seq = None
    return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
Example #13
def fork_analysis(slices, concurrency, analysis_func, kw, preserve_result,
                  output_fds, q):
    from multiprocessing import Process
    import gc
    children = []
    t = monotonic()
    pid = os.getpid()
    if hasattr(gc, 'freeze'):
        # See https://bugs.python.org/issue31558
        # (Though we keep the gc disabled by default.)
        gc.freeze()
    delayed_start = False
    delayed_start_todo = 0
    for i in range(slices):
        if i == concurrency:
            assert concurrency != 0
            # The rest will wait on this queue
            delayed_start = os.pipe()
            delayed_start_todo = slices - i
        p = SimplifiedProcess(target=call_analysis,
                              args=(analysis_func, i, delayed_start, q,
                                    preserve_result, pid, output_fds),
                              kwargs=kw,
                              name='analysis-%d' % (i, ))
        children.append(p)
    for fd in output_fds:
        os.close(fd)
    if delayed_start:
        os.close(delayed_start[0])
    q.make_reader()
    per_slice = []
    temp_files = {}
    no_children_no_messages = False
    reap_time = monotonic() + 5
    exit_count = 0
    while len(per_slice) < slices:
        if exit_count > 0 or reap_time <= monotonic():
            still_alive = []
            for p in children:
                if p.is_alive():
                    still_alive.append(p)
                else:
                    exit_count -= 1
                    if p.exitcode:
                        raise AcceleratorError(
                            "%s terminated with exitcode %d" % (
                                p.name,
                                p.exitcode,
                            ))
            children = still_alive
            reap_time = monotonic() + 5
        # If a process dies badly we may never get a message here.
        # (iowrapper tries to tell us though.)
        # No need to handle that very quickly though, 10 seconds is fine.
        # (Typically this is caused by running out of memory.)
        try:
            msg = q.get(timeout=10)
            if not msg:
                # Notification from iowrapper, so we wake up (quickly) even if
                # the process died badly (e.g. from running out of memory).
                exit_count += 1
                continue
            s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_dw_compressions, s_tb = msg
        except QueueEmpty:
            if not children:
                # No children left, so they must have all sent their messages.
                # Still, just to be sure there isn't a race, wait one iteration more.
                if no_children_no_messages:
                    raise AcceleratorError(
                        "All analysis processes exited cleanly, but not all returned a result."
                    )
                else:
                    no_children_no_messages = True
            continue
        if s_tb:
            data = [{'analysis(%d)' % (s_no, ): s_tb}, None]
            writeall(_prof_fd, json.dumps(data).encode('utf-8'))
            exitfunction()
        if delayed_start_todo:
            # Another analysis is allowed to run now
            os.write(delayed_start[1], b'a')
            delayed_start_todo -= 1
        per_slice.append((s_no, s_t))
        temp_files.update(s_temp_files)
        for name, lens in s_dw_lens.items():
            dataset._datasetwriters[name]._lens.update(lens)
        for name, minmax in s_dw_minmax.items():
            dataset._datasetwriters[name]._minmax.update(minmax)
        for name, compressions in s_dw_compressions.items():
            dataset._datasetwriters[name]._compressions.update(compressions)
    g.update_top_status("Waiting for all slices to finish cleanup")
    q.close()
    if delayed_start:
        os.close(delayed_start[1])
    for p in children:
        p.join()
    if preserve_result:
        res_seq = ResultIterMagic(
            slices, reuse_msg="analysis_res is an iterator, don't re-use it")
    else:
        res_seq = None
    return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
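
The delayed_start machinery above is a pipe used as a counting gate: workers beyond the concurrency limit block on a one-byte read (see call_analysis further down), and the parent writes one byte whenever a running slice finishes. Below is a stripped-down standalone sketch of the same pattern; run_limited is an illustrative name rather than an accelerator function, the release here happens on child exit instead of on the result message, and it assumes a Unix platform (os.fork), as the original does.

import os
import time

def run_limited(slices=8, concurrency=3):
    # Start 'slices' workers, but let at most 'concurrency' of them run at once.
    gate_r, gate_w = os.pipe()
    for i in range(slices):
        if os.fork() == 0:              # child
            os.close(gate_w)
            if i >= concurrency:
                os.read(gate_r, 1)      # block until the parent grants a slot
            os.close(gate_r)
            time.sleep(0.1)             # stand-in for the real analysis work
            os._exit(0)
    os.close(gate_r)
    done = 0
    while done < slices:
        os.waitpid(-1, 0)               # a slice finished ...
        done += 1
        if done <= slices - concurrency:
            os.write(gate_w, b'a')      # ... so release one waiting worker
    os.close(gate_w)

if __name__ == '__main__':
    run_limited()
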
Example #14
    def _handle_req(self, path, args):
        if path[0] == 'status':
            data = job_tracking.get(args.get('subjob_cookie') or None)
            if not data:
                self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
                return
            timeout = min(float(args.get('timeout', 0)), 128)
            status = DotDict(idle=data.lock.acquire(False))
            deadline = monotonic() + timeout
            while not status.idle and monotonic() < deadline:
                time.sleep(0.1)
                status.idle = data.lock.acquire(False)
            if status.idle:
                if data.last_error:
                    status.last_error_time = data.last_error[0]
                status.last_time = data.last_time
                data.lock.release()
            elif path == ['status', 'full']:
                status.status_stacks, status.current = status_stacks_export()
            status.report_t = monotonic()
            self.do_response(200, "text/json", status)
            return

        elif path == ['last_error']:
            data = job_tracking.get(args.get('subjob_cookie') or None)
            if not data:
                self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
                return
            status = DotDict()
            if data.last_error:
                status.time = data.last_error[0]
                status.last_error = data.last_error[1]
            self.do_response(200, "text/json", status)
            return

        elif path == ['list_workdirs']:
            ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
            self.do_response(200, "text/json", ws)

        elif path[0] == 'workdir':
            self.do_response(200, "text/json",
                             self.ctrl.DataBase.db_by_workdir[path[1]])

        elif path == ['config']:
            self.do_response(200, "text/json", self.ctrl.config)

        elif path == ['update_methods']:
            self.do_response(200, "text/json", self.ctrl.update_methods())

        elif path == ['methods']:
            """ return a json with everything the Method object knows about the methods """
            self.do_response(200, "text/json", self.ctrl.get_methods())

        elif path[0] == 'method_info':
            method = path[1]
            self.do_response(200, "text/json", self.ctrl.method_info(method))

        elif path[0] == 'workspace_info':
            self.do_response(200, 'text/json',
                             self.ctrl.get_workspace_details())

        elif path[0] == 'abort':
            tokill = list(children)
            print('Force abort', tokill)
            for child in tokill:
                os.killpg(child, signal.SIGKILL)
            self.do_response(200, 'text/json', {'killed': len(tokill)})

        elif path[0] == 'method2job':
            method, num = path[1:]
            jobs = self.ctrl.DataBase.db_by_method.get(method, ())
            start_ix = 0
            start_from = args.get('start_from')
            if start_from:
                for start_ix, job in enumerate(jobs):
                    if job.id == start_from:
                        break
                else:
                    start_ix = None
            if start_ix is None:
                res = {
                    'error': '%s is not a current %s job' % (
                        start_from,
                        method,
                    )
                }
            else:
                num = int(num)
                if not jobs:
                    res = {
                        'error':
                        'no current jobs with method %s available' % (method, )
                    }
                elif num + start_ix >= len(jobs):
                    res = {
                        'error':
                        'tried to go %d jobs back from %s, but only %d earlier (current) jobs available'
                        % (
                            num,
                            jobs[start_ix].id,
                            len(jobs) - start_ix - 1,
                        )
                    }
                else:
                    res = {'id': jobs[num + start_ix].id}
            self.do_response(200, 'text/json', res)

        elif path[0] == 'job_is_current':
            job = Job(path[1])
            job = self.ctrl.DataBase.db_by_workdir[job.workdir].get(job)
            self.do_response(200, 'text/json', bool(job and job['current']))

        elif path == ['submit']:
            if self.ctrl.broken:
                self.do_response(
                    500, "text/json", {
                        'broken':
                        self.ctrl.broken,
                        'error':
                        'Broken methods: ' + ', '.join(
                            sorted(
                                m.split('.')[-1][2:]
                                for m in self.ctrl.broken))
                    })
            elif 'json' in args:
                if DEBUG_WRITE_JSON:
                    with open('DEBUG_WRITE.json', 'wb') as fh:
                        fh.write(args['json'])
                setup = json_decode(args['json'])
                data = job_tracking.get(setup.get('subjob_cookie') or None)
                if not data:
                    self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
                    return
                if len(job_tracking) - 1 > 5:  # max five levels
                    print('Too deep subjob nesting!')
                    self.do_response(403, 'text/plain',
                                     'Too deep subjob nesting')
                    return
                if data.lock.acquire(False):
                    still_locked = True
                    respond_after = True
                    try:
                        if self.DEBUG:
                            print('@server.py:  Got the lock!',
                                  file=sys.stderr)
                        workdir = setup.get('workdir', data.workdir)
                        jobidv, job_res = self.ctrl.initialise_jobs(
                            setup, workdir)
                        job_res['done'] = False
                        if jobidv:
                            error = []
                            tlock = TLock()
                            link2job = {
                                j['link']: j
                                for j in job_res['jobs'].values()
                            }

                            def run(jobidv, tlock):
                                for jobid in jobidv:
                                    passed_cookie = None
                                    # This is not a race - all higher locks are locked too.
                                    while passed_cookie in job_tracking:
                                        passed_cookie = gen_cookie()
                                    concurrency_map = dict(
                                        data.concurrency_map)
                                    concurrency_map.update(
                                        setup.get('concurrency_map', ()))
                                    job_tracking[passed_cookie] = DotDict(
                                        lock=JLock(),
                                        last_error=None,
                                        last_time=0,
                                        workdir=workdir,
                                        concurrency_map=concurrency_map,
                                    )
                                    try:
                                        explicit_concurrency = setup.get(
                                            'concurrency'
                                        ) or concurrency_map.get(setup.method)
                                        concurrency = explicit_concurrency or concurrency_map.get(
                                            '-default-')
                                        if concurrency and setup.method == 'csvimport':
                                            # just to be safe, check the package too
                                            if load_setup(
                                                    jobid
                                            ).package == 'accelerator.standard_methods':
                                                # ignore default concurrency, error on explicit.
                                                if explicit_concurrency:
                                                    raise JobError(
                                                        jobid, 'csvimport', {
                                                            'server':
                                                            'csvimport can not run with reduced concurrency'
                                                        })
                                                concurrency = None
                                        self.ctrl.run_job(
                                            jobid,
                                            subjob_cookie=passed_cookie,
                                            parent_pid=setup.get(
                                                'parent_pid', 0),
                                            concurrency=concurrency)
                                        # update database since a new jobid was just created
                                        job = self.ctrl.add_single_jobid(jobid)
                                        with tlock:
                                            link2job[jobid]['make'] = 'DONE'
                                            link2job[jobid][
                                                'total_time'] = job.total
                                    except JobError as e:
                                        error.append(
                                            [e.job, e.method, e.status])
                                        with tlock:
                                            link2job[jobid]['make'] = 'FAIL'
                                        return
                                    finally:
                                        del job_tracking[passed_cookie]
                                # everything was built ok, update symlink
                                try:
                                    dn = self.ctrl.workspaces[workdir].path
                                    ln = os.path.join(dn, workdir + "-LATEST_")
                                    try:
                                        os.unlink(ln)
                                    except OSError:
                                        pass
                                    os.symlink(jobid, ln)
                                    os.rename(
                                        ln,
                                        os.path.join(dn, workdir + "-LATEST"))
                                except OSError:
                                    traceback.print_exc(file=sys.stderr)

                            t = Thread(target=run,
                                       name="job runner",
                                       args=(
                                           jobidv,
                                           tlock,
                                       ))
                            t.daemon = True
                            t.start()
                            t.join(2)  # give job two seconds to complete
                            with tlock:
                                for j in link2job.values():
                                    if j['make'] in (
                                            True,
                                            'FAIL',
                                    ):
                                        respond_after = False
                                        job_res_json = json_encode(job_res)
                                        break
                            if not respond_after:  # not all jobs are done yet, give partial response
                                self.do_response(200, "text/json",
                                                 job_res_json)
                            t.join()  # wait until actually complete
                            del tlock
                            del t
                            # verify that all jobs got built.
                            total_time = 0
                            for j in link2job.values():
                                jobid = j['link']
                                if j['make'] == True:
                                    # Well, crap.
                                    error.append([
                                        jobid, "unknown", {
                                            "INTERNAL": "Not built"
                                        }
                                    ])
                                    print("INTERNAL ERROR IN JOB BUILDING!",
                                          file=sys.stderr)
                                total_time += j.get('total_time', 0)
                            if error:
                                data.last_error = (time.time(), error)
                            data.last_time = total_time
                    except Exception as e:
                        if respond_after:
                            data.lock.release()
                            still_locked = False
                            self.do_response(500, "text/json",
                                             {'error': str(e)})
                        raise
                    finally:
                        if still_locked:
                            data.lock.release()
                    if respond_after:
                        job_res['done'] = True
                        self.do_response(200, "text/json", job_res)
                    if self.DEBUG:
                        print("@server.py:  Process releases lock!",
                              file=sys.stderr
                              )  # note: has already done http response
                else:
                    self.do_response(503, 'text/plain',
                                     'Busy doing work for you...\n')
            else:
                self.do_response(400, 'text/plain', 'Missing json input!\n')
        else:
            self.do_response(404, 'text/plain', 'Unknown path\n')
            return
Example #15
def show(sig, frame):
    t = monotonic()
    verbose = (previous[0] + 2 > t)  # within 2 seconds of previous
    previous[0] = t
    ds_ixes = []
    progress_lines = []
    progress_fraction = []
    for sliceno in want_slices:
        ds_ix, done_lines = status[sliceno]
        ds_ixes.append(ds_ix)
        max_possible = min(
            done_lines + status_interval[sliceno],
            total_lines_per_slice_at_ds[ds_ix + 1][sliceno])
        done_lines = (done_lines +
                      max_possible) / 2  # middle of the possibilities
        progress_lines.append(done_lines)
        total = total_lines_per_slice_at_ds[-1][sliceno]
        if total == 0:
            progress_fraction.append(1)
        else:
            progress_fraction.append(done_lines / total)
    progress_total = sum(progress_lines) / (total_lines or 1)
    bad_cutoff = progress_total - 0.1
    if verbose:
        show_ds = (len(datasets) > 1 and min(ds_ixes) != max(ds_ixes))
        for sliceno, ds_ix, p in zip(want_slices, ds_ixes,
                                     progress_fraction):
            if ds_ix == len(datasets):
                msg = 'DONE'
            else:
                msg = '{0:d}% of {1:n} lines'.format(
                    round(p * 100),
                    total_lines_per_slice_at_ds[-1][sliceno])
                if show_ds:
                    msg = '%s (in %s)' % (
                        msg,
                        datasets[ds_ix].quoted,
                    )
            msg = '%9d: %s' % (
                sliceno,
                msg,
            )
            if p < bad_cutoff:
                msg = colour(msg, 'grep/infohighlight')
            else:
                msg = colour(msg, 'grep/info')
            write(2, msg.encode('utf-8') + b'\n')
    msg = '{0:d}% of {1:n} lines'.format(round(progress_total * 100),
                                         total_lines)
    if len(datasets) > 1:
        min_ds = min(ds_ixes)
        max_ds = max(ds_ixes)
        if min_ds < len(datasets):
            ds_name = datasets[min_ds].quoted
            extra = '' if min_ds == max_ds else ' ++'
            msg = '%s (in %s%s)' % (
                msg,
                ds_name,
                extra,
            )
    worst = min(progress_fraction)
    if worst < bad_cutoff:
        msg = '%s, worst %d%%' % (
            msg,
            round(worst * 100),
        )
    msg = colour('  SUMMARY: %s' % (msg, ), 'grep/info')
    write(2, msg.encode('utf-8') + b'\n')
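show(sig, frame) has the signature of a signal handler: the progress report is printed on demand when the process receives a signal, and the two-second check switches to the verbose per-slice listing when a second request arrives quickly. A minimal sketch of wiring such a handler up (print_progress is a hypothetical stand-in for the real show):

import signal

def print_progress(sig, frame):
    # The real handler prints per-slice progress; this is just a placeholder.
    print('progress requested via signal %d' % (sig,))

signal.signal(signal.SIGUSR1, print_progress)
# BSD and macOS also offer SIGINFO (Ctrl-T), where available:
if hasattr(signal, 'SIGINFO'):
    signal.signal(signal.SIGINFO, print_progress)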
Exemple #16
0
def call_analysis(analysis_func, sliceno_, delayed_start, q, preserve_result,
                  parent_pid, output_fds, **kw):
    try:
        q.make_writer()
        # tell iowrapper our PID, so our output goes to the right status stack.
        # (the pty is not quite a transparent transport ('\n' transforms into
        # '\r\n'), so we use a fairly human readable encoding.)
        writeall(output_fds[sliceno_], b'%16x' % (os.getpid(), ))
        # use our iowrapper fd instead of stdout/stderr
        os.dup2(output_fds[sliceno_], 1)
        os.dup2(output_fds[sliceno_], 2)
        for fd in output_fds:
            os.close(fd)
        os.close(_prof_fd)
        slicename = 'analysis(%d)' % (sliceno_, )
        setproctitle(slicename)
        if delayed_start:
            os.close(delayed_start[1])
            update = statmsg._start(
                'waiting for concurrency limit (%d)' % (sliceno_, ),
                parent_pid, True)
            if os.read(delayed_start[0], 1) != b'a':
                raise AcceleratorError('bad delayed_start, giving up')
            update(slicename)
            os.close(delayed_start[0])
        else:
            statmsg._start(slicename, parent_pid, True)
        kw['sliceno'] = g.sliceno = sliceno_
        for dw in dataset._datasetwriters.values():
            if dw._for_single_slice is None:
                dw._set_slice(sliceno_)
        res = analysis_func(**kw)
        if preserve_result:
            # Remove defaultdicts until we find one with a picklable default_factory.
            # (This is what you end up doing manually anyway.)
            def picklable(v):
                try:
                    pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
                    return True
                except Exception:
                    return False

            def fixup(d):
                if isinstance(
                        d, defaultdict) and not picklable(d.default_factory):
                    if not d:
                        return {}
                    v = next(iteritems(d))[1]  # first value; iteritems yields (key, value) pairs
                    if isinstance(
                            v,
                            defaultdict) and not picklable(v.default_factory):
                        return {k: fixup(v) for k, v in iteritems(d)}
                    else:
                        return dict(d)
                else:
                    return d

            def save(item, name):
                blob.save(fixup(item), name, sliceno=sliceno_, temp=True)

            if isinstance(res, tuple):
                if sliceno_ == 0:
                    blob.save(len(res), "Analysis.tuple", temp=True)
                for ix, item in enumerate(res):
                    save(item, "Analysis.%d." % (ix, ))
            else:
                if sliceno_ == 0:
                    blob.save(False, "Analysis.tuple", temp=True)
                save(res, "Analysis.")
        from accelerator.extras import saved_files
        dw_lens = {}
        dw_minmax = {}
        dw_compressions = {}
        for name, dw in dataset._datasetwriters.items():
            if dw._for_single_slice or sliceno_ == 0:
                dw_compressions[name] = dw._compressions
            if dw._for_single_slice in (
                    None,
                    sliceno_,
            ):
                dw.close()
                dw_lens[name] = dw._lens
                dw_minmax[name] = dw._minmax
        c_fflush()
        q.put((
            sliceno_,
            monotonic(),
            saved_files,
            dw_lens,
            dw_minmax,
            dw_compressions,
            None,
        ))
        q.close()
    except:
        c_fflush()
        msg = fmt_tb(1)
        print(msg)
        q.put((
            sliceno_,
            monotonic(),
            {},
            {},
            {},
            {},
            msg,
        ))
        q.close()
        sleep(5)  # give launcher time to report error (and kill us)
        exitfunction()
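The preserve_result branch probes picklability by simply attempting pickle.dumps, then converts defaultdicts whose default_factory cannot be pickled (typically a lambda or other local function) into plain dicts. A small standalone sketch of that probe, independent of the accelerator code:

import pickle
from collections import defaultdict

def picklable(v):
    try:
        pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
        return True
    except Exception:
        return False

d = defaultdict(lambda: defaultdict(int))  # lambda factory -> not picklable
d['a']['x'] += 1
assert not picklable(d)
assert picklable({k: dict(v) for k, v in d.items()})  # plain dicts pickle fine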
Exemple #17
0
def execute_process(workdir,
                    jobid,
                    slices,
                    concurrency,
                    result_directory,
                    common_directory,
                    input_directory,
                    index=None,
                    workdirs=None,
                    server_url=None,
                    subjob_cookie=None,
                    parent_pid=0):
    WORKDIRS.update(workdirs)

    g.job = jobid
    setproctitle('launch')
    path = os.path.join(workdir, jobid)
    try:
        os.chdir(path)
    except Exception:
        print("Cannot cd to workdir", path)
        exit(1)

    g.params = params = job_params()
    method_ref = import_module(params.package + '.a_' + params.method)
    g.sliceno = -1

    g.job = CurrentJob(jobid, params, result_directory, input_directory)
    g.slices = slices

    g.options = params.options
    g.datasets = params.datasets
    g.jobs = params.jobs

    method_ref.options = params.options
    method_ref.datasets = params.datasets
    method_ref.jobs = params.jobs

    g.server_url = server_url
    g.running = 'launch'
    statmsg._start('%s %s' % (
        jobid,
        params.method,
    ), parent_pid)

    def dummy():
        pass

    prepare_func = getattr(method_ref, 'prepare', dummy)
    analysis_func = getattr(method_ref, 'analysis', dummy)
    synthesis_func = getattr(method_ref, 'synthesis', dummy)

    synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)

    fd2pid, names, masters, slaves = iowrapper.setup(
        slices, prepare_func is not dummy, analysis_func is not dummy)

    def switch_output():
        fd = slaves.pop()
        os.dup2(fd, 1)
        os.dup2(fd, 2)
        os.close(fd)

    if analysis_func is dummy:
        q = None
    else:
        q = LockFreeQueue()
    iowrapper.run_reader(fd2pid, names, masters, slaves, q=q)
    for fd in masters:
        os.close(fd)

    # A chain must be finished from the back, so sort on that.
    sortnum_cache = {}

    def dw_sortnum(name):
        if name not in sortnum_cache:
            dw = dataset._datasetwriters.get(name)
            if not dw:  # manually .finish()ed
                num = -1
            elif dw.previous and dw.previous.startswith(jobid + '/'):
                pname = dw.previous.split('/')[1]
                num = dw_sortnum(pname) + 1
            else:
                num = 0
            sortnum_cache[name] = num
        return sortnum_cache[name]

    prof = {}
    if prepare_func is dummy:
        prof['prepare'] = 0  # truthish!
    else:
        t = monotonic()
        switch_output()
        g.running = 'prepare'
        g.subjob_cookie = subjob_cookie
        setproctitle(g.running)
        with statmsg.status(g.running):
            g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
            to_finish = [
                dw.name for dw in dataset._datasetwriters.values()
                if dw._started
            ]
            if to_finish:
                with statmsg.status("Finishing datasets"):
                    for name in sorted(to_finish, key=dw_sortnum):
                        dataset._datasetwriters[name].finish()
        c_fflush()
        prof['prepare'] = monotonic() - t
    switch_output()
    setproctitle('launch')
    from accelerator.extras import saved_files
    if analysis_func is dummy:
        prof['per_slice'] = []
        prof['analysis'] = 0
    else:
        t = monotonic()
        g.running = 'analysis'
        g.subjob_cookie = None  # subjobs are not allowed from analysis
        with statmsg.status(
                'Waiting for all slices to finish analysis') as update:
            g.update_top_status = update
            prof['per_slice'], files, g.analysis_res = fork_analysis(
                slices, concurrency, analysis_func, args_for(analysis_func),
                synthesis_needs_analysis, slaves, q)
            del g.update_top_status
        prof['analysis'] = monotonic() - t
        saved_files.update(files)
    t = monotonic()
    g.running = 'synthesis'
    g.subjob_cookie = subjob_cookie
    setproctitle(g.running)
    with statmsg.status(g.running):
        synthesis_res = synthesis_func(**args_for(synthesis_func))
        if synthesis_res is not None:
            blob.save(synthesis_res, temp=False)
        if dataset._datasetwriters:
            with statmsg.status("Finishing datasets"):
                for name in sorted(dataset._datasetwriters, key=dw_sortnum):
                    dataset._datasetwriters[name].finish()
    if dataset._datasets_written:
        blob.save(dataset._datasets_written,
                  'DS/LIST',
                  temp=False,
                  _hidden=True)
    c_fflush()
    t = monotonic() - t
    prof['synthesis'] = t

    from accelerator.subjobs import _record
    return None, (prof, saved_files, _record)
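execute_process resolves the optional prepare/analysis/synthesis entry points with getattr and a shared dummy no-op, then records per-stage wall time in prof. A condensed sketch of that dispatch-and-timing pattern (method_module and run_stages are hypothetical names, and the per-slice forking of analysis is ignored here):

from time import monotonic

def run_stages(method_module):
    # Missing stages fall back to a shared no-op, so 'func is dummy'
    # is a cheap way to skip them while keeping the timing code uniform.
    def dummy():
        pass

    prof = {}
    for stage in ('prepare', 'analysis', 'synthesis'):
        func = getattr(method_module, stage, dummy)
        if func is dummy:
            prof[stage] = 0  # truthish, like the original
            continue
        t = monotonic()
        func()
        prof[stage] = monotonic() - t
    return prof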