Example #1
    def _check_package(self, package):
        try:
            package_mod = import_module(package)
            if not hasattr(package_mod, "__file__"):
                raise ImportError("no __file__")
        except ImportError:
            raise AcceleratorError(
                "Failed to import %s, maybe missing __init__.py?" %
                (package, ))
        if not package_mod.__file__:
            raise AcceleratorError(
                "%s has no __file__, maybe missing __init__.py?" % (package, ))
        return os.path.dirname(package_mod.__file__)
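A minimal standalone sketch of the same import-and-locate check, using only the standard library (the helper name package_dir and the use of plain ImportError are illustrative, not part of the project):

import os
from importlib import import_module

def package_dir(package):
    # Import the package, require a usable __file__, and return its directory.
    mod = import_module(package)
    if not getattr(mod, '__file__', None):
        raise ImportError('%s has no __file__, maybe missing __init__.py?' % (package,))
    return os.path.dirname(mod.__file__)

print(package_dir('json'))  # e.g. /usr/lib/python3.x/json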
Example #2
def read_methods_conf(dirname, autodiscover):
    """ read and parse the methods.conf file """
    db = {}
    if autodiscover:
        methods = glob(os.path.join(dirname, 'a_*.py'))
        for method in methods:
            if method not in db:
                db[os.path.basename(method)[2:-3]] = DotDict(version='DEFAULT')
    filename = os.path.join(dirname, 'methods.conf')
    if autodiscover and not os.path.exists(filename):
        return db
    with open(filename) as fh:
        for lineno, line in enumerate(fh, 1):
            data = line.split('#')[0].split()
            if not data:
                continue
            method = data.pop(0)
            if autodiscover and (method not in db):
                # in auto-discover, methods.conf may list anything; entries without a matching a_*.py are skipped
                continue
            try:
                version = data.pop(0)
            except IndexError:
                version = 'DEFAULT'
            if data:
                raise AcceleratorError('Trailing garbage on %s:%d: %s' % (
                    filename,
                    lineno,
                    line,
                ))
            db[method] = DotDict(version=version)
    return db
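As a rough illustration of the file format this parser expects (one method per line, an optional version label, '#' starts a comment), here is a simplified standalone version with made-up sample content and plain dicts standing in for DotDict:

# Minimal sketch of the same line format; sample content is invented.
sample = """
# comment lines and blank lines are ignored
csvimport             # no version -> 'DEFAULT'
dataset_type  py3     # explicit interpreter label
"""

db = {}
for lineno, line in enumerate(sample.splitlines(), 1):
    data = line.split('#')[0].split()
    if not data:
        continue
    method = data.pop(0)
    version = data.pop(0) if data else 'DEFAULT'
    if data:
        raise ValueError('Trailing garbage on line %d: %s' % (lineno, line))
    db[method] = {'version': version}

print(db)  # {'csvimport': {'version': 'DEFAULT'}, 'dataset_type': {'version': 'py3'}}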
Example #3
def _reprify(o):
    if isinstance(o, OptionDefault):
        o = o.default
    if isinstance(o, (bytes, str, int, float, long, bool, NoneType)):
        return repr(o)
    if isinstance(o, unicode):
        # not reachable in PY3, the above "str" matches
        return repr(o.encode('utf-8'))
    if isinstance(o, set):
        return '[%s]' % (', '.join(map(_reprify, _sorted_set(o))), )
    if isinstance(o, (list, tuple)):
        return '[%s]' % (', '.join(map(_reprify, o)), )
    if isinstance(o, dict):
        return '{%s}' % (', '.join('%s: %s' % (
            _reprify(k),
            _reprify(v),
        ) for k, v in sorted(iteritems(o))), )
    if isinstance(o, (
            datetime.datetime,
            datetime.date,
            datetime.time,
            datetime.timedelta,
    )):
        return str(o)
    raise AcceleratorError('Unhandled %s in dependency resolution' %
                           (type(o), ))
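The point of the function is a deterministic textual form for option values: sets and dicts are sorted so that equal values always serialize the same way. A Python-3-only sketch of the same idea, with illustrative names and a sort key chosen here for simplicity rather than taken from the project:

import datetime

def stable_repr(o):
    # Stable, order-independent repr for hashing/comparison purposes.
    if isinstance(o, (bytes, str, int, float, bool, type(None))):
        return repr(o)
    if isinstance(o, set):
        return '[%s]' % ', '.join(map(stable_repr, sorted(o, key=repr)))
    if isinstance(o, (list, tuple)):
        return '[%s]' % ', '.join(map(stable_repr, o))
    if isinstance(o, dict):
        return '{%s}' % ', '.join(
            '%s: %s' % (stable_repr(k), stable_repr(v))
            for k, v in sorted(o.items(), key=lambda kv: repr(kv[0]))
        )
    if isinstance(o, (datetime.datetime, datetime.date, datetime.time, datetime.timedelta)):
        return str(o)
    raise TypeError('Unhandled %s' % (type(o),))

# same value, different construction order -> same serialization
assert stable_repr({'b': 1, 'a': {2, 1}}) == stable_repr({'a': {1, 2}, 'b': 1})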
Example #4
def load_setup(jobid):
    """Loads but does not type setup.json from jobid.
	You probably want to use extras.job_params instead.
	"""
    d = json_load('setup.json', jobid)
    version = d.version
    if version == 1:
        d.jobs = d.pop('jobids')
        version = 2
    if version == 2:
        if 'exectime' not in d and 'profile' in d:
            d.exectime = d.pop('profile')
        d.versions = DotDict()
        python_path = d.pop('python', None)
        if python_path:
            d.versions.python_path = python_path
        version = 3
    if version == 3:
        # no changes here, it's only used to know how to find datasets
        version = 4
    if version != 4:
        raise AcceleratorError(
            "Don't know how to load setup.json version %d (in %s)" % (
                d.version,
                jobid,
            ))
    return d
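The migration style is cumulative: each `if version == N` block upgrades the dict in place and bumps the local version, so a file of any older version passes through every later step. A minimal sketch of that pattern on a plain dict (field names are hypothetical, not the real setup.json schema):

def migrate(d):
    # Each block upgrades one version step in place, then falls through.
    version = d.get('version', 1)
    if version == 1:
        d['jobs'] = d.pop('old_jobs_field', [])  # hypothetical rename
        version = 2
    if version == 2:
        d.setdefault('extra', {})                # hypothetical new field
        version = 3
    if version != 3:
        raise ValueError("Don't know how to load version %d" % (version,))
    d['version'] = 3
    return d

print(migrate({'version': 1, 'old_jobs_field': ['job-0']}))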
Example #5
    def spill_type(colname):
        orig_colname = rev_rename.get(colname, colname)
        types = {ds.columns[orig_colname].type for ds in chain}
        if 'bytes' in types:
            types.discard('ascii')
            types.discard('unicode')
        if 'unicode' in types:
            types.discard('ascii')
        if len(types) == 1:
            return types.pop()
        raise AcceleratorError('Incompatible types for column %r: %r' % (
            colname,
            types,
        ))
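The widening rules are: bytes subsumes ascii and unicode, unicode subsumes ascii, and anything else left over is an incompatibility. A standalone sketch of just those rules, without Dataset objects:

def widen(types):
    # 'bytes' can hold anything, 'unicode' can hold 'ascii'.
    types = set(types)
    if 'bytes' in types:
        types -= {'ascii', 'unicode'}
    if 'unicode' in types:
        types.discard('ascii')
    if len(types) == 1:
        return types.pop()
    raise ValueError('Incompatible types: %r' % (types,))

print(widen({'ascii', 'unicode'}))  # unicode
print(widen({'ascii', 'bytes'}))    # bytes
# widen({'int64', 'unicode'}) would raise ValueError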
Example #6
	def _do(self, op, data):
		from accelerator.compat import pickle
		with self._lock:
			cookie = self.cookie
			self.cookie += 1
			# have to register waiter before we send packet (to avoid a race)
			waiter = self._waiter(cookie)
			data = pickle.dumps((cookie, data), 2)
			header = struct.pack('<cI', op, len(data))
			self.sock.sendall(header + data)
		# must wait without the lock, otherwise all this threading gets us nothing.
		res = waiter()
		if res is None:
			from accelerator.error import AcceleratorError
			raise AcceleratorError("Runner exited unexpectedly.")
		return res
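The wire format here is a one-byte opcode plus a little-endian 4-byte payload length ('<cI'), followed by a pickled (cookie, data) tuple; the cookie lets the response be matched back to its waiter. A sketch of the framing alone, with no socket or waiter machinery (opcode and payload are made up):

import pickle
import struct

def frame(op, cookie, data):
    # header: 1-byte opcode + little-endian uint32 length, then the pickled body
    body = pickle.dumps((cookie, data), 2)
    return struct.pack('<cI', op, len(body)) + body

def unframe(buf):
    op, length = struct.unpack('<cI', buf[:5])
    cookie, data = pickle.loads(buf[5:5 + length])
    return op, cookie, data

buf = frame(b'm', 7, {'hello': 'world'})
print(unframe(buf))  # (b'm', 7, {'hello': 'world'})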
Example #7
def job_post(jobid):
    job = Job(jobid)
    d = job.json_load('post.json')
    version = d.get('version', 0)
    if version == 0:
        prefix = job.path + '/'
        d.files = sorted(fn[len(prefix):] if fn.startswith(prefix) else fn
                         for fn in d.files)
        version = 1
    if version != 1:
        raise AcceleratorError(
            "Don't know how to load post.json version %d (in %s)" % (
                d.version,
                jobid,
            ))
    return d
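Version 0 stored absolute paths; the upgrade rewrites them relative to the job directory. A tiny sketch of that normalization with made-up paths:

prefix = '/workdirs/dev/dev-42/'   # hypothetical job.path + '/'
files = ['/workdirs/dev/dev-42/result.pickle', 'notes.txt']
files = sorted(fn[len(prefix):] if fn.startswith(prefix) else fn for fn in files)
print(files)  # ['notes.txt', 'result.pickle']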
Example #8
    def __init__(self, server_config):
        package_list = server_config['method_directories']
        # read all methods
        self.db = {}
        for package, autodiscover in package_list.items():
            package_dir = self._check_package(package)
            db_ = read_methods_conf(package_dir, autodiscover)
            for method, meta in db_.items():
                if method in self.db:
                    raise AcceleratorError(
                        "Method \"%s\" defined both in \"%s\" and \"%s\"!" % (
                            method,
                            package,
                            self.db[method]['package'],
                        ))
                self.db[method] = DotDict(package=package, **meta)
        t0 = monotonic()
        per_runner = defaultdict(list)
        for key, val in iteritems(self.db):
            package = val['package']
            per_runner[val['version']].append((package, key))
        self.runners = new_runners(server_config, set(per_runner))
        warnings = []
        failed = []
        self.hash = {}
        self.params = {}
        self.descriptions = {}
        self.typing = {}
        for version, data in iteritems(per_runner):
            runner = self.runners.get(version)
            if not runner:
                msg = '%%s.%%s (unconfigured interpreter %s)' % (version)
                failed.extend(msg % t for t in sorted(data))
                continue
            v = runner.get_ax_version()
            if v != ax_version:
                if runner.python == sys.executable:
                    raise AcceleratorError(
                        "Server is using accelerator %s but %s is currently installed, please restart server."
                        % (
                            ax_version,
                            v,
                        ))
                else:
                    print(
                        "WARNING: Server is using accelerator %s but runner %r is using accelerator %s."
                        % (
                            ax_version,
                            version,
                            v,
                        ))
            w, f, h, p, d = runner.load_methods(package_list, data)
            warnings.extend(w)
            failed.extend(f)
            self.hash.update(h)
            self.params.update(p)
            self.descriptions.update(d)
        for key, params in iteritems(self.params):
            self.typing[key] = options2typing(key, params.options)
            params.defaults = params2defaults(params)
            params.required = options2required(params.options)

        def prt(a, prefix):
            maxlen = (max(len(e) for e in a) + len(prefix))
            line = '=' * maxlen
            print()
            print(line)
            for e in sorted(a):
                msg = prefix + e
                print(msg + ' ' * (maxlen - len(msg)))
            print(line)
            print()

        if warnings:
            prt(warnings, 'WARNING: ')
        if failed:
            print(colour.WHITEBG + colour.RED + colour.BOLD)
            prt(failed, 'FAILED to import ')
            print(colour.RESET)
            raise MethodLoadException(failed)
        print("Updated %d methods on %d runners in %.1f seconds" % (
            len(self.hash),
            len(per_runner),
            monotonic() - t0,
        ))
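One step worth isolating is the grouping of methods by interpreter version before the runners are created. A sketch with a made-up method db (package and version values are illustrative only):

from collections import defaultdict

# Made-up method db in the same shape as self.db above.
db = {
    'csvimport':    {'package': 'accelerator.standard_methods', 'version': 'DEFAULT'},
    'legacy_thing': {'package': 'my_methods', 'version': '2.7'},
}
per_runner = defaultdict(list)
for method, meta in db.items():
    per_runner[meta['version']].append((meta['package'], method))
print(dict(per_runner))
# {'DEFAULT': [('accelerator.standard_methods', 'csvimport')],
#  '2.7': [('my_methods', 'legacy_thing')]}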
Example #9
    def __init__(self, lst):
        AcceleratorError.__init__(self, 'Failed to load ' + ', '.join(lst))
        self.module_list = lst
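A self-contained sketch of the same pattern, an exception that keeps the list of failed modules available to the caller (the class name here is illustrative, not the project's):

class LoadException(Exception):
    def __init__(self, lst):
        Exception.__init__(self, 'Failed to load ' + ', '.join(lst))
        self.module_list = lst  # keep the raw list for programmatic use

try:
    raise LoadException(['a_example', 'a_other'])
except LoadException as e:
    print(e)              # Failed to load a_example, a_other
    print(e.module_list)  # ['a_example', 'a_other']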
Example #10
def call_analysis(analysis_func, sliceno_, delayed_start, q, preserve_result,
                  parent_pid, output_fds, **kw):
    try:
        q.make_writer()
        # tell iowrapper our PID, so our output goes to the right status stack.
        # (the pty is not quite a transparent transport ('\n' transforms into
        # '\r\n'), so we use a fairly human readable encoding.)
        writeall(output_fds[sliceno_], b'%16x' % (os.getpid(), ))
        # use our iowrapper fd instead of stdout/stderr
        os.dup2(output_fds[sliceno_], 1)
        os.dup2(output_fds[sliceno_], 2)
        for fd in output_fds:
            os.close(fd)
        os.close(_prof_fd)
        slicename = 'analysis(%d)' % (sliceno_, )
        setproctitle(slicename)
        if delayed_start:
            os.close(delayed_start[1])
            update = statmsg._start(
                'waiting for concurrency limit (%d)' % (sliceno_, ),
                parent_pid, True)
            if os.read(delayed_start[0], 1) != b'a':
                raise AcceleratorError('bad delayed_start, giving up')
            update(slicename)
            os.close(delayed_start[0])
        else:
            statmsg._start(slicename, parent_pid, True)
        kw['sliceno'] = g.sliceno = sliceno_
        for dw in dataset._datasetwriters.values():
            if dw._for_single_slice is None:
                dw._set_slice(sliceno_)
        res = analysis_func(**kw)
        if preserve_result:
            # Remove defaultdicts until we find one with a picklable default_factory.
            # (This is what you end up doing manually anyway.)
            def picklable(v):
                try:
                    pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
                    return True
                except Exception:
                    return False

            def fixup(d):
                if isinstance(
                        d, defaultdict) and not picklable(d.default_factory):
                    if not d:
                        return {}
                    v = next(iteritems(d))
                    if isinstance(
                            v,
                            defaultdict) and not picklable(v.default_factory):
                        return {k: fixup(v) for k, v in iteritems(d)}
                    else:
                        return dict(d)
                else:
                    return d

            def save(item, name):
                blob.save(fixup(item), name, sliceno=sliceno_, temp=True)

            if isinstance(res, tuple):
                if sliceno_ == 0:
                    blob.save(len(res), "Analysis.tuple", temp=True)
                for ix, item in enumerate(res):
                    save(item, "Analysis.%d." % (ix, ))
            else:
                if sliceno_ == 0:
                    blob.save(False, "Analysis.tuple", temp=True)
                save(res, "Analysis.")
        from accelerator.extras import saved_files
        dw_lens = {}
        dw_minmax = {}
        dw_compressions = {}
        for name, dw in dataset._datasetwriters.items():
            if dw._for_single_slice or sliceno_ == 0:
                dw_compressions[name] = dw._compressions
            if dw._for_single_slice in (
                    None,
                    sliceno_,
            ):
                dw.close()
                dw_lens[name] = dw._lens
                dw_minmax[name] = dw._minmax
        c_fflush()
        q.put((
            sliceno_,
            monotonic(),
            saved_files,
            dw_lens,
            dw_minmax,
            dw_compressions,
            None,
        ))
        q.close()
    except:
        c_fflush()
        msg = fmt_tb(1)
        print(msg)
        q.put((
            sliceno_,
            monotonic(),
            {},
            {},
            {},
            {},
            msg,
        ))
        q.close()
        sleep(5)  # give launcher time to report error (and kill us)
        exitfunction()
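The picklable()/fixup() pair exists because a defaultdict whose default_factory is a lambda (or other local function) cannot be pickled for the saved result. A short demonstration of the underlying problem and the plain-dict workaround:

import pickle
from collections import defaultdict

d = defaultdict(lambda: 0)  # lambda default_factory is not picklable
d['x'] += 1
try:
    pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('cannot pickle as-is:', type(e).__name__)
# converting to a plain dict drops the default_factory and pickles fine
print(pickle.loads(pickle.dumps(dict(d))))  # {'x': 1}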
Example #11
0
def fork_analysis(slices, concurrency, analysis_func, kw, preserve_result,
                  output_fds, q):
    from multiprocessing import Process
    import gc
    children = []
    t = monotonic()
    pid = os.getpid()
    if hasattr(gc, 'freeze'):
        # See https://bugs.python.org/issue31558
        # (Though we keep the gc disabled by default.)
        gc.freeze()
    delayed_start = False
    delayed_start_todo = 0
    for i in range(slices):
        if i == concurrency:
            assert concurrency != 0
            # The rest will wait on this queue
            delayed_start = os.pipe()
            delayed_start_todo = slices - i
        p = SimplifiedProcess(target=call_analysis,
                              args=(analysis_func, i, delayed_start, q,
                                    preserve_result, pid, output_fds),
                              kwargs=kw,
                              name='analysis-%d' % (i, ))
        children.append(p)
    for fd in output_fds:
        os.close(fd)
    if delayed_start:
        os.close(delayed_start[0])
    q.make_reader()
    per_slice = []
    temp_files = {}
    no_children_no_messages = False
    reap_time = monotonic() + 5
    exit_count = 0
    while len(per_slice) < slices:
        if exit_count > 0 or reap_time <= monotonic():
            still_alive = []
            for p in children:
                if p.is_alive():
                    still_alive.append(p)
                else:
                    exit_count -= 1
                    if p.exitcode:
                        raise AcceleratorError(
                            "%s terminated with exitcode %d" % (
                                p.name,
                                p.exitcode,
                            ))
            children = still_alive
            reap_time = monotonic() + 5
        # If a process dies badly we may never get a message here.
        # (iowrapper tries to tell us though.)
        # No need to handle that very quickly though, 10 seconds is fine.
        # (Typically this is caused by running out of memory.)
        try:
            msg = q.get(timeout=10)
            if not msg:
                # Notification from iowrapper, so we wake up (quickly) even if
                # the process died badly (e.g. from running out of memory).
                exit_count += 1
                continue
            s_no, s_t, s_temp_files, s_dw_lens, s_dw_minmax, s_dw_compressions, s_tb = msg
        except QueueEmpty:
            if not children:
                # No children left, so they must have all sent their messages.
                # Still, just to be sure there isn't a race, wait one iteration more.
                if no_children_no_messages:
                    raise AcceleratorError(
                        "All analysis processes exited cleanly, but not all returned a result."
                    )
                else:
                    no_children_no_messages = True
            continue
        if s_tb:
            data = [{'analysis(%d)' % (s_no, ): s_tb}, None]
            writeall(_prof_fd, json.dumps(data).encode('utf-8'))
            exitfunction()
        if delayed_start_todo:
            # Another analysis is allowed to run now
            os.write(delayed_start[1], b'a')
            delayed_start_todo -= 1
        per_slice.append((s_no, s_t))
        temp_files.update(s_temp_files)
        for name, lens in s_dw_lens.items():
            dataset._datasetwriters[name]._lens.update(lens)
        for name, minmax in s_dw_minmax.items():
            dataset._datasetwriters[name]._minmax.update(minmax)
        for name, compressions in s_dw_compressions.items():
            dataset._datasetwriters[name]._compressions.update(compressions)
    g.update_top_status("Waiting for all slices to finish cleanup")
    q.close()
    if delayed_start:
        os.close(delayed_start[1])
    for p in children:
        p.join()
    if preserve_result:
        res_seq = ResultIterMagic(
            slices, reuse_msg="analysis_res is an iterator, don't re-use it")
    else:
        res_seq = None
    return [v - t for k, v in sorted(per_slice)], temp_files, res_seq
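The delayed-start pipe acts as a crude concurrency limiter: the first `concurrency` slices start immediately, the rest block reading one byte from the pipe, and the parent writes one byte each time a slice reports in. A sketch of that handshake using a thread instead of a forked process:

import os
import threading
import time

r, w = os.pipe()

def worker():
    # a "delayed" slice: block until the coordinator hands out a token byte
    if os.read(r, 1) != b'a':
        raise RuntimeError('bad delayed start')
    print('worker released')

t = threading.Thread(target=worker)
t.start()
time.sleep(0.1)     # worker is now blocked in os.read()
os.write(w, b'a')   # a slot freed up: release one waiting worker
t.join()
os.close(r)
os.close(w)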