Example 1
def call_analysis(analysis_func, sliceno_, delayed_start, q, preserve_result,
                  parent_pid, output_fds, **kw):
    try:
        q.make_writer()
        # tell iowrapper our PID, so our output goes to the right status stack.
        # (the pty is not quite a transparent transport ('\n' transforms into
        # '\r\n'), so we use a fairly human readable encoding.)
        writeall(output_fds[sliceno_], b'%16x' % (os.getpid(), ))
        # use our iowrapper fd instead of stdout/stderr
        os.dup2(output_fds[sliceno_], 1)
        os.dup2(output_fds[sliceno_], 2)
        for fd in output_fds:
            os.close(fd)
        os.close(_prof_fd)
        slicename = 'analysis(%d)' % (sliceno_, )
        setproctitle(slicename)
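        # delayed_start, when set, is a pipe (read fd, write fd): we block here
        # until the parent writes b'a', releasing us under the concurrency limit.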
        if delayed_start:
            os.close(delayed_start[1])
            update = statmsg._start(
                'waiting for concurrency limit (%d)' % (sliceno_, ),
                parent_pid, True)
            if os.read(delayed_start[0], 1) != b'a':
                raise AcceleratorError('bad delayed_start, giving up')
            update(slicename)
            os.close(delayed_start[0])
        else:
            statmsg._start(slicename, parent_pid, True)
        kw['sliceno'] = g.sliceno = sliceno_
        for dw in dataset._datasetwriters.values():
            if dw._for_single_slice is None:
                dw._set_slice(sliceno_)
        res = analysis_func(**kw)
        if preserve_result:
            # Remove defaultdicts until we find one with a picklable default_factory.
            # (This is what you end up doing manually anyway.)
            def picklable(v):
                try:
                    pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
                    return True
                except Exception:
                    return False

            def fixup(d):
                if isinstance(d, defaultdict) and not picklable(d.default_factory):
                    if not d:
                        return {}
                    v = next(iteritems(d))[1]  # inspect one value to decide whether to recurse
                    if isinstance(v, defaultdict) and not picklable(v.default_factory):
                        return {k: fixup(v) for k, v in iteritems(d)}
                    else:
                        return dict(d)
                else:
                    return d
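
            # For illustration: a lambda default_factory cannot be pickled, so
            # fixup() degrades such defaultdicts to plain dicts, recursing when
            # the values are themselves unpicklable defaultdicts:
            #   d = defaultdict(lambda: 0); d['x'] += 1
            #   fixup(d) -> {'x': 1}
            #   n = defaultdict(lambda: defaultdict(lambda: 0)); n['a']['b'] += 1
            #   fixup(n) -> {'a': {'b': 1}}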

            def save(item, name):
                blob.save(fixup(item), name, sliceno=sliceno_, temp=True)

            if isinstance(res, tuple):
                if sliceno_ == 0:
                    blob.save(len(res), "Analysis.tuple", temp=True)
                for ix, item in enumerate(res):
                    save(item, "Analysis.%d." % (ix, ))
            else:
                if sliceno_ == 0:
                    blob.save(False, "Analysis.tuple", temp=True)
                save(res, "Analysis.")
        from accelerator.extras import saved_files
        dw_lens = {}
        dw_minmax = {}
        dw_compressions = {}
        for name, dw in dataset._datasetwriters.items():
            if dw._for_single_slice or sliceno_ == 0:
                dw_compressions[name] = dw._compressions
            if dw._for_single_slice in (None, sliceno_):
                dw.close()
                dw_lens[name] = dw._lens
                dw_minmax[name] = dw._minmax
        c_fflush()
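        # report success to the parent: (sliceno, finish time, files saved by
        # this slice, dataset lengths, dataset min/max, dataset compressions,
        # None meaning "no error").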
        q.put((
            sliceno_,
            monotonic(),
            saved_files,
            dw_lens,
            dw_minmax,
            dw_compressions,
            None,
        ))
        q.close()
    except:
        c_fflush()
        msg = fmt_tb(1)
        print(msg)
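        # report failure: same tuple shape, but with empty dicts and the
        # formatted traceback in the error position.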
        q.put((
            sliceno_,
            monotonic(),
            {},
            {},
            {},
            {},
            msg,
        ))
        q.close()
        sleep(5)  # give launcher time to report error (and kill us)
        exitfunction()
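
Each analysis child reports back to the launcher over q as a seven-element tuple; success and failure use the same shape and differ only in the final error field. The real collection happens in fork_analysis (not shown here); the sketch below is only an illustration of how such tuples could be interpreted, and its function name is made up for the example.

def summarize_analysis_results(results):
    # 'results' is an iterable of the 7-tuples that call_analysis puts on q.
    per_slice = {}
    for sliceno, t, saved_files, dw_lens, dw_minmax, dw_compressions, error in results:
        if error is not None:
            # the child already printed the traceback before sending it here
            raise Exception('analysis(%d) failed:\n%s' % (sliceno, error))
        per_slice[sliceno] = {
            'finished_at': t,
            'saved_files': saved_files,
            'dataset_lens': dw_lens,
            'dataset_minmax': dw_minmax,
            'dataset_compressions': dw_compressions,
        }
    return per_slice
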
Example 2
def execute_process(workdir,
                    jobid,
                    slices,
                    concurrency,
                    result_directory,
                    common_directory,
                    input_directory,
                    index=None,
                    workdirs=None,
                    server_url=None,
                    subjob_cookie=None,
                    parent_pid=0):
    WORKDIRS.update(workdirs)

    g.job = jobid
    setproctitle('launch')
    path = os.path.join(workdir, jobid)
    try:
        os.chdir(path)
    except Exception:
        print("Cannot cd to workdir", path)
        exit(1)

    g.params = params = job_params()
    method_ref = import_module(params.package + '.a_' + params.method)
    g.sliceno = -1

    g.job = CurrentJob(jobid, params, result_directory, input_directory)
    g.slices = slices

    g.options = params.options
    g.datasets = params.datasets
    g.jobs = params.jobs

    method_ref.options = params.options
    method_ref.datasets = params.datasets
    method_ref.jobs = params.jobs

    g.server_url = server_url
    g.running = 'launch'
    statmsg._start('%s %s' % (jobid, params.method), parent_pid)

    def dummy():
        pass

    prepare_func = getattr(method_ref, 'prepare', dummy)
    analysis_func = getattr(method_ref, 'analysis', dummy)
    synthesis_func = getattr(method_ref, 'synthesis', dummy)

    synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)
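    # synthesis_needs_analysis is forwarded to fork_analysis below; presumably
    # it becomes preserve_result in call_analysis, so per-slice results are
    # only pickled when synthesis actually asks for analysis_res.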

    fd2pid, names, masters, slaves = iowrapper.setup(
        slices, prepare_func is not dummy, analysis_func is not dummy)

    def switch_output():
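        # redirect stdout/stderr to the next iowrapper slave pty, so output
        # from the following phase is attributed to it.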
        fd = slaves.pop()
        os.dup2(fd, 1)
        os.dup2(fd, 2)
        os.close(fd)

    if analysis_func is dummy:
        q = None
    else:
        q = LockFreeQueue()
    iowrapper.run_reader(fd2pid, names, masters, slaves, q=q)
    for fd in masters:
        os.close(fd)

    # A chain must be finished from the back, so sort on that.
    sortnum_cache = {}

    def dw_sortnum(name):
        if name not in sortnum_cache:
            dw = dataset._datasetwriters.get(name)
            if not dw:  # manually .finish()ed
                num = -1
            elif dw.previous and dw.previous.startswith(jobid + '/'):
                pname = dw.previous.split('/')[1]
                num = dw_sortnum(pname) + 1
            else:
                num = 0
            sortnum_cache[name] = num
        return sortnum_cache[name]
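
    # For illustration: if dataset 'b' was written with previous = jobid + '/a'
    # and 'c' with previous = jobid + '/b' (hypothetical names), dw_sortnum
    # gives a=0, b=1, c=2, so sorted(..., key=dw_sortnum) finishes 'a' first
    # and 'c' last, i.e. the chain is finished from the back.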

    prof = {}
    if prepare_func is dummy:
        prof['prepare'] = 0  # no prepare step; keep the key so the profile is complete
    else:
        t = monotonic()
        switch_output()
        g.running = 'prepare'
        g.subjob_cookie = subjob_cookie
        setproctitle(g.running)
        with statmsg.status(g.running):
            g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
            to_finish = [
                dw.name for dw in dataset._datasetwriters.values()
                if dw._started
            ]
            if to_finish:
                with statmsg.status("Finishing datasets"):
                    for name in sorted(to_finish, key=dw_sortnum):
                        dataset._datasetwriters[name].finish()
        c_fflush()
        prof['prepare'] = monotonic() - t
    switch_output()
    setproctitle('launch')
    from accelerator.extras import saved_files
    if analysis_func is dummy:
        prof['per_slice'] = []
        prof['analysis'] = 0
    else:
        t = monotonic()
        g.running = 'analysis'
        g.subjob_cookie = None  # subjobs are not allowed from analysis
        with statmsg.status(
                'Waiting for all slices to finish analysis') as update:
            g.update_top_status = update
            prof['per_slice'], files, g.analysis_res = fork_analysis(
                slices, concurrency, analysis_func, args_for(analysis_func),
                synthesis_needs_analysis, slaves, q)
            del g.update_top_status
        prof['analysis'] = monotonic() - t
        saved_files.update(files)
    t = monotonic()
    g.running = 'synthesis'
    g.subjob_cookie = subjob_cookie
    setproctitle(g.running)
    with statmsg.status(g.running):
        synthesis_res = synthesis_func(**args_for(synthesis_func))
        if synthesis_res is not None:
            blob.save(synthesis_res, temp=False)
        if dataset._datasetwriters:
            with statmsg.status("Finishing datasets"):
                for name in sorted(dataset._datasetwriters, key=dw_sortnum):
                    dataset._datasetwriters[name].finish()
    if dataset._datasets_written:
        blob.save(dataset._datasets_written,
                  'DS/LIST',
                  temp=False,
                  _hidden=True)
    c_fflush()
    t = monotonic() - t
    prof['synthesis'] = t

    from accelerator.subjobs import _record
    return None, (prof, saved_files, _record)
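
For context, the method module imported above as params.package + '.a_' + params.method only needs to define whichever of prepare, analysis and synthesis it uses; missing ones fall back to dummy. A minimal, hypothetical method file might look like the sketch below. The file name and function bodies are illustrative; only the three entry points, the sliceno argument to analysis and the analysis_res argument to synthesis are taken from the code above.

# Hypothetical method module, e.g. a_example.py (illustrative sketch only).

def prepare():
    # runs once in the launch process, before per-slice analysis is forked
    return {'greeting': 'hello'}

def analysis(sliceno):
    # runs once per slice; the return value is only preserved when synthesis
    # declares an analysis_res argument (see synthesis_needs_analysis above)
    return 'hello from slice %d' % (sliceno,)

def synthesis(analysis_res):
    # analysis_res exposes the per-slice analysis return values; a non-None
    # return value from synthesis is saved with blob.save (see above)
    return list(analysis_res)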