def call_analysis(analysis_func, sliceno_, q, preserve_result, parent_pid, **kw):
	try:
		status._start('analysis(%d)' % (sliceno_,), parent_pid, 't')
		os.close(_prof_fd)
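		# The slice number goes by two names; pass it to the analysis
		# function under whichever name it asked for, and set both on g.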
		for stupid_inconsistent_name in ('sliceno', 'index'):
			if stupid_inconsistent_name in kw:
				kw[stupid_inconsistent_name] = sliceno_
			setattr(g, stupid_inconsistent_name, sliceno_)
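		# Point dataset writers that are not bound to a single slice at this slice.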
		for dw in dataset._datasetwriters.values():
			if dw._for_single_slice is None:
				dw._set_slice(sliceno_)
		res = analysis_func(**kw)
		if preserve_result:
			# Remove defaultdicts until we find one with a picklable default_factory.
			# (This is what you end up doing manually anyway.)
			def picklable(v):
				try:
					pickle.dumps(v, pickle.HIGHEST_PROTOCOL)
					return True
				except Exception:
					return False
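			# For example, defaultdict(lambda: 0) cannot be pickled (the lambda
			# default_factory is unpicklable) while defaultdict(int) can, so
			# fixup() converts the former into plain dicts before saving.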
			def fixup(d):
				if isinstance(d, defaultdict) and not picklable(d.default_factory):
					if not d:
						return {}
					v = next(iteritems(d))[1]  # iteritems yields (key, value) pairs; sample one value
					if isinstance(v, defaultdict) and not picklable(v.default_factory):
						return {k: fixup(v) for k, v in iteritems(d)}
					else:
						return dict(d)
				else:
					return d
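			# Each slice saves its (fixed-up) result as a temporary blob keyed
			# on its slice number, so it can be handed to synthesis later.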
			def save(item, name):
				blob.save(fixup(item), name, sliceno=sliceno_, temp=True)
			if isinstance(res, tuple):
				if sliceno_ == 0:
					blob.save(len(res), "Analysis.tuple", temp=True)
				for ix, item in enumerate(res):
					save(item, "Analysis.%d." % (ix,))
			else:
				if sliceno_ == 0:
					blob.save(False, "Analysis.tuple", temp=True)
				save(res, "Analysis.")
		from extras import saved_files
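		# Close the dataset writers this slice wrote to and collect their
		# lengths and min/max values.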
		dw_lens = {}
		dw_minmax = {}
		for name, dw in dataset._datasetwriters.items():
			if dw._for_single_slice in (None, sliceno_,):
				dw.close()
				dw_lens[name] = dw._lens
				dw_minmax[name] = dw._minmax
		status._end()
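		# Report success to the parent: (sliceno, end time, saved files,
		# writer lengths, writer min/max, no error).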
		q.put((sliceno_, time(), saved_files, dw_lens, dw_minmax, None,))
	except:
		status._end()
		q.put((sliceno_, time(), {}, {}, {}, fmt_tb(1),))
		print_exc()
		sleep(5) # give launcher time to report error (and kill us)
		exitfunction()
def execute_process(workdir,
                    jobid,
                    slices,
                    result_directory,
                    common_directory,
                    source_directory,
                    index=None,
                    workspaces=None,
                    daemon_url=None,
                    subjob_cookie=None,
                    parent_pid=0):
    g.JOBID = jobid
    setproctitle('launch')
    path = os.path.join(workdir, jobid)
    try:
        os.chdir(path)
    except Exception:
        print("Cannot cd to workdir", path)
        exit(1)

    g.params = params = job_params()
    method_ref = import_module(params.package + '.a_' + params.method)
    g.sliceno = -1

    if workspaces:
        jobid_module.put_workspaces(workspaces)

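    # Dataset parameters may be dataset references (or lists of them); try to
    # resolve each one, falling back to the raw value if it is not a dataset.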
    def maybe_dataset(v):
        if isinstance(v, list):
            return [maybe_dataset(e) for e in v]
        if not v:
            return ''
        try:
            return dataset.Dataset(v)
        except IOError:
            return v

    datasets = DotDict(
        {k: maybe_dataset(v)
         for k, v in params.datasets.items()})

    g.options = params.options
    g.datasets = datasets
    g.jobids = params.jobids

    method_ref.options = params.options
    method_ref.datasets = datasets
    method_ref.jobids = params.jobids

    # compatibility names
    g.SLICES = slices
    g.JOBID = jobid
    g.jobid = jobid
    g.METHOD = params.method
    g.WORKSPACEPATH = workdir
    g.CAPTION = params.caption
    g.PACKAGE = params.package
    g.RESULT_DIRECTORY = result_directory
    g.COMMON_DIRECTORY = common_directory
    g.SOURCE_DIRECTORY = source_directory
    g.index = -1

    g.daemon_url = daemon_url
    g.running = 'launch'
    status._start('%s %s' % (
        jobid,
        params.method,
    ), parent_pid)

    def dummy():
        pass

    prepare_func = getattr(method_ref, 'prepare', dummy)
    analysis_func = getattr(method_ref, 'analysis', dummy)
    synthesis_func = getattr(method_ref, 'synthesis', dummy)

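    # synthesis() only gets the per-slice analysis results if it declares an
    # 'analysis_res' argument; in that case they are preserved across the fork.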
    synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)

    # A chain must be finished from the back, so sort on that.
    sortnum_cache = {}

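    # E.g. if dataset 'b' has previous '<jobid>/a', then 'a' sorts as 0 and
    # 'b' as 1, so 'a' is finished before 'b'.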
    def dw_sortnum(name):
        if name not in sortnum_cache:
            dw = dataset._datasetwriters[name]
            if dw.previous and dw.previous.startswith(jobid + '/'):
                pname = dw.previous.split('/')[1]
                num = dw_sortnum(pname) + 1
            else:
                num = 0
            sortnum_cache[name] = num
        return sortnum_cache[name]

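    # Wall-clock time for each phase is collected in prof and returned to the caller.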
    prof = {}
    if prepare_func is dummy:
        prof['prepare'] = 0  # truthish!
    else:
        t = time()
        g.running = 'prepare'
        g.subjob_cookie = subjob_cookie
        setproctitle(g.running)
        with status.status(g.running):
            g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
            to_finish = [
                dw.name for dw in dataset._datasetwriters.values()
                if dw._started
            ]
            if to_finish:
                with status.status("Finishing datasets"):
                    for name in sorted(to_finish, key=dw_sortnum):
                        dataset._datasetwriters[name].finish()
        prof['prepare'] = time() - t
    setproctitle('launch')
    from extras import saved_files
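    # analysis runs once per slice in forked processes (see call_analysis above);
    # fork_analysis gathers per-slice timings, saved files and preserved results.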
    if analysis_func is dummy:
        prof['per_slice'] = []
        prof['analysis'] = 0
    else:
        t = time()
        g.running = 'analysis'
        g.subjob_cookie = None  # subjobs are not allowed from analysis
        with status.status(
                'Waiting for all slices to finish analysis') as update:
            g.update_top_status = update
            prof['per_slice'], files, g.analysis_res = fork_analysis(
                slices, analysis_func, args_for(analysis_func),
                synthesis_needs_analysis)
            del g.update_top_status
        prof['analysis'] = time() - t
        saved_files.update(files)
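    # synthesis runs in this process; a non-None return value is saved as the
    # job's default result blob, and any remaining dataset writers are finished.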
    t = time()
    g.running = 'synthesis'
    g.subjob_cookie = subjob_cookie
    setproctitle(g.running)
    with status.status(g.running):
        synthesis_res = synthesis_func(**args_for(synthesis_func))
        if synthesis_res is not None:
            blob.save(synthesis_res, temp=False)
        if dataset._datasetwriters:
            with status.status("Finishing datasets"):
                for name in sorted(dataset._datasetwriters, key=dw_sortnum):
                    dataset._datasetwriters[name].finish()
    t = time() - t
    prof['synthesis'] = t

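    # Hand back the per-phase profiling data, the registry of saved files and
    # the subjob record to the caller.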
    from subjobs import _record
    status._end()
    return None, (prof, saved_files, _record)