예제 #1
0
def _job_candidates_options(candidates):
    for jobid, remset in iteritems(candidates):
        setup = job_params(jobid)
        optdiff = defaultdict(dict)
        for thing in remset:
            section, name = thing.split('-', 1)
            optdiff[section][name] = setup[section][name]
        yield jobid, optdiff
예제 #2
0
def csvexport(sliceno, filename, labelsonfirstline):
	assert len(options.separator) == 1
	assert options.quote_fields in ('', "'", '"',)
	d = datasets.source[0]
	if not options.labels:
		options.labels = sorted(d.columns)
	if options.chain_source:
		if jobids.previous:
			prev_source = job_params(jobids.previous).datasets.source
			assert len(datasets.source) == len(prev_source)
		else:
			prev_source = [None] * len(datasets.source)
		lst = []
		for src, stop in zip(datasets.source, prev_source):
			lst.extend(src.chain(stop_ds=stop))
		datasets.source = lst
	if filename.lower().endswith('.gz'):
		mkwrite = mkwrite_gz
	elif filename.lower().endswith('.csv'):
		mkwrite = mkwrite_uncompressed
	else:
		raise Exception("Filename should end with .gz for compressed or .csv for uncompressed")
	iters = []
	first = True
	for label in options.labels:
		it = d.iterate_list(sliceno, label, datasets.source, status_reporting=first)
		first = False
		t = d.columns[label].type
		if t == 'unicode' and PY2:
			it = imap(enc, it)
		elif t == 'bytes' and PY3:
			it = imap(lambda s: s.decode('utf-8', errors='backslashreplace'), it)
		elif t in ('float32', 'float64', 'number'):
			it = imap(repr, it)
		elif t == 'json':
			it = imap(dumps, it)
		elif t not in ('unicode', 'ascii', 'bytes'):
			it = imap(str, it)
		iters.append(it)
	it = izip(*iters)
	with mkwrite(filename) as write:
		q = options.quote_fields
		sep = options.separator
		if q:
			qq = q + q
			if labelsonfirstline:
				write(enc(sep.join(q + n.replace(q, qq) + q for n in options.labels)))
			for data in it:
				write(sep.join(q + n.replace(q, qq) + q for n in data))
		else:
			if labelsonfirstline:
				write(enc(sep.join(options.labels)))
			for data in it:
				write(sep.join(data))
예제 #3
0
 def params(self):
     from accelerator.extras import job_params
     return job_params(self)
예제 #4
0
def execute_process(workdir,
                    jobid,
                    slices,
                    concurrency,
                    result_directory,
                    common_directory,
                    input_directory,
                    index=None,
                    workdirs=None,
                    server_url=None,
                    subjob_cookie=None,
                    parent_pid=0):
    WORKDIRS.update(workdirs)

    g.job = jobid
    setproctitle('launch')
    path = os.path.join(workdir, jobid)
    try:
        os.chdir(path)
    except Exception:
        print("Cannot cd to workdir", path)
        exit(1)

    g.params = params = job_params()
    method_ref = import_module(params.package + '.a_' + params.method)
    g.sliceno = -1

    g.job = CurrentJob(jobid, params, result_directory, input_directory)
    g.slices = slices

    g.options = params.options
    g.datasets = params.datasets
    g.jobs = params.jobs

    method_ref.options = params.options
    method_ref.datasets = params.datasets
    method_ref.jobs = params.jobs

    g.server_url = server_url
    g.running = 'launch'
    statmsg._start('%s %s' % (
        jobid,
        params.method,
    ), parent_pid)

    def dummy():
        pass

    prepare_func = getattr(method_ref, 'prepare', dummy)
    analysis_func = getattr(method_ref, 'analysis', dummy)
    synthesis_func = getattr(method_ref, 'synthesis', dummy)

    synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)

    fd2pid, names, masters, slaves = iowrapper.setup(
        slices, prepare_func is not dummy, analysis_func is not dummy)

    def switch_output():
        fd = slaves.pop()
        os.dup2(fd, 1)
        os.dup2(fd, 2)
        os.close(fd)

    if analysis_func is dummy:
        q = None
    else:
        q = LockFreeQueue()
    iowrapper.run_reader(fd2pid, names, masters, slaves, q=q)
    for fd in masters:
        os.close(fd)

    # A chain must be finished from the back, so sort on that.
    sortnum_cache = {}

    def dw_sortnum(name):
        if name not in sortnum_cache:
            dw = dataset._datasetwriters.get(name)
            if not dw:  # manually .finish()ed
                num = -1
            elif dw.previous and dw.previous.startswith(jobid + '/'):
                pname = dw.previous.split('/')[1]
                num = dw_sortnum(pname) + 1
            else:
                num = 0
            sortnum_cache[name] = num
        return sortnum_cache[name]

    prof = {}
    if prepare_func is dummy:
        prof['prepare'] = 0  # truthish!
    else:
        t = monotonic()
        switch_output()
        g.running = 'prepare'
        g.subjob_cookie = subjob_cookie
        setproctitle(g.running)
        with statmsg.status(g.running):
            g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
            to_finish = [
                dw.name for dw in dataset._datasetwriters.values()
                if dw._started
            ]
            if to_finish:
                with statmsg.status("Finishing datasets"):
                    for name in sorted(to_finish, key=dw_sortnum):
                        dataset._datasetwriters[name].finish()
        c_fflush()
        prof['prepare'] = monotonic() - t
    switch_output()
    setproctitle('launch')
    from accelerator.extras import saved_files
    if analysis_func is dummy:
        prof['per_slice'] = []
        prof['analysis'] = 0
    else:
        t = monotonic()
        g.running = 'analysis'
        g.subjob_cookie = None  # subjobs are not allowed from analysis
        with statmsg.status(
                'Waiting for all slices to finish analysis') as update:
            g.update_top_status = update
            prof['per_slice'], files, g.analysis_res = fork_analysis(
                slices, concurrency, analysis_func, args_for(analysis_func),
                synthesis_needs_analysis, slaves, q)
            del g.update_top_status
        prof['analysis'] = monotonic() - t
        saved_files.update(files)
    t = monotonic()
    g.running = 'synthesis'
    g.subjob_cookie = subjob_cookie
    setproctitle(g.running)
    with statmsg.status(g.running):
        synthesis_res = synthesis_func(**args_for(synthesis_func))
        if synthesis_res is not None:
            blob.save(synthesis_res, temp=False)
        if dataset._datasetwriters:
            with statmsg.status("Finishing datasets"):
                for name in sorted(dataset._datasetwriters, key=dw_sortnum):
                    dataset._datasetwriters[name].finish()
    if dataset._datasets_written:
        blob.save(dataset._datasets_written,
                  'DS/LIST',
                  temp=False,
                  _hidden=True)
    c_fflush()
    t = monotonic() - t
    prof['synthesis'] = t

    from accelerator.subjobs import _record
    return None, (prof, saved_files, _record)