def _job_candidates_options(candidates):
	# For each candidate jobid, look up the value of every 'section-name'
	# entry in that job's saved setup and yield them grouped by section.
	for jobid, remset in iteritems(candidates):
		setup = job_params(jobid)
		optdiff = defaultdict(dict)
		for thing in remset:
			section, name = thing.split('-', 1)
			optdiff[section][name] = setup[section][name]
		yield jobid, optdiff
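# Hedged usage sketch (not part of the original module; the jobid and the
# flattened 'section-name' keys below are made up): candidates maps jobids to
# sets of 'section-name' strings, and the generator yields each jobid together
# with those values looked up in that job's saved setup, grouped per section.
#
#     candidates = {'dev-123': {'options-length', 'datasets-source'}}
#     for jobid, optdiff in _job_candidates_options(candidates):
#         print(jobid, {section: dict(values) for section, values in optdiff.items()})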
def csvexport(sliceno, filename, labelsonfirstline):
	assert len(options.separator) == 1
	assert options.quote_fields in ('', "'", '"',)
	d = datasets.source[0]
	if not options.labels:
		options.labels = sorted(d.columns)
	if options.chain_source:
		# Follow each source dataset's chain back to the corresponding source
		# of the previous job (or the full chain if there is no previous job).
		if jobids.previous:
			prev_source = job_params(jobids.previous).datasets.source
			assert len(datasets.source) == len(prev_source)
		else:
			prev_source = [None] * len(datasets.source)
		lst = []
		for src, stop in zip(datasets.source, prev_source):
			lst.extend(src.chain(stop_ds=stop))
		datasets.source = lst
	if filename.lower().endswith('.gz'):
		mkwrite = mkwrite_gz
	elif filename.lower().endswith('.csv'):
		mkwrite = mkwrite_uncompressed
	else:
		raise Exception("Filename should end with .gz for compressed or .csv for uncompressed")
	iters = []
	first = True
	for label in options.labels:
		it = d.iterate_list(sliceno, label, datasets.source, status_reporting=first)
		first = False
		t = d.columns[label].type
		# Convert each column to text in a type-appropriate way.
		if t == 'unicode' and PY2:
			it = imap(enc, it)
		elif t == 'bytes' and PY3:
			it = imap(lambda s: s.decode('utf-8', errors='backslashreplace'), it)
		elif t in ('float32', 'float64', 'number'):
			it = imap(repr, it)
		elif t == 'json':
			it = imap(dumps, it)
		elif t not in ('unicode', 'ascii', 'bytes'):
			it = imap(str, it)
		iters.append(it)
	it = izip(*iters)
	with mkwrite(filename) as write:
		q = options.quote_fields
		sep = options.separator
		if q:
			qq = q + q
			if labelsonfirstline:
				write(enc(sep.join(q + n.replace(q, qq) + q for n in options.labels)))
			for data in it:
				write(sep.join(q + n.replace(q, qq) + q for n in data))
		else:
			if labelsonfirstline:
				write(enc(sep.join(options.labels)))
			for data in it:
				write(sep.join(data))
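# Hedged usage sketch (not part of the original module): a build script could
# request this export roughly like below. The separator/quote_fields/chain_source
# names come from the options used above; treating filename as an option, and
# 'csvexport' as the registered method name, are assumptions.
#
#     def main(urd):
#         urd.build(
#             'csvexport',
#             options=dict(
#                 filename='out.csv.gz',  # '.gz' -> gzip-compressed, '.csv' -> plain text
#                 separator=',',          # must be a single character
#                 quote_fields='"',       # '' (no quoting), "'" or '"'
#                 chain_source=True,      # also export the chains back to the previous job's sources
#             ),
#             datasets=dict(source=some_dataset),  # hypothetical source dataset
#         )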
def params(self):
	from accelerator.extras import job_params
	return job_params(self)
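# Hedged usage sketch (assumes `job` is a Job instance for an already built
# job): .params reloads the job's stored setup, so the method name and the
# options/datasets/jobs it was built with are available after the fact.
#
#     setup = job.params
#     print(setup.method, setup.options)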
def execute_process(workdir, jobid, slices, concurrency, result_directory, common_directory, input_directory, index=None, workdirs=None, server_url=None, subjob_cookie=None, parent_pid=0):
	WORKDIRS.update(workdirs)
	g.job = jobid
	setproctitle('launch')
	path = os.path.join(workdir, jobid)
	try:
		os.chdir(path)
	except Exception:
		print("Cannot cd to workdir", path)
		exit(1)
	# Load the job's setup and expose options/datasets/jobs both through g
	# and as attributes on the method module itself.
	g.params = params = job_params()
	method_ref = import_module(params.package + '.a_' + params.method)
	g.sliceno = -1
	g.job = CurrentJob(jobid, params, result_directory, input_directory)
	g.slices = slices
	g.options = params.options
	g.datasets = params.datasets
	g.jobs = params.jobs
	method_ref.options = params.options
	method_ref.datasets = params.datasets
	method_ref.jobs = params.jobs
	g.server_url = server_url
	g.running = 'launch'
	statmsg._start('%s %s' % (jobid, params.method,), parent_pid)

	# Fall back to a no-op for any phase the method does not define.
	def dummy():
		pass
	prepare_func = getattr(method_ref, 'prepare', dummy)
	analysis_func = getattr(method_ref, 'analysis', dummy)
	synthesis_func = getattr(method_ref, 'synthesis', dummy)
	synthesis_needs_analysis = 'analysis_res' in getarglist(synthesis_func)

	fd2pid, names, masters, slaves = iowrapper.setup(
		slices, prepare_func is not dummy, analysis_func is not dummy
	)

	# Redirect this process' stdout/stderr to the next output capture pipe.
	def switch_output():
		fd = slaves.pop()
		os.dup2(fd, 1)
		os.dup2(fd, 2)
		os.close(fd)

	if analysis_func is dummy:
		q = None
	else:
		q = LockFreeQueue()
	iowrapper.run_reader(fd2pid, names, masters, slaves, q=q)
	for fd in masters:
		os.close(fd)

	# A chain must be finished from the back, so sort on that.
	sortnum_cache = {}
	def dw_sortnum(name):
		if name not in sortnum_cache:
			dw = dataset._datasetwriters.get(name)
			if not dw: # manually .finish()ed
				num = -1
			elif dw.previous and dw.previous.startswith(jobid + '/'):
				pname = dw.previous.split('/')[1]
				num = dw_sortnum(pname) + 1
			else:
				num = 0
			sortnum_cache[name] = num
		return sortnum_cache[name]

	prof = {}
	if prepare_func is dummy:
		prof['prepare'] = 0 # truthish!
	else:
		t = monotonic()
		switch_output()
		g.running = 'prepare'
		g.subjob_cookie = subjob_cookie
		setproctitle(g.running)
		with statmsg.status(g.running):
			g.prepare_res = method_ref.prepare(**args_for(method_ref.prepare))
			to_finish = [dw.name for dw in dataset._datasetwriters.values() if dw._started]
			if to_finish:
				with statmsg.status("Finishing datasets"):
					for name in sorted(to_finish, key=dw_sortnum):
						dataset._datasetwriters[name].finish()
		c_fflush()
		prof['prepare'] = monotonic() - t
	switch_output()
	setproctitle('launch')

	from accelerator.extras import saved_files

	if analysis_func is dummy:
		prof['per_slice'] = []
		prof['analysis'] = 0
	else:
		t = monotonic()
		g.running = 'analysis'
		g.subjob_cookie = None # subjobs are not allowed from analysis
		with statmsg.status('Waiting for all slices to finish analysis') as update:
			g.update_top_status = update
			prof['per_slice'], files, g.analysis_res = fork_analysis(slices, concurrency, analysis_func, args_for(analysis_func), synthesis_needs_analysis, slaves, q)
			del g.update_top_status
		prof['analysis'] = monotonic() - t
		saved_files.update(files)

	t = monotonic()
	g.running = 'synthesis'
	g.subjob_cookie = subjob_cookie
	setproctitle(g.running)
	with statmsg.status(g.running):
		synthesis_res = synthesis_func(**args_for(synthesis_func))
		if synthesis_res is not None:
			blob.save(synthesis_res, temp=False)
		if dataset._datasetwriters:
			with statmsg.status("Finishing datasets"):
				for name in sorted(dataset._datasetwriters, key=dw_sortnum):
					dataset._datasetwriters[name].finish()
	if dataset._datasets_written:
		blob.save(dataset._datasets_written, 'DS/LIST', temp=False, _hidden=True)
	c_fflush()
	t = monotonic() - t
	prof['synthesis'] = t

	from accelerator.subjobs import _record
	return None, (prof, saved_files, _record)
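# Hedged note (not in the original source): the profiling dict assembled above
# ends up shaped like
#
#     {'prepare': <seconds>, 'per_slice': [<seconds for each analysis slice>],
#      'analysis': <seconds>, 'synthesis': <seconds>}
#
# and is returned together with the files the method saved and subjobs._record,
# so the caller can store them with the finished job.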