def exec_job_LOAD(options, *args, **kw):
    """Load-only job: record the job name, resolve the element
    configuration for ``args[0]`` and fetch its data requests.
    No processing or output step is run.
    """
    setattr(options, 'job', args[0])
    kw = udict(kw)
    # element configuration: (data requests, processors, outputs)
    datareqs, processors, outputs = e4t.cli.get_elem_configs(*args, **kw)
    cfg = e4t.cli._read_cfgfile(kw, 'OUTPUT', args[0])
    _cfgs = (kw, cfg)
    _dataset = tsload_datareqs(options, None, *datareqs)
def exec_job_COMPILE(options,*args,**kw):
    """Make .tex file for JOB: load the data requests, run the
    processors and emit the output, timing each phase.

    Side effects: sets ``options.elapsed`` to the per-phase timings
    [tsload, process, output] and, when ``options.switch_debug`` is on,
    saves the ``.cli`` accounting file and writes a ``.time`` timing
    report for the job.
    """
    kw = udict(kw)
    base = join(options.process_path,'input')
    copy_file(kw,base)
    datareqs, processors, outputs = e4t.cli.get_elem_configs(*args, **kw)
    _dataset = None
    elapsed = [.0,.0,.0]  # seconds spent in [tsload, process, output]
    if datareqs:
        with Timer() as t:
            _dataset = tsload_datareqs(options,_dataset,*datareqs,**kw)
        logger.debug("TIME tsload %f",t.secs)
        elapsed[0]=t.secs
    if processors:
        with Timer() as t:
            _dataset = process_dataset(options,_dataset,processors,*args,**kw)
        logger.debug("TIME process %f",t.secs)
        elapsed[1]=t.secs
    if not _dataset:
        # best-effort: log and continue rather than abort the whole run
        logger.error('VOID dataset on output step')
        # raise IOError('VOID dataset')
    if True: # outputs:  NOTE(review): output step is currently unconditional
        with Timer() as t:
            output_dataset(options,_dataset,outputs,*args,**kw)
        logger.debug("TIME output %f",t.secs)
        elapsed[2]=t.secs
    else:
        logger.error('No conf for output step')
        raise IOError('VOID output')
    setattr(options,'elapsed',elapsed)
    if options.switch_debug:
        # Write the report file
        e4t.cli.save_accounting("%s.cli" % options.job)
        # Fix: use a context manager so the report file is always
        # closed/flushed (the original leaked the open file handle).
        with codecs.open("%s.time" % options.job, 'w', 'utf-8') as report:
            report.write(u"tsload: %f\nprocss: %f\noutput: %f\n"
                         % (elapsed[0], elapsed[1], elapsed[2]))
def exec_job_EXPORT(options, *args, **kw):
    """Export the dataset of job ``args[0]`` to files.

    The FORMAT option (default ``('XLS',)``) selects the file formats;
    the PHASE option selects which snapshots are written: LOAD (raw
    data), PROCESS (processed data, the default) and/or DEFINITION.
    """
    setattr(options, 'job', args[0])
    kw = udict(kw)
    formats = options.options.xget('FORMAT', ('XLS',))
    phase = [p.upper() for p in options.options.xget('PHASE', ('PROCESS',))]
    logger.debug('EXPORT FILE OUTPUT FOR %s - f:%s - p:%s',
                 ','.join(args), ','.join(formats), ','.join(phase))
    (datareqs, processors, outputs) = e4t.cli.get_elem_configs(*args, **kw)
    cfg_T = e4t.cli._read_cfgfile(kw, 'OUTPUT')
    if cfg_T is None:
        return
    cfg = cfg_T.xget(args[0])
    if re.match('multiple', cfg.xget('KIND'), re.I):
        # a "multiple" element pulls in the configuration of its children
        cfg.update(dict((el, cfg_T.xget(el))
                        for el in cfg.xget_list('ELEMENTS')))
    _cfgs = (kw, cfg, cfg_T)
    _dataset = tsload_datareqs(options, None, *datareqs)
    logger.debug('data loaded')
    if 'LOAD' in phase:
        save_ds_with_fmts(formats, _dataset, _cfgs, 'raw', *args, **kw)
        if len(phase) == 1:
            # LOAD was the only requested phase: nothing more to do
            return
    _dataset = process_dataset(options, _dataset, processors)
    logger.debug('data processed')
    if 'PROCESS' in phase:
        save_ds_with_fmts(formats, _dataset, _cfgs, 'proc', *args, **kw)
    if 'DEFINITION' in phase:
        save_definition(options, formats, _dataset, outputs, _cfgs, 'def', *args, **kw)
def exec_cache_COUNT(options,jlist,base,jobspecfile,order,jjspec): from e4t.load.jobs import tsload_datareqs from e4t.timeseries import Timeseries from e4t.process.jobs import process_dataset from e4t.utils import acct cache = PickleCache(options) offset = _get_offset(options,1) num_els = 0 num_nums = 0 num_rew = 0 for section,jobs in jlist.items(): print section for j,k in jobs: (datareqs,processors,outputs) = e4t.cli.get_elem_configs(j,**k) if datareqs: _dataset = tsload_datareqs(options,None,*datareqs) _base_ds = set(_dataset.keys()) for k,v in _dataset.items(): num_els += 1 if isinstance(v,Timeseries): num_nums += len(v._data) elif isinstance(v,(list,tuple,np.ndarray)): num_nums += len(v) _dataset = process_dataset(options,_dataset,processors) _proc_ds = set(_dataset.keys())-_base_ds # num_els += len(_proc_ds) # print "B:",_base_ds # print "R:",_proc_ds for k in _proc_ds: v = _dataset[k] num_els += 1 num_rew += 1 if isinstance(v,Timeseries): num_nums += len(v._data) elif isinstance(v,(list,tuple,np.ndarray)): num_nums += len(v) print "NELS=",num_els print "NREW=",num_rew print "NNUM=",num_nums
def exec_show_job_SOLR(options, cwriter, *args, **kw):
    """Emit one CSV row per series of job ``args[0]`` to *cwriter* for
    SOLR indexing.

    Loads and processes the job's dataset, reads its output (figure or
    table) configuration, and for every plotted/ tabulated series writes
    a row describing title, label, data key, provider, URL and formula.

    NOTE(review): communicates through many module-level globals
    (SOURCE, TITLE, ..., PID in the ``global`` statement, plus WEB,
    WSOURCE, JOB, PAGE, PUBLIC, OUTFILE, OUTPUT which are read but
    defined elsewhere in the module) — not re-entrant; PID persists
    across calls.
    """
    global SOURCE, TITLE, SUBTITLE, PTITLE, LABEL, DATA, FORMULA, URL, PROCESSED, DOWNLOADED, DATAREQ, PROCESSOR, PID
    # Skip structural/special jobs that have no SOLR representation.
    if re.match("^(|us|jp|uk|in|ch|br|ru|po|tu)struct|cfa|cpt|uslms$", args[0], re.I):
        return
    setattr(options, "job", args[0])
    kw = udict(kw)
    base = join(options.process_path, "input")
    (datareqs, processors, outputs) = e4t.cli.get_elem_configs(*args, **kw)
    if len(datareqs) == 0:
        return
    acct = {}
    # Path of the first data-request file, relative to the process path.
    DATAREQ = datareqs[0].replace(options.process_path, "") if datareqs[0] else ""
    _dataset = tsload_datareqs(options, None, *datareqs, **kw)
    prcfg = {}
    if len(processors) > 0:
        pr = processors[0]
        PROCESSOR = pr
        prcfg = udict()
        # NOTE(review): this joins when the path already exists — looks
        # inverted compared to the ``ou`` lookup below; confirm intent.
        if exists(pr):
            pr = join(options.process_path, pr)
        if exists(pr):
            prcfg = cfg2hash(pr).xget_dict("PROCESSOR")
    # Resolve the output spec file, trying the process path as fallback.
    ou = outputs[0]
    if not exists(ou):
        ou = join(options.process_path, ou)
    if not exists(ou):
        logger.error("E01#001: Il file di specifica del processore %s non esiste", pr)
        raise ValueError, "E:MKEL:001"
    wcfg = cfg2hash(ou)
    jcfg = wcfg.xget_dict(options.job)
    # Job-level metadata, flattened to single lines and LaTeX-cleaned.
    SOURCE = clean_latex(jcfg.xget("SOURCE").replace("\n", " ") if "SOURCE" in jcfg else None)
    TITLE = clean_latex(jcfg.xget("TITLE").replace("\n", " ") if "TITLE" in jcfg else None)
    SUBTITLE = clean_latex(jcfg.xget("SUBTITLE").replace("\n", " ") if "SUBTITLE" in jcfg else None)
    KIND = jcfg.xget("KIND")
    if not re.match("figure|table", KIND, re.I):
        return
    # Track which dataset keys existed before processing, so each series
    # can be flagged as DOWNLOADED (raw) and/or PROCESSED (computed).
    _base_ds = set(_dataset.keys())
    _dataset = process_dataset(options, _dataset, processors, *args, **kw)
    _base_acct = udict()
    nacct = rework_acct()
    _base_acct.update(nacct)
    _proc_ds = set(_dataset.keys()) - _base_ds
    if re.match("figure", KIND, re.I):
        # --- figure: one row per series of each panel -----------------
        panels = jcfg.xget_list("PANELS")
        for panel in panels:
            pcfg = wcfg.xget_dict(panel)
            PTITLE = pcfg.xget("TITLE").replace("\n", " ") if "TITLE" in pcfg else None
            PTITLE = clean_latex(PTITLE)
            # Series may be listed plain or split by axis (left/right).
            series = []
            series1 = pcfg.xget_list("SERIES")
            series2 = pcfg.xget_list("LEFT SERIES")
            series3 = pcfg.xget_list("RIGHT SERIES")
            if series1:
                series.extend(series1)
            if series2:
                series.extend(series2)
            if series3:
                series.extend(series3)
            for serie in series:
                LABEL = serie
                DATA = serie
                FORMULA = None
                URL = ""
                PROVIDER = ""
                scfg = wcfg.xget_dict(serie)
                if scfg:
                    LABEL = clean_latex(scfg.xget("LABEL"))
                    DATA = scfg.xget("DATA")
                PROCESSED = DATA in _proc_ds
                DOWNLOADED = DATA in _base_ds
                if PROCESSED:
                    # The processor config holds the series' formula.
                    FORMULA = prcfg.xget(DATA).replace("\n", " ") if DATA in prcfg else None
                if DOWNLOADED:
                    if DATA not in _base_acct:
                        print DATA, "not in acct", _base_acct.keys(), ",".join(_base_ds)
                    else:
                        D = _base_acct.xget(DATA)
                        P = D["provider"]
                        URL = P + "://" + _base_acct.xget(DATA)["url"]
                        PROVIDER = P
                # CSV wants "True"/empty, not booleans.
                PROCESSED = str(PROCESSED) if PROCESSED else None
                DOWNLOADED = str(DOWNLOADED) if DOWNLOADED else None
                WPROCESSOR = "" if not PROCESSOR else WSOURCE + PROCESSOR.replace(options.process_path, "")
                XLSRAW = WEB + "ds/%s-raw.xls" % JOB.lower()
                XLSPROC = WEB + "ds/%s-proc.xls" % JOB.lower() if PROCESSOR else ""
                # External data if it came from a remote/file provider.
                XTRNAL = "True" if re.match("^dstream|http|file", PROVIDER) is not None else None
                PID += 1
                cwriter.writerow(
                    (
                        str(PID), PUBLIC, str(PAGE), KIND, JOB,
                        WEB + OUTFILE, WSOURCE + DATAREQ, WPROCESSOR,
                        WSOURCE + OUTPUT, XLSRAW, XLSPROC, TITLE,
                        SUBTITLE, SOURCE, PTITLE, LABEL, DATA,
                        DOWNLOADED, PROVIDER, URL, PROCESSED, FORMULA,
                        XTRNAL,
                    )
                )
            pass
    elif re.match("table", KIND, re.I):
        # --- table: one row per cell series of the data matrix --------
        blocks = jcfg.xget_list("BLOCK")
        if not blocks:
            return
        orient, m, n, ELS = _extract_data_matrix_def(prcfg, _dataset)
        line = 0
        TITLES = []
        # First pass: collect per-model titles and count the lines.
        for b in blocks:
            bcfg = wcfg.xget_dict(b)
            models = bcfg.xget_list("MODEL")
            PTITLE = bcfg.xget("TITLE").replace("\n", " ") if "TITLE" in bcfg else None
            if PTITLE:
                PTITLE = clean_latex(PTITLE)
            else:
                PTITLE = TITLE
            for mod in models:
                mod = clean_model(mod)
                if not mod:
                    continue
                LABEL = mod
                DATA = mod
                FORMULA = None
                URL = ""
                PROVIDER = ""
                scfg = wcfg.xget_dict(mod)
                if scfg:
                    TITLES.append(clean_latex(scfg.xget("TITLE")))
                line += 1
        lines = line
        # Second pass: emit one row per (line, series) of the matrix.
        for line in range(lines):
            series = ELS[line]
            for n, serie in series:
                DATA = serie
                PROCESSED = DATA in _proc_ds
                DOWNLOADED = DATA in _base_ds
                if PROCESSED:
                    FORMULA = prcfg.xget(DATA).replace("\n", " ") if DATA in prcfg else None
                if DOWNLOADED:
                    if DATA not in _base_acct:
                        print DATA, "not in acct", _base_acct.keys(), ",".join(_base_ds)
                    else:
                        D = _base_acct.xget(DATA)
                        P = D["provider"]
                        URL = P + "://" + _base_acct.xget(DATA)["url"]
                        PROVIDER = P
                PROCESSED = str(PROCESSED) if PROCESSED else None
                DOWNLOADED = str(DOWNLOADED) if DOWNLOADED else None
                WPROCESSOR = "" if not PROCESSOR else WSOURCE + PROCESSOR.replace(options.process_path, "")
                XLSRAW = WEB + "ds/%s-raw.xls" % JOB.lower()
                XLSPROC = WEB + "ds/%s-proc.xls" % JOB.lower() if PROCESSOR else ""
                XTRNAL = "True" if re.match("^dstream|http|file", PROVIDER) is not None else None
                TITLE = TITLES[line]
                PID += 1
                cwriter.writerow(
                    (
                        str(PID), PUBLIC, str(PAGE), KIND, JOB,
                        WEB + OUTFILE, WSOURCE + DATAREQ, WPROCESSOR,
                        WSOURCE + OUTPUT, XLSRAW, XLSPROC, TITLE,
                        SUBTITLE, SOURCE, PTITLE, LABEL, DATA,
                        DOWNLOADED, PROVIDER, URL, PROCESSED, FORMULA,
                        XTRNAL,
                    )
                )
    elif re.match("multiple", KIND, re.I):
        # "multiple" elements are containers; their children are indexed
        # when visited as their own jobs.
        pass
    else:
        logger.error("E01#002: Il file di specifica del processore %s non esiste", pr)
        raise ValueError, "E:MKEL:001"