def process_dataset(options,_dataset,processors,*args,**kw):
    """Run every processor config file in *processors* over *_dataset*.

    :param options: global options; ``process_path`` is used to resolve
        relative processor file names
    :param _dataset: the information set the processors transform
    :param processors: iterable of processor config file names
    :returns: the dataset after all processors ran (each processor's
        ``execute()`` result feeds the next one)
    :raises ValueError: ``E:MKEL:001`` if a processor config file is missing
    """
    for pr in processors:
        # Resolve the file name against the process path when needed.
        if not exists(pr):
            pr = join(options.process_path,pr)
        if not exists(pr):
            logger.error('E01#001: Processor config file not exists (%s)',pr)
            # Parenthesized raise (works on both Python 2 and 3).
            raise ValueError('E:MKEL:001')
        cfg = udict(cfg2hash(pr))
        if 'PROCESSOR' in cfg:
            pcfg = udict(cfg.xget('PROCESSOR'))
            pname = pcfg.xget('NAME')
            if not pname:
                logger.error('E01#002: Key NAME not exists in processor config (%s)',pr)
                continue
            processor = getProcessor(pname, _dataset, config=pcfg)
            if processor:
                # The processor returns a (possibly new) dataset object.
                _dataset = processor.execute()
            else:
                logger.error('E01#003: Processor not exists (%s)',pname)
    return _dataset
def writeout_processor_file(elem,options=Options()):
    """Write Processor File

    :param elem: the processor specification
    :type elem: a filename or specification line
    :Example:

        e4t --request random://uniform/VAR~:2012-12-31~2012-01-01~D \
            --processor FORMULA=J;J=VAR*100
    """
    # NOTE(review): the ``Options()`` default is evaluated once at import
    # time and shared between calls -- confirm Options is stateless.
    if not elem:
        return
    _dict = udict()
    for i,e in enumerate(elem):
        if exists(e):
            # An existing config file replaces everything collected so far.
            _dict = cfg2hash(e)
        else:
            _d = udict({ 'kind':'processor',
                         'active':'yes' ,
                         'name': 'BASIC',
                         'operations': 'FORMULA', })
            _in=as_dict(e,";")
            _d.update(_in)
            # Deprecated ``has_key`` replaced by the ``in`` operator.
            if 'FORMULA' not in _d:
                # Derive FORMULA from all keys except the DROP directive.
                _d['FORMULA']=','.join(sorted([ k for k in _in.keys() if k not in ('DROP',) ]))
                if len(_d['FORMULA'])==0:
                    del _d['FORMULA']
            job = 'PROC%d'%i
            _dict[job]=_d
    # Record the processing order in a PROCESSOR master section.
    _dict['PROCESSOR']=udict({ 'processors': ','.join(sorted(_dict.keys()))})
    return writeout_file(_dict,header='# processor file')
def output_dataset(options,_dataset,outputs,*args,**kw):
    """Render *_dataset* through the output config files in *outputs*.

    ``kw`` must carry ``LABEL`` and ``KIND``; KIND selects the document
    part class. Accounting timings are saved to ``<job>.e4t``.

    :raises ValueError: when KIND is unknown or an output file is missing
    """
    kw = udict(kw)
    label = kw.xget('LABEL').upper()
    kind = kw.xget('KIND').upper()
    parts = udict({ 'page'    : LaTeXPage,
                    'addtotoc': LaTeXAddToToc,
                    'part'    : LaTeXPart,
                    'chapter' : LaTeXChapter,
                    'toc'     : LaTeXTOC,
                    'cover'   : LaTeXCover,
                    'empty'   : LaTeXEmpty,
                    'table'   : OutputTable,
                    'figure'  : OutputFigure,
                    'dataset' : None,
                    'multiple': OutputMultiple})
    if kind not in parts:
        raise ValueError("Document part %s not found" % kind)
    fname = "%s.tex" % options.job
    for ou in outputs:
        if not exists(ou):
            ou = join(options.process_path,ou)
        if not exists(ou):
            # BUG FIX: the message used to interpolate the undefined name
            # ``pr``, raising NameError instead of logging.
            logger.error('E01#001: Il file di specifica del processore %s non esiste',ou)
            raise ValueError('E:MKEL:001')
        wcfg = udict(cfg2hash(ou))
        lcfg = wcfg.xget(label)
        with Timer() as t:
            _output_element(options,label,ou,_dataset,parts[kind],fname,**kw)
        _accounting["OUT.TIME.%s"%ou] = t.msecs
    xfname = "%s.e4t" % options.job
    logger.debug('Write out %s',xfname)
    save_dict(xfname,_accounting,options.switch_debug)
    _accounting.clear()
    if 'OUTPUT' in kw:
        return
    # NON-OUTPUT ELEMENTS
    if parts[kind] is None:
        logger.error('Output error for %s',kind)
        return
    t = parts[kind](kw)
    l = LaTeX(options)
    if hasattr(t,'setup_elements'):
        t.setup_elements()
    if hasattr(t,'produce'):
        t.produce()
    ofile = fname
    # BUG FIX: use a context manager so the handle is closed on error too.
    with open(ofile, 'w') as f:
        f.write(l.layout(t,False))
    outfiles.append(ofile)
def _read_cfgfile(kw,fkey,label=None):
    """Load the cfg file named by key *fkey* of *kw*.

    Returns the parsed config -- narrowed to the *label* section when
    given -- or ``None`` when *kw* carries no such key.
    """
    cfgname = udict(kw).xget(fkey)
    if not cfgname:
        return None
    if not exists(cfgname):
        cfgname = join(options.process_path,cfgname)
    if not exists(cfgname):
        logger.error('E01#001: cfg file for dataset %s missing',cfgname)
        raise ValueError('E:MKEL:001')
    parsed = udict(cfg2hash(cfgname))
    if label:
        return udict(parsed).xget(label)
    return parsed
def save_definition(options,formats,_dataset,outputs,_cfgs,extn,*args,**kw):
    """Serialize the job's output element definitions to pretty XML.

    Builds the element class selected by ``kw['KIND']`` for each output
    config and writes its XML representation to ``<job>.xml`` via the
    Postprocessor.

    :raises ValueError: unknown KIND or missing output config file
    """
    kw=udict(kw)
    label = kw.xget('LABEL').upper()
    kind = kw.xget('KIND').upper()
    parts = udict({ 'page'    : LaTeXPage,
                    'addtotoc': LaTeXAddToToc,
                    'part'    : LaTeXPart,
                    'chapter' : LaTeXChapter,
                    'toc'     : LaTeXTOC,
                    'cover'   : LaTeXCover,
                    'empty'   : LaTeXEmpty,
                    'table'   : OutputTable,
                    'figure'  : OutputFigure,
                    'dataset' : None,
                    'multiple': OutputMultiple })
    if kind not in parts:
        raise ValueError("Document part %s not found" % kind)
    elem_class = parts[kind]
    fname = "%s.tex" % options.job
    defn = []
    for ou in outputs:
        if not exists(ou):
            ou = join(options.process_path,ou)
        if not exists(ou):
            # BUG FIX: formerly interpolated the undefined name ``pr``,
            # raising NameError instead of logging the missing file.
            logger.error('E01#001: Il file di specifica del processore %s non esiste',ou)
            raise ValueError('E:MKEL:001')
        wcfg = udict(cfg2hash(ou))
        lcfg = wcfg.xget(label)
        # ``ou`` is known to exist at this point; the else branch is kept
        # only as a defensive guard.
        if exists(ou):
            spec = cfg2hash(ou)
            kw=udict(kw)
            if 'macros' in kw:
                spec['macros']=kw['macros']
        else:
            logger.error('{OUTPUTELEMENT}:E:INIT:001 -- Il file di configurazione %s non esiste', ou)
            raise ValueError("E:OUTPUTELEMENT:INIT:001")
        # Expand %FUNC() style values before building the element.
        spec = mapspec(label,spec,_dataset,options,
                       lambda k,v,name,spec,_dataset,options: (k, expandfuncs(v,name,spec,_dataset,options)))
        t = elem_class(label,spec,_dataset,options)
        xml = t.to_xml()
        defn.append( xml )
        xmls = dom.parseString(xml)  # xml.dom.minidom.parseString
        pp = Postprocessor(options=options)
        pp.output(False,xmls.toprettyxml(),'%s.xml'% options.job)
def update_from_strings(self,definitions):
    """Update self from ``KEY=VALUE`` strings and derive DATE defines.

    When a DATE key is present (``YYYY-MM-DD`` or ``DD-MM-YYYY``, ``/``
    accepted as separator) a family of THISYEAR/PREVYEAR/... defines is
    added, and a revision warning is logged against the module-level
    ``_date``.

    :raises ValueError: when the DATE value matches no known format
    """
    if not definitions:
        return
    d = udict([ v.split('=') for v in definitions])
    self.update(d)
    if 'DATE' not in d:
        return
    DT = d['DATE'].replace('/','-')
    # NOTE(review): the first pattern allows a 2-3 digit month field --
    # presumably a typo for two digits; kept as-is.
    if re.match('[0-9][0-9][0-9][0-9]-[0-9][0-9][0-9]?-[0-9][0-9]?',DT):
        fmt = "%Y-%m-%d"
    elif re.match('[0-9][0-9][0-9]?-[0-9][0-9]?-[0-9][0-9][0-9][0-9]',DT):
        fmt = "%d-%m-%Y"
    else:
        logger.error('DATE format non recognized in %s',d['DATE'])
        # BUG FIX: this used to ``return ValueError, msg`` -- returning a
        # tuple instead of signalling the error. Raise as intended.
        raise ValueError('DATE format non recognized in %s' % d['DATE'])
    _date_t = datetime.strptime(DT,fmt)
    _DEFINES = { 'THISMONTH': str(_date_t.month),
                 'THISYEAR': str(_date_t.year),
                 'TYEAR': str(_date_t.year)[2:],
                 'PREVYEAR': str(_date_t.year-1),
                 'PREVVYEAR': str(_date_t.year-2),
                 'PREVVVYEAR': str(_date_t.year-3),
                 'NEXTYEAR': str(_date_t.year+1),
                 'DATE': _date_t.strftime('%Y%m%d'),
                 'THISDATE': _date_t.strftime('%d/%m/%Y'),
                 'LONG_DATE': _date_t.strftime('%d %B %Y'),
                 'DS_DATE': _date_t.strftime('%Y-%m-%d') }
    self.update(_DEFINES)
    # ``_date`` is a module-level "today" reference.
    if _date>_date_t:
        logger.warn('This is a REVISION for date %s',self['LONG_DATE'])
    elif _date<_date_t:
        logger.warn('This is a *P*REVISION for date %s',self['LONG_DATE'])
def exec_job_GET(options,*args,**kw):
    """Print the values of the job keys requested via ``WHAT``.

    ``WHAT`` names keys of *kw* to evaluate and print (default JOBDIR);
    ``--list`` dumps all available keys. ``func`` is an optional printf-like
    expression applied to each value.
    """
    setattr(options,'job',args[0])
    kw = udict(kw)
    what = [ x.upper() for x in options.options.xget('WHAT',('JOBDIR',))]
    labels = options.options.xget('LABELS')
    # SECURITY NOTE(review): LABELS, WHAT and func are eval()'d below --
    # they come from the command line, so this trusts the invoking user.
    if labels:
        labels = eval(labels[0])
    func = options.options.xget('func')
    if func:
        func = func[0]
    # HACK: inject the job keywords as local names so that ``eval(w)``
    # below can resolve them. Writing to locals() is a CPython-specific
    # behavior (extra keys survive in the frame's f_locals dict) -- this
    # would not work on other interpreters.
    for k,v in kw.items():
        locals()[k]=v
    for w in what:
        if w in kw:
            if labels:
                print args[0],w,
            V = eval(w)
            if func:
                # Substitute the value into the user expression and eval it.
                print eval(func.replace('{}','"'+V+'"'))
            else:
                print V
        elif re.match('--list',w,re.I):
            for k in sorted(kw.keys()):
                print k
def get_elem_configs(*args,**kw):
    """Collect data-request, processor and output config files for a job.

    Resolves DATAREQ / PROCESSORS / OUTPUT entries of the job section
    (relative to the section's ``__FILE`` directory, via the ``_i``
    resolver), then follows each output config's own section (named by
    LABEL or the job name) for nested DATAREQ / PROCESSOR entries.

    :returns: tuple ``(datareqs, processors, outputs)`` of file names
    """
    datareqs,processors,outputs = [],[],[]
    kw = udict(kw)
    base = dirname(kw['__FILE'])
    # ... to acquire data
    d = kw.xget('DATAREQ')
    if d:
        pd = _i(d,base,options.process_path)
        if pd:
            datareqs.append(pd)
        else:
            logger.warn('Data Request file "%s" not exists',d)
    # ..to process 'em
    d = kw.xget('PROCESSORS')
    if d:
        pd = _i(d,base,options.process_path)
        if pd:
            processors.append(pd)
        else:
            logger.warn('Processor Conf file "%s" not exists',d)
    # Job Label
    # Use section name if not exists LABEL key
    l = kw.xget('LABEL',args[0].upper())
    #
    _os = kw.xget_list('OUTPUT')
    if _os:
        for d in _os:
            pd = _i(d,base,options.process_path)
            if pd:
                outputs.append(pd)
            else:
                logger.warn('Output Conf file "%s" not exists',d)
    # Follow nested DATAREQ/PROCESSOR references inside each output config.
    for o in outputs:
        cfg = cfg2hash(o)
        jcfg = cfg.xget(l.upper())
        if jcfg:
            base = dirname(jcfg.xget('__FILE'))
            d = jcfg.xget('DATAREQ')
            pd = _i(d,base,options.process_path)
            if pd:
                datareqs.append(pd)
            else:
                logger.warn('*Data Request file "%s" not exists',d)
            d = jcfg.xget('PROCESSOR')
            if d:
                pd = _i(d,base,options.process_path)
                if pd:
                    processors.append(pd)
                else:
                    logger.warn('*Processor Conf file "%s" not exists',d)
    return datareqs,processors,outputs
def update(self,indict):
    """Merge *indict* into self, keeping the ``$var`` expansion regex
    in sync: reuse the compiled pattern of *indict* when it carries one,
    otherwise compile a fresh one.
    """
    h = udict(indict)
    vdict.update(self,h)
    # BUG FIX: the attribute was spelled ``_vaprog`` on both sides of the
    # check, so hasattr() was always False and the cached compiled pattern
    # was never reused. The class stores it as ``_varprog`` (see expand()).
    if hasattr(indict,'_varprog'):
        self._varprog = indict._varprog
    else:
        import re
        self._varprog = re.compile(r'\$(\w+|\{[^}]*\})')
def extract_dict_from_key(keylist,_dict1,_dict2,_defaults,base=udict(),
                          klass=udict,retain_keys=False):
    """Get _dict2[_k for _k in _dict1[key]] if exists else if _dict1[key] is
    yes or no return {'ACTIVE': True/False, key: _dict1[key]} else
    {key: _dict1[key]} if can or else _defaults[key] or None

    :raises ValueError: when *keylist* is not a tuple/list/dict/string
    """
    # NOTE(review): ``base=udict()`` is a mutable default shared between
    # calls; it is only copied into klass(base) here, but confirm klass
    # never mutates its argument.
    if isinstance(keylist,(tuple,list)):
        _keys = keylist
    elif isinstance(keylist,dict):
        _keys = keylist.keys()
    elif isinstance(keylist,basestring):
        _keys = get_list(keylist)
    else:
        raise ValueError("extract_dict_from_key - type is %s" % type(keylist))
    # logger.debug('{EXTRACT_DICT_FROM_KEY} keylist is %s: ',','.join(_keys))
    _ret = klass(base)
    for key in _keys:
        _key = key if retain_keys else None
        # Deprecated ``has_key`` calls replaced by the ``in`` operator.
        if key not in _dict1:
            if _defaults and key in _defaults:
                _val = klass(_defaults[key])
                _ret.update(klass({_key:_val}) if _key else _val )
            continue
        if re.match('^(yes|on|true)$',_dict1[key],re.I):
            _val = klass({'ACTIVE': True, key: _dict1[key]})
            _ret.update(klass({_key:_val}) if _key else _val )
            continue
        if re.match('^(no|off|false)$',_dict1[key],re.I):
            _val = klass({'ACTIVE': False, key: _dict1[key]})
            _ret.update(klass({_key:_val}) if _key else _val )
            continue
        # Otherwise the value is a list of section names to pull from _dict2.
        _lst = get_list(_dict1[key])
        for _l in _lst:
            if _l in _dict2:
                _val = klass(_dict2[_l])
            else:
                _val = klass({key: _l})
            _ret.update(klass({_key:_val}) if _key else _val )
    # logdict(logger.debug,'{EXTRACT_DICT_FROM_KEY} result',_ret)
    return _ret
def exec_job_WIKI_PRE(options,section,jobs,jobspecfile,*args,**kw): print "WIKI PEW",section jobd = udict(jobspecfile[section]) if section in jobd: title = udict(jobspecfile[section]).xget('title') if not title: title=section xstr = '||%s|| ||%s|| || ||%s||%s||'%(section, _wiki_pdflink(options.web, options.work, options.phase, options.version, section, title), _trac_tickets('this',section), _trac_tickets('next',section), ) return xstr
def exec_job_LOAD(options,*args,**kw):
    """Load the dataset for job ``args[0]`` without processing or output.

    Resolves the job's config files and loads the data requests.
    """
    setattr(options,'job',args[0])
    kw = udict(kw)
    (datareqs,processors,outputs) = e4t.cli.get_elem_configs(*args,**kw)
    cfg = e4t.cli._read_cfgfile(kw,'OUTPUT',args[0])
    _cfgs = (kw, cfg)
    # NOTE(review): _cfgs and _dataset are computed but neither returned
    # nor stored -- this looks like a load-only smoke test; confirm intent.
    _dataset = tsload_datareqs(options,None,*datareqs)
def xmloutjobs(w,name,*args,**kw):
    """Write a ``<job name=... key=value .../>`` element for one job.

    :param w: an XML writer exposing start()/end()
    :param name: the job name
    """
    (datareqs,processors,outputs) = e4t.cli.get_elem_configs(*args,**kw)
    cfg = e4t.cli._read_cfgfile(kw,'OUTPUT',name)
    _cfgs = (kw, cfg)
    if cfg:
        # pprint(cfg)
        cfg = udict(cfg)
        # Drop the internal bookkeeping keys (__FILE, __LABEL, ...).
        cfg.remove('^__.*')
        # XML attribute names cannot contain spaces.
        cfg = ldict([ (k.replace(' ','_'),v) for k,v in cfg.items()])
        w.start("job",name=name,**ldict(cfg))
        w.end("job")
def exec_job_TOC(options,*args,**kw):
    """Build the ``\\sriel`` table-of-contents entries for one job.

    Multiple-kind jobs contribute one entry per element; single jobs
    contribute one entry with their own num/title/kind.

    :returns: the LaTeX TOC fragment (empty string when nothing applies)
    """
    kw = udict(kw)
    kind = kw.xget('KIND')
    if kind is None:
        return
    num = kw.xget('num','<?>')
    title = kw.xget('title','<NO TITLE>')
    label = kw.xget('label','nolabel')
    tkind = kw.xget('toc kind',kind)
    xstr = ""
    if not re.search('^page$',kind,re.I):
        (datareqs,processors,outputs) = e4t.cli.get_elem_configs(*args,**kw)
        for ou in outputs:
            if not exists(ou):
                ou = join(options.process_path,ou)
            if not exists(ou):
                # BUG FIX: formerly interpolated the undefined name ``pr``,
                # raising NameError instead of logging the missing file.
                logger.error('E01#001: Il file di specifica del processore %s non esiste',ou)
                raise ValueError('E:MKEL:001')
            wcfg = udict(cfg2hash(ou))
            if wcfg:
                lcfg=udict(wcfg.xget(label))
                if lcfg:
                    if re.match('^multiple$',kind,re.I):
                        els = lcfg.xget_list('elements')
                        for el in els:
                            clcfg=udict(wcfg.xget(el))
                            num = clcfg.xget('num',num)
                            title = clcfg.xget('title',title)
                            tkind = clcfg.xget('kind',tkind)
                            label=el.lower()
                            xstr += "\\sriel{%s}{%s}{%s}{%s}\n" % (tkind.title(),num.upper(),title,label)
                            # Suppress the trailing single-entry emission.
                            num = None
                    else:
                        num = lcfg.xget('num',num)
                        title = lcfg.xget('title',title)
                        tkind = lcfg.xget('kind',tkind)
    if num:
        xstr += "\\sriel{%s}{%s}{%s}{%s}" % (tkind.title(),num.upper(),title,label)
    return xstr
def exec_job_TYPESET(options,*args,**kw):
    """Typeset the ``<LABEL>.tex`` file for a JOB into a PDF.

    The output file name is ``kw['OUTFILE']`` or ``<LABEL>.pdf``; relative
    names are resolved against ``options.output_path`` after define-set
    expansion.

    :raises IOError: when ``<LABEL>.tex`` does not exist
    """
    kw = udict(kw)
    label = kw.xget('LABEL')
    if label:
        label = label.upper()
    outfile = kw.xget('OUTFILE')
    if not outfile:
        outfile = "%s.pdf" % label
    if not exists("%s.tex" % label):
        # Parenthesized raise (works on both Python 2 and 3).
        raise IOError("File %s.tex not exists" % label)
    goutput = options.define_set.expand(outfile)
    if goutput[0]!='/':
        goutput = join(options.output_path,goutput)
    save_latex(options,label,goutput)
def exec_job_COMPILE(options,*args,**kw):
    """Make .tex file for JOB

    Runs the three pipeline phases (load, process, output), recording the
    elapsed time of each in ``options.elapsed`` and, in debug mode, in a
    ``<job>.time`` report.
    """
    kw = udict(kw)
    base = join(options.process_path,'input')
    copy_file(kw,base)
    datareqs, processors, outputs = e4t.cli.get_elem_configs(*args, **kw)
    _dataset = None
    elapsed = [.0,.0,.0]
    if datareqs:
        with Timer() as t:
            _dataset = tsload_datareqs(options,_dataset,*datareqs,**kw)
        logger.debug("TIME tsload %f",t.secs)
        elapsed[0]=t.secs
    if processors:
        with Timer() as t:
            _dataset = process_dataset(options,_dataset,processors,*args,**kw)
        logger.debug("TIME process %f",t.secs)
        elapsed[1]=t.secs
    if not _dataset:
        logger.error('VOID dataset on output step')
        # raise IOError('VOID dataset')
    # CLEANUP: the output step was wrapped in ``if True: # outputs:`` with
    # an unreachable else raising IOError; the dead branch was removed.
    with Timer() as t:
        output_dataset(options,_dataset,outputs,*args,**kw)
    logger.debug("TIME output %f",t.secs)
    elapsed[2]=t.secs
    setattr(options,'elapsed',elapsed)
    if options.switch_debug:
        # Write the report file
        e4t.cli.save_accounting("%s.cli" % options.job)
        # BUG FIX: close the time-report handle instead of leaking it.
        tfile = codecs.open("%s.time" % options.job, 'w', 'utf-8')
        try:
            tfile.write(u"tsload: %f\nprocss: %f\noutput: %f\n" % (elapsed[0], elapsed[1], elapsed[2]))
        finally:
            tfile.close()
def exec_job_EXPORT(options,*args,**kw):
    """Export a job's dataset in the requested formats and phases.

    ``FORMAT`` option selects formats (default XLS); ``PHASE`` selects
    which stages to dump: LOAD (raw), PROCESS (processed, default) and
    DEFINITION (XML definition).
    """
    setattr(options,'job',args[0])
    kw = udict(kw)
    formats = options.options.xget('FORMAT',('XLS',))
    phase = [ x.upper() for x in options.options.xget('PHASE',('PROCESS',))]
    logger.debug('EXPORT FILE OUTPUT FOR %s - f:%s - p:%s',
                 ','.join(args), ','.join(formats), ','.join(phase))
    (datareqs,processors,outputs) = e4t.cli.get_elem_configs(*args,**kw)
    cfg_T = e4t.cli._read_cfgfile(kw,'OUTPUT')
    if cfg_T is None:
        return
    cfg = cfg_T.xget(args[0])
    # A "multiple" job inlines the configs of its ELEMENTS sections.
    if re.match('multiple',cfg.xget('KIND'),re.I):
        elements = cfg.xget_list('ELEMENTS')
        cfgs = dict([ (el,cfg_T.xget(el)) for el in elements ])
        cfg.update(cfgs)
    _cfgs = (kw, cfg, cfg_T)
    _dataset = tsload_datareqs(options,None,*datareqs)
    logger.debug('data loaded')
    if 'LOAD' in phase:
        save_ds_with_fmts(formats,_dataset,_cfgs,'raw',*args,**kw)
        # LOAD-only request: skip processing entirely.
        if len(phase)==1:
            return
    _dataset = process_dataset(options,_dataset,processors)
    logger.debug('data processed')
    if 'PROCESS' in phase:
        save_ds_with_fmts(formats,_dataset,_cfgs,'proc',*args,**kw)
    if 'DEFINITION' in phase:
        save_definition(options,formats,_dataset,outputs,_cfgs,'def',*args,**kw)
def exec_show_SOLR(options, jlist, base, jobspecfile, order, jjspec):
    """Export the whole jobfile's SOLR metadata rows into RESULT.csv.

    Iterates every job of every section, delegating the per-job rows to
    ``exec_show_job_SOLR`` via the module-level globals it reads.
    """
    global WEB, PUBLIC, JOB, OUTFILE, OUTPUT, PAGE
    ret, jlist, base, jobspecfile, order, jjspec = e4t.cli.select_jobs_from_jobfile(options.jobfile)
    WEB = options.web + "/e4t-pipelines/gialla/stable/current/"
    with open("RESULT.csv", "wb") as csvfile:
        cwriter = UnicodeWriter(csvfile, delimiter=";", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for section, jobs in jlist.items():
            PUBLIC = section
            PAGE = 3
            for job in jobs:
                jcfg = udict(job[1])
                PAGE += 1
                OUTFILE = jcfg.xget("OUTFILE")
                XLSFILE = jcfg.xget("OUTFILE")
                OUTPUT = jcfg.xget("OUTPUT")
                JOB = job[0]
                try:
                    e4t.cli.exec_func("exec_show_job_SOLR", cwriter, job[0], **job[1])
                except Exception:
                    # BUG FIX: was a bare ``except: pass`` that silently
                    # swallowed every error (including KeyboardInterrupt).
                    # Keep the best-effort behavior but log the failure.
                    logger.exception("SOLR export failed for job %s", job[0])
def expand(self,path):
    """Expand parameters of form $var and ${var}. Unknown variables
    are left unchanged.

    @param path: path string to expand
    @type path: unicode
    @returns: the path string with parameters expanded
    """
    # Lazily initialize the define set on first use.
    if len(self)==0:
        self.setUp()
    # A udict is expanded value-by-value, recursively.
    if isinstance(path,udict):
        return udict([ (k,self.expand(unicode(v))) for k,v in path.items() ])
    if '$' not in path:
        return path
    if not self._varprog:
        import re
        self._varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    i = 0
    while True:
        m = self._varprog.search(path, i)
        if not m:
            break
        i, j = m.span(0)
        name = m.group(1)
        # Strip the braces of the ${var} form.
        if name.startswith('{') and name.endswith('}'):
            name = name[1:-1]
        if name in self:
            # Splice the value in and resume scanning after it, so a
            # substituted value is not re-expanded.
            tail = path[j:]
            path = path[:i] + unicode(self[name])
            i = len(path)
            path += tail
        else:
            # Unknown variable: leave the text and move past the match.
            i = j
    return path
def cast_dict(anInput,aCastDict,aOptsDict=None):
    """Cast the values of a dict (or list of dicts) through *aCastDict*.

    Each cast spec may be a callable, a ``(pre, cast)`` tuple applied as
    ``cast(pre(v))``, or the ``subsection`` marker which resolves the value
    as a key of *aOptsDict*.

    :returns: a list of dicts with the casted values (None for None input)
    :raises ValueError: when *anInput* is neither a dict nor a list/tuple
    """
    if anInput is None:
        return None
    if isinstance(anInput,(dict,ldict,udict)):
        aList = (anInput,)
    elif isinstance(anInput,(list,tuple)):
        aList = anInput
    else:
        raise ValueError("{CAST_DICT}")
    oList = []
    cd = udict(aCastDict)
    for aDict in aList:
        _res = {}
        if aDict:
            for k,v in aDict.items():
                # Keys without a cast spec are silently dropped.
                if k in cd:
                    spec = cd[k]
                    cast = spec
                    if isinstance(cast,tuple):
                        # (pre, cast): apply the pre-transform first.
                        cast = spec[1]
                        spec = spec[0]
                        value = cast(spec(v))
                    elif cast==subsection:
                        value=None
                        # BUG FIX: guard against aOptsDict being None (its
                        # default) before the membership lookup.
                        if aOptsDict and v in aOptsDict:
                            value = aOptsDict[v]
                    else:
                        value = cast(v)
                    _res[k]=value
        oList.append(_res)
    return oList
def rework_acct():
    """Flatten the module-level ``acct`` bookkeeping into one udict.

    FORMULE entries are tagged ``kind=formula``; each PROVIDER series gets
    its provider name, url and keyword arguments recorded (appending
    ``,provider`` to an existing kind tag).
    """
    flat = udict()
    if "FORMULE" in acct:
        for fname, fval in acct["FORMULE"].items():
            flat[fname] = fval
            flat[fname]["kind"] = "formula"
    if "PROVIDER" in acct:
        for provider, series in acct["PROVIDER"].items():
            for key, val in series.items():
                # Skip malformed keys (None or inline assignments).
                if key is None or "=" in key:
                    continue
                if key not in flat:
                    flat[key] = {}
                entry = flat[key]
                if "kind" in entry:
                    entry["kind"] += ",provider"
                else:
                    entry["kind"] = "provider"
                entry["url"] = val[0][0]
                entry["kw"] = val[1]
                entry["provider"] = provider
    return flat
def exec_show_job_SOLR(options, cwriter, *args, **kw):
    """Write one SOLR metadata CSV row per series of job ``args[0]``.

    Loads and processes the job's dataset, then walks the figure panels or
    table blocks of its output config, emitting a row (via *cwriter*) with
    provenance for every series. Communicates with the surrounding driver
    through the module-level globals listed below.

    NOTE(review): this body was reconstructed from flattened source --
    statement nesting inside the series loops is the most plausible
    reading; verify against history before relying on edge cases.
    """
    global SOURCE, TITLE, SUBTITLE, PTITLE, LABEL, DATA, FORMULA, URL, PROCESSED, DOWNLOADED, DATAREQ, PROCESSOR, PID
    # Skip the structural/synthetic jobs not meant for the SOLR export.
    if re.match("^(|us|jp|uk|in|ch|br|ru|po|tu)struct|cfa|cpt|uslms$", args[0], re.I):
        return
    setattr(options, "job", args[0])
    kw = udict(kw)
    base = join(options.process_path, "input")
    (datareqs, processors, outputs) = e4t.cli.get_elem_configs(*args, **kw)
    if len(datareqs) == 0:
        return
    acct = {}
    DATAREQ = datareqs[0].replace(options.process_path, "") if datareqs[0] else ""
    _dataset = tsload_datareqs(options, None, *datareqs, **kw)
    prcfg = {}
    if len(processors) > 0:
        pr = processors[0]
        PROCESSOR = pr
        prcfg = udict()
        # NOTE(review): ``if exists(pr)`` before the join looks inverted
        # (other blocks use ``if not exists``) -- confirm intent.
        if exists(pr):
            pr = join(options.process_path, pr)
        if exists(pr):
            prcfg = cfg2hash(pr).xget_dict("PROCESSOR")
    ou = outputs[0]
    if not exists(ou):
        ou = join(options.process_path, ou)
    if not exists(ou):
        # NOTE(review): ``pr`` is unbound here when there are no processors.
        logger.error("E01#001: Il file di specifica del processore %s non esiste", pr)
        raise ValueError, "E:MKEL:001"
    wcfg = cfg2hash(ou)
    jcfg = wcfg.xget_dict(options.job)
    SOURCE = clean_latex(jcfg.xget("SOURCE").replace("\n", " ") if "SOURCE" in jcfg else None)
    TITLE = clean_latex(jcfg.xget("TITLE").replace("\n", " ") if "TITLE" in jcfg else None)
    SUBTITLE = clean_latex(jcfg.xget("SUBTITLE").replace("\n", " ") if "SUBTITLE" in jcfg else None)
    KIND = jcfg.xget("KIND")
    if not re.match("figure|table", KIND, re.I):
        return
    # Series present before processing are "downloaded"; the difference
    # after processing identifies the derived ("processed") series.
    _base_ds = set(_dataset.keys())
    _dataset = process_dataset(options, _dataset, processors, *args, **kw)
    _base_acct = udict()
    nacct = rework_acct()
    _base_acct.update(nacct)
    _proc_ds = set(_dataset.keys()) - _base_ds
    if re.match("figure", KIND, re.I):
        panels = jcfg.xget_list("PANELS")
        for panel in panels:
            pcfg = wcfg.xget_dict(panel)
            PTITLE = pcfg.xget("TITLE").replace("\n", " ") if "TITLE" in pcfg else None
            PTITLE = clean_latex(PTITLE)
            # A panel's series may be plain, left-axis or right-axis.
            series = []
            series1 = pcfg.xget_list("SERIES")
            series2 = pcfg.xget_list("LEFT SERIES")
            series3 = pcfg.xget_list("RIGHT SERIES")
            if series1:
                series.extend(series1)
            if series2:
                series.extend(series2)
            if series3:
                series.extend(series3)
            for serie in series:
                LABEL = serie
                DATA = serie
                FORMULA = None
                URL = ""
                PROVIDER = ""
                scfg = wcfg.xget_dict(serie)
                if scfg:
                    LABEL = clean_latex(scfg.xget("LABEL"))
                    DATA = scfg.xget("DATA")
                PROCESSED = DATA in _proc_ds
                DOWNLOADED = DATA in _base_ds
                if PROCESSED:
                    FORMULA = prcfg.xget(DATA).replace("\n", " ") if DATA in prcfg else None
                if DOWNLOADED:
                    if DATA not in _base_acct:
                        print DATA, "not in acct", _base_acct.keys(), ",".join(_base_ds)
                    else:
                        D = _base_acct.xget(DATA)
                        P = D["provider"]
                        URL = P + "://" + _base_acct.xget(DATA)["url"]
                        PROVIDER = P
                # CSV cells: stringified truthy flags, None otherwise.
                PROCESSED = str(PROCESSED) if PROCESSED else None
                DOWNLOADED = str(DOWNLOADED) if DOWNLOADED else None
                WPROCESSOR = "" if not PROCESSOR else WSOURCE + PROCESSOR.replace(options.process_path, "")
                XLSRAW = WEB + "ds/%s-raw.xls" % JOB.lower()
                XLSPROC = WEB + "ds/%s-proc.xls" % JOB.lower() if PROCESSOR else ""
                XTRNAL = "True" if re.match("^dstream|http|file", PROVIDER) is not None else None
                PID += 1
                cwriter.writerow(
                    ( str(PID), PUBLIC, str(PAGE), KIND, JOB, WEB + OUTFILE,
                      WSOURCE + DATAREQ, WPROCESSOR, WSOURCE + OUTPUT, XLSRAW,
                      XLSPROC, TITLE, SUBTITLE, SOURCE, PTITLE, LABEL, DATA,
                      DOWNLOADED, PROVIDER, URL, PROCESSED, FORMULA, XTRNAL, )
                )
        pass
    elif re.match("table", KIND, re.I):
        blocks = jcfg.xget_list("BLOCK")
        if not blocks:
            return
        orient, m, n, ELS = _extract_data_matrix_def(prcfg, _dataset)
        line = 0
        TITLES = []
        # First pass: collect per-row titles from the block models.
        for b in blocks:
            bcfg = wcfg.xget_dict(b)
            models = bcfg.xget_list("MODEL")
            PTITLE = bcfg.xget("TITLE").replace("\n", " ") if "TITLE" in bcfg else None
            if PTITLE:
                PTITLE = clean_latex(PTITLE)
            else:
                PTITLE = TITLE
            for mod in models:
                mod = clean_model(mod)
                if not mod:
                    continue
                LABEL = mod
                DATA = mod
                FORMULA = None
                URL = ""
                PROVIDER = ""
                scfg = wcfg.xget_dict(mod)
                if scfg:
                    TITLES.append(clean_latex(scfg.xget("TITLE")))
                line += 1
        # Second pass: emit one row per series of the data matrix.
        lines = line
        for line in range(lines):
            series = ELS[line]
            for n, serie in series:
                DATA = serie
                PROCESSED = DATA in _proc_ds
                DOWNLOADED = DATA in _base_ds
                if PROCESSED:
                    FORMULA = prcfg.xget(DATA).replace("\n", " ") if DATA in prcfg else None
                if DOWNLOADED:
                    if DATA not in _base_acct:
                        print DATA, "not in acct", _base_acct.keys(), ",".join(_base_ds)
                    else:
                        D = _base_acct.xget(DATA)
                        P = D["provider"]
                        URL = P + "://" + _base_acct.xget(DATA)["url"]
                        PROVIDER = P
                PROCESSED = str(PROCESSED) if PROCESSED else None
                DOWNLOADED = str(DOWNLOADED) if DOWNLOADED else None
                WPROCESSOR = "" if not PROCESSOR else WSOURCE + PROCESSOR.replace(options.process_path, "")
                XLSRAW = WEB + "ds/%s-raw.xls" % JOB.lower()
                XLSPROC = WEB + "ds/%s-proc.xls" % JOB.lower() if PROCESSOR else ""
                XTRNAL = "True" if re.match("^dstream|http|file", PROVIDER) is not None else None
                TITLE = TITLES[line]
                PID += 1
                cwriter.writerow(
                    ( str(PID), PUBLIC, str(PAGE), KIND, JOB, WEB + OUTFILE,
                      WSOURCE + DATAREQ, WPROCESSOR, WSOURCE + OUTPUT, XLSRAW,
                      XLSPROC, TITLE, SUBTITLE, SOURCE, PTITLE, LABEL, DATA,
                      DOWNLOADED, PROVIDER, URL, PROCESSED, FORMULA, XTRNAL, )
                )
    elif re.match("multiple", KIND, re.I):
        pass
    else:
        logger.error("E01#002: Il file di specifica del processore %s non esiste", pr)
        raise ValueError, "E:MKEL:001"
def select_jobs_from_jobfile(job):
    """
    :param job: an url, a dict or a file-name of the jobfile
    :returns: a tuple containing

        1. return code (0 is ok, otherwise fail)
        2. the jobs dict
        3. the base processing directory
        4. the whole jobfile
        5. the order of the jobs to run
        6. the list of jobs sections

    NOTE(review): on a _read_specfile failure this returns the bare
    ``ret`` int instead of the 6-tuple -- callers that unpack must guard.
    """
    logger.debug('JOBFILE=%s',job)
    fname = job
    f = None
    jjobs=udict()
    base = None
    job_specfile = None
    # A URL is downloaded to a temp file first.
    if is_url(job,DWNL_URL):
        f = download(job)
        fname = f.name
    if is_dict(job):
        # An in-memory spec is materialized into a temp file too.
        f = True
        fname = setup_job(job)
    elif not is_file(fname):
        base = options.process_path
        logger.debug('BASE=%s',base)
        if is_file(fname,base):
            fname = join(base,fname)
        else:
            logger.error('E:MAIN:JOB:001 -- job spec file %s not exists (%s)', job,fname)
            raise IOError('E:MAIN:JOB:001')
    # Execute the jobfile
    if options.switch_verbose:
        print '==================== Execute JOBFILE %s' % fname
    (ret, base, job_specfile) = _read_specfile(fname)
    if ret!=0:
        return ret
    if not options.process_path:
        options.process_path = dirname(fname)
    macros = job_specfile.xget('macros')
    if macros:
        macros = job_specfile['macros']
        logger.debug('Found MACROS in jobsfile (%s)',macros)
    # Select sections to operate on
    names = options.name
    _a = udict()
    if names is None:
        # With no explicit name, select the BASE section ...
        if 'BASE' in job_specfile:
            basespec = job_specfile.xget_dict('BASE')
            if 'JOBS' in basespec:
                names = ('BASE',)
                # logger.debug('Take Jobs from BASE (%s)',','.join(names))
                _a['cli.jobs']='base'
        elif 'JOB' in job_specfile:
            # ... or synthesize one around a single JOB section ...
            # logger.debug('Make synthetic joblist for JOB')
            _a['cli.jobs']='synth'
            job_specfile['BASE']=udict(JOBS='JOB',kind='joblist')
            names=('BASE',)
        elif options.only:
            # ... or around the --only list.
            # logger.debug('Make synthetic joblist for ONLY jobs')
            _a['cli.jobs']='synth-only'
            job_specfile['BASE']=udict(JOBS=','.join(options.only),kind='joblist')
            names=('BASE',)
    if isinstance(names,basestring):
        names = names.split(',')
    if not names:
        logging.error('No specfile joblist present in job spec file')
        return
    _a['cli.jobs']=','.join(names)
    # logger.debug('Sections to operate on %s',','.join(names))
    _accounting.update(_a)
    jjobs,order,jjspec = select_jobs_from_joblist(names,job_specfile,options,macros)
    _accounting['cli.order']=','.join(order)
    # Remove the downloaded/synthesized temp file unless debugging.
    if f:
        if not options.switch_debug:
            os.unlink(fname)
            _accounting['fname']='deleted'
    return 0,jjobs,base,job_specfile,order,jjspec
DWNL_URL = re.compile("^[\t ]*(https?|file|ftp)(?::(?:///?)?)([^/]+)([^ ?[\n\t<]+)(\?[^ [\n\t<]+)?") def set_options(opts): global options options = opts from e4t.cli.cache import * from e4t.cli.make import * from e4t.cli.index import * from e4t.cli.jobs import * from e4t.cli.show import * from e4t.cli.typeset import * logger = logging.getLogger(__name__) _accounting = udict() def read_conf_file(cfg="~/.e4trc",options=Options(),section="E4T"): """Reads configuration file for resource into options before command line :param cfg: configuration file name (defaults to ~/.srirc :type cfg: file name :param options: options to fill :type options: Option class .. todo:: move to utils """ opts = { 'process_path' : str, 'output_path': str, 'input_path' : str, 'pageno' : bool,
def exec_job_WIKI(options,*args,**kw): kw = udict(kw) kind = kw.xget('KIND') if kind is None: return num = kw.xget('num') title = kw.xget('title','<NO TITLE>') label = kw.xget('label','nolabel') tkind = kw.xget('toc kind',kind) home = dirname(options.jobfile) tfile = kw.xget('__file') if tfile: tfile=tfile.replace(home,'') tfile=dirname(tfile) else: tfile="" xstr = "" if not re.search('^page|cover|empty$',kind,re.I): (datareqs,processors,outputs) = e4t.cli.get_elem_configs(*args,**kw) for ou in outputs: if not exists(ou): ou = join(options.process_path,ou) if not exists(ou): logger.error('E01#001: proc spec file %s not exists',pr) raise ValueError, 'E:MKEL:001' wcfg = udict(cfg2hash(ou)) if wcfg: lcfg=udict(wcfg.xget(label,{})) if lcfg: if re.match('^multiple$',kind,re.I): els = lcfg.xget_list('elements') for _i,el in enumerate(els): clcfg=udict(wcfg.xget(el)) num = lcfg.xget('num',num) title = clcfg.xget('title',title) tkind = clcfg.xget('kind',tkind) tlabel=el.lower() tfile = clcfg.xget('__file') if tfile: tfile=tfile.replace(home,'') tfile=dirname(tfile) else: tfile="" print tfile xstr += '||%s||%s||%s||%s||%s||%s||%s||'%(label,num, _wiki_pdflink(options.web,options.work,options.phase,options.version,label,title), _svn_conflink(tfile,'SRC'), _wiki_xlslink(options,kw,options.output_path,options.web,options.work,options.phase,options.version,label,title), _trac_tickets('this',label), _trac_tickets('next',label) ) num = None if _i<=(len(els)-1): xstr += '\n' else: num = lcfg.xget('num',num) title = lcfg.xget('title',title) tkind = lcfg.xget('kind',tkind) tfile = lcfg.xget('__file') if tfile: tfile=tfile.replace(home,'') tfile=dirname(tfile) else: tfile="" title = re.sub('.tmark\[[^\]]\]','',title,re.I) if num: if num!="0": xstr = '||%s||%s||%s||%s||%s ||%s||%s||'%(label,num, _wiki_pdflink(options.web,options.work,options.phase,options.version,label,title), _svn_conflink(tfile,'SRC'), 
_wiki_xlslink(options,kw,options.output_path,options.web,options.work,options.phase,options.version,label,title), _trac_tickets('this',label), _trac_tickets('next',label), ) return xstr
def writeout_joblist_file(elem,dname,pname,options=Options()):
    """Write a joblist file from a data-request and processor file

    :param elem: output specification
    :kind elem: string or filename
    :param dname: data-request file name
    :kind dname: filename
    :param pname: processor step file name
    :kind pname: filename

    if elem is none then set up only dataset (and processor, if any)
    steps in joblist.

    NOTE(review): statement nesting in the elem loop was reconstructed
    from flattened source; verify against history.
    """
    def _get_elem_kind(conf):
        # Infer the element kind from the T_/F_ file-name prefix.
        if re.match('^T_',conf):
            return 'table'
        elif re.match('^F_',conf):
            return 'figure'
        return 'composite'
    print "E=",elem
    if not elem:
        # dataset-only joblist: synthesize a minimal figure output.
        _D = { 'REQ' : { 'active' : 'yes',
                         'kind' : 'figure',
                         'panels': 'A11',
                         'datareq': dname }, }
        _D['A11'] = { 'SERIES': 'OUTS' }
        ofile = writeout_file(_D,header='# output file',options=options)
        _d = { 'dataset': 'output.xls',
               'kind': 'figure',
               'output': ofile }
        if pname:
            _d['processor']=pname
        _dict={ 'BASE' : { 'jobs': 'JOBS' } ,
                'JOBS': { 'JOBS':'REQ', 'KIND': 'joblist' },
                'REQ' : _d }
        setattr(options,'load',True)
        return writeout_file(_dict,header='# joblist file',options=options)
    kind = 'figure'
    # An output file was given (or must be generated)
    _dict = {}
    for i,e in enumerate(elem):
        print i,e
        conf = None
        if is_dict(e):
            # the output file must be generated from the option itself
            _d = udict(as_dict(e))
            if not nested_dict(_d):
                # simple option (e.g. series=A,B,C): wrap it in a
                # one-panel figure skeleton.
                _D = { 'REQ' : { 'active' : 'yes',
                                 'kind' : 'figure',
                                 'panels': 'A11' }, }
                _D['A11'] = _d
                _d = _D
            label = 'REQ'
            _dict.update(_d)
        else:
            # ``kind:label:conf`` / ``label:conf`` / plain file name forms.
            if ':' in e:
                if e.count(':')==2:
                    (kind,label,conf) = e.split(':')
                elif e.count(':')==1:
                    (label,conf) = e.split(':')
                    kind = _get_elem_kind(conf)
                elif e.count(':')>2:
                    raise ValueError, e
                if conf:
                    _dict = cfg2hash(conf)
                    options.process_path=dirname(conf)
            if conf is None:
                conf = e
                kind = _get_elem_kind(conf)
                # Derive the label from a trailing T_/F_/C_ name part.
                m = re.search('[TFC]_([a-zA-Z0-9_]+)$',conf)
                if m:
                    label = m.group(1)
                else:
                    label = "ELEM"
            _d = { 'kind' : kind,
                   'label': label,
                   'output' : conf, }
            _dict[label]=_d
    oname = writeout_file(_dict,header='# output file',options=options)
    if 'OUT' in options.look:
        lookout(oname,'Configurazione di output',options=options)
    _d = udict({ 'dataset': 'output.xls' ,
                 'kind': kind,
                 'output': '%s' % oname, })
    if dname:
        _d['datareq']=dname
    if pname:
        _d['processor']=pname
    _dict={ 'BASE' : { 'jobs': 'JOBS' } ,
            'JOBS': { 'JOBS':label, 'KIND': 'joblist', 'files': 'sristat.sty' },
            label : _d }
    return writeout_file(_dict,header='# joblist file',options=options)
def _output_element(options,name,specs,_dataset,elem_class,fname,**kw):
    """Create the tex output file for the dataset from the conf file(s).

    :param name: the element (section) label inside the spec file
    :param specs: comma-separated spec file names (``.req`` entries skipped)
    :param elem_class: document part class; None disables output
    :param fname: the .tex file to write
    """
    _accounting['output.name'] = name
    _accounting['output.class'] = elem_class
    _accounting['output.datalen'] = len(_dataset) if _dataset else 0
    logger.debug('Esportazione documento %s con classe %s (Dlen=%s)',name,elem_class,_dataset)
    if elem_class is None:
        return
    ## Read the spec file(s)
    #
    for spec in specs.split(','):
        # Data-request files are not output specs.
        if re.search('\.req$',spec):
            continue
        if not exists(spec):
            spec = join(options.process_path,spec)
        if not exists(spec):
            logger.error('E01#001: Il file di specifica dell\'elemento %s non esiste (%s)',name,spec)
            raise ValueError, 'E:MKEL:001'
        if exists(spec):
            spec = cfg2hash(spec)
            kw=udict(kw)
            if 'macros' in kw:
                spec['macros']=kw['macros']
        else:
            logger.error('{OUTPUTELEMENT}:E:INIT:001 -- Il file di configurazione %s non esiste', spec)
            raise ValueError, "E:OUTPUTELEMENT:INIT:001"
        # Expand %FUNC() style values throughout the spec.
        try:
            spec = mapspec(name,spec,_dataset,options,
                           lambda k,v,name,spec,_dataset,options: (k, expandfuncs(v,name,spec,_dataset,options)))
        except IndexError, exc:
            logger.error('{OUTPUTELEMENT}:E:MAPSPEC:001 -- non posso espandere le funzioni')
            raise
        # test = expandfuncs('%TEST()',name,spec,_dataset,options)
        with Timer() as T:
            t = elem_class(name,spec,_dataset,options)
        _accounting['output.time.setup']=T.msecs
        if hasattr(t,'produce'):
            with Timer() as T:
                t.produce()
            _accounting['output.time.produce']=T.msecs
        l = LaTeX(options)
        # if False and hasattr(options,'switch_components') and options.switch_components:
        #     try:
        #         output = join(options.output_path,fname.replace('.tex','.pdf'))
        #         logger.debug('Compilazione componente %s',fname)
        #         with Timer() as T:
        #             l.compile(name,t,fname=output)
        #         _accounting['output.time.latex.compile']=T.msecs
        #         _accounting['output.time.latex.output']=output
        #         outfiles.append(output)
        #     except IOError, exc:
        #         logger.error('Non posso scrivere il file %s',output)
        _accounting['output.outfile']=fname
        with Timer() as T:
            ofile = fname
            f = open(ofile, 'w')
            f.write(l.layout(t,False))
            f.close()
            outfiles.append(ofile)
            _accounting.update(l.report())
        _accounting['output.time.write.layout']=T.msecs
        if options.switch_debug:
            # Write the report file
            codecs.open("%s.rep"%name,'w','utf-8').write(t.report())
        # Dump the accounting of this element to <name>.out.
        fname = "%s.out" % name
        logger.debug('Write out %s',fname)
        save_dict(fname,_accounting,options.switch_debug)
        _accounting.clear()
def select_jobs_from_joblist(names,job_specfile,options,macros):
    """Resolve the joblist sections *names* into concrete job entries.

    Handles nested joblists recursively, ONLY/EXCLUDE filtering (from both
    the command line and the section itself), per-section DEFINES, and the
    ``--selectjobs`` key filters.

    :returns: tuple ``(jjobs, order, jjspec)`` -- jobs per section name,
        the section execution order, and the raw joblist specs
    """
    jjobs = udict()
    order = list()
    jjspec = list()
    for name in names:
        name = name.upper()
        if name not in job_specfile:
            logger.warn('No job %s in specfile',name)
            continue
        jspec = options.define_set.expand(job_specfile.xget_dict(name))
        #if options.switch_debug:
        #    logger.debug('[%s] on %s',name,dictview(jspec))
        needed(jspec,'JOBS','KIND')
        if not re.match('^joblist$',jspec.xget('KIND'),re.I):
            logger.warn('Section "%s" kind not JOBLIST (kind!=joblist)',name)
            continue
        jjspec.append(jspec)
        # A DEFINES key imports another section into the define set.
        if 'DEFINES' in jspec:
            defname = jspec.xget('DEFINES')
            defines = job_specfile.xget_dict(defname)
            J_defines = options.define_set.expand(defines)
            J_defines.remove('__.*')
            options.define_set.update(J_defines)
        # Jobs management in joblist
        jobs = jspec.xget_list('JOBS')
        only = options.only if options.only else list()
        exclude = options.exclude if options.exclude else list()
        excluding = jspec.xget_list('EXCLUDE')
        onlying = jspec.xget_list('ONLY')
        if excluding:
            exclude.extend(excluding)
        if onlying:
            only.extend(onlying)
        if only:
            jobs = only
        if exclude:
            for j in exclude:
                # An explicit ONLY wins over EXCLUDE.
                if j in only:
                    continue
                if j in jobs:
                    jobs.remove(j)
        # Take only existing jobs
        jobs = [ j for j in jobs if j in job_specfile ]
        # if options.switch_debug:
        #     logger.debug('From %s executing%s: %s' % (name," only" if options.only or options.exclude else "",','.join(jobs)))
        # Handling of the --selectjobs keyword
        # handle requests as --select 'kind=figure|table'
        for j in jobs:
            cfg = options.define_set.expand(job_specfile.xget_dict(j))
            kj = cfg.xget('KIND',None)
            # A nested joblist is expanded recursively.
            if re.match('^joblist$',kj,re.I):
                knames = cfg.xget_list('JOBS')
                kjjobs,korder,kjjspec = select_jobs_from_joblist(knames,job_specfile,options,macros)
                jjobs.update(kjjobs)
                order.extend(korder)
                jjspec.extend(kjjspec)
                continue
            if macros:
                cfg['macros']=macros
                logger.debug('Adding MACROS to element (%s)',cfg['__LABEL'])
            # Apply the --selectjobs filters: list values are regexes the
            # job's key must match; scalar filters only require the key.
            bpass = True
            for k,v in options.selectjobs.items():
                if isinstance(v,list):
                    vk = v[0]
                    if not re.match(vk,cfg[k],re.I):
                        bpass = False
                        break
                else:
                    if k not in cfg:
                        bpass = False
                        break
            if bpass:
                if name not in jjobs:
                    jjobs[name]=[]
                jjobs[name].append((j,cfg))
        order.append(name)
    #--------
    return jjobs,order,jjspec
def exec_job_TOC_PRE(options,section,jobs,jobspecfile,*args,**kw):
    """Return the LaTeX preamble that opens the TOC table for *section*."""
    section_title = udict(jobspecfile[section]).xget('title')
    template = '\ctable[width=\columnwidth,pos=\tocpos]{rXr}{}{\srichapt{}{%s}'
    return template % section_title