def _read_xls(self, fname, ext, kw):
    """Read a set of series from an Excel workbook.

    The workbook is opened with ``xlrd`` and one sheet is scanned for an
    index (or a date range) and for one series per row/column.

    Options read from ``kw`` (all optional unless noted):

    * ``SHEET`` / ``SHEETNUM`` -- sheet name, or sheet index (default 0).
    * ``ORIENT`` -- ``'V'`` (default) or ``'H'``; any other value falls
      back to ``'V'`` with a warning.
    * ``DATES`` -- range holding the dates (default ``"A2:"``).
    * ``INDEX`` -- range holding a plain integer index; when given it is
      used instead of ``DATES`` and the series are stored as raw arrays.
    * ``FREQ`` -- required when ``INDEX`` is not given; one of
      ``A``, ``Y``, ``Q``, ``M``, ``D``.
    * ``YEAR`` -- base year for quarterly data.
    * ``DFORMAT`` -- ``'INT'`` (default) or ``'XL_DATE'`` for monthly data.
    * ``SERIES`` -- range holding the values (default ``"B2:"``).
    * ``NAME`` -- series naming rule (default ``'TS'``); may be a literal,
      a cell range, or space-separated components.
    * ``RENAME`` -- list of ``old>new`` renames applied after reading.

    :param fname: path of the workbook, passed to ``xlrd.open_workbook``.
    :param ext: not used by this method.
    :param kw: mapping of options; wrapped in ``udict`` before use.
    :returns: dict mapping upper-cased series names to a ``Series`` (when
        no ``INDEX`` is given) or to the raw value array (otherwise).
    :raises ValueError: if the sheet cannot be obtained, if ``FREQ`` is
        missing or unknown, or if ``DFORMAT`` is not supported.
    """
    logger.debug('Read_XLS from %s', fname)
    TS = {}
    kw = udict(kw)
    book = xlrd.open_workbook(fname)

    def _get_sheet(book, kw):
        """Return the sheet selected by SHEET (name) or SHEETNUM (index)."""
        sheetname = kw.xget('SHEET')
        if sheetname:
            sheet = exls.get_sheet(book, sheetname)
        else:
            sheetname = kw.xget('SHEETNUM', 0)
            sheet = book.sheet_by_index(sheetname)
        if sheet is None:
            logger.error('Non posso acquisire il foglio %s', sheetname)
            raise ValueError("Sheet")
        return sheet

    sheet = _get_sheet(book, kw)

    def _get_orient(kw):
        """Return the data orientation: 'H' (horizontal) or 'V' (vertical)."""
        orient = kw.xget('ORIENT', 'V')
        if orient not in ('H', 'V'):
            logger.warning(
                'ORIENT deve essere tra H o V, non %s. Uso V!', orient)
            orient = 'V'
        return orient

    orient = _get_orient(kw)

    # Dates or plain index: INDEX, when given, takes precedence over DATES.
    dates = kw.xget('DATES', "A2:")
    index = kw.xget('INDEX')
    Aindex = index if index else dates
    logger.debug("{PARMS}-%s %s %s %s",
                 'IDX' if index else 'DAT', sheet.name, Aindex, orient)

    (BC, BR, EC, ER) = _get_total_range(sheet, Aindex, orient)
    logger.debug("{RANGE} %s:%s", _Aref(BC, BR), _Aref(EC, ER))

    date_values = _get_values(sheet, BC, BR, EC, ER, kw)

    def _get_index(date_values):
        """Return the index cells as a flat integer array."""
        return np.array([int(d) for d in date_values.flatten()])

    def _get_tseriesA(freq, date_values, kw):
        """Build an annual PeriodIndex; each cell is a year number."""
        years = [int(d) for d in date_values.flatten() if not np.isnan(d)]
        return PeriodIndex([Period(freq=str(freq), year=y) for y in years])

    def _get_tseriesQ(freq, date_values, kw):
        """Build a quarterly PeriodIndex from quarter numbers.

        The position of the cell (in groups of four) and the optional
        YEAR option are combined with the quarter into a period ordinal.
        """
        by = 0
        if 'YEAR' in kw:
            # NOTE(review): eval() on a configuration value; confirm YEAR
            # can never come from untrusted input.
            by = eval(kw['YEAR']) - 1
        ordinals = [
            (int(d) - 1) % 4 + 1 + (pos // 4) * 4 + by * 4
            for pos, d in enumerate(date_values.flatten())
            if not np.isnan(d)
        ]
        return PeriodIndex(
            [Period(freq=str(freq), value=o) for o in ordinals])

    def _get_tseriesM(freq, date_values, kw):
        """Build a monthly PeriodIndex.

        DFORMAT 'INT' combines each cell with its position into a period
        ordinal; 'XL_DATE' decodes each cell as an Excel date.
        """
        dformat = kw['DFORMAT'] if 'DFORMAT' in kw else 'INT'
        cells = date_values.flatten()
        if dformat == 'INT':
            ordinals = [
                (int(d) - 1) * 12 + pos % 12 + 1
                for pos, d in enumerate(cells)
                if not np.isnan(d)
            ]
            periods = [Period(freq=str(freq), value=o) for o in ordinals]
        elif dformat == 'XL_DATE':
            stamps = [
                xlrd.xldate_as_tuple(d, 0)
                for d in cells
                if not np.isnan(d)
            ]
            periods = [
                Period(freq=str(freq), year=s[0], month=s[1])
                for s in stamps
            ]
        else:
            logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
            raise ValueError(dformat)
        return PeriodIndex(periods)

    def _get_tseriesD(freq, date_values, kw):
        """Build a daily PeriodIndex; each cell is an Excel date number."""
        stamps = [
            xlrd.xldate_as_tuple(int(d), 0)
            for d in date_values.flatten()
            if not np.isnan(d)
        ]
        return PeriodIndex([
            Period(freq=str(freq), year=s[0], month=s[1], day=s[2])
            for s in stamps
        ])

    # Resolve the index: plain integer vector, or a period range.
    if index:
        date_array = _get_index(date_values)
    else:
        op = {
            'A': _get_tseriesA,
            'Y': _get_tseriesA,
            'Q': _get_tseriesQ,
            'M': _get_tseriesM,
            'D': _get_tseriesD,
        }
        freq = 'D'
        if 'FREQ' in kw:
            freq = kw['FREQ'].strip()
            if freq in op:
                date_array = op[freq](freq, date_values, kw)
            else:
                logger.error('UNKNOWN FREQ %s', freq)
                # The message is formatted explicitly: the three-expression
                # raise form expects a traceback as its last item.
                raise ValueError('UNKNOWN FREQ %s' % freq)
        else:
            logger.error('ABSENT FREQ %s', freq)
            raise ValueError('ABSENT FREQ %s' % freq)

    logger.debug("IND: %s", ','.join([str(x) for x in date_array]))

    # Series range, scanned along the axis opposite to the dates.
    series = kw['SERIES'] if 'SERIES' in kw else "B2:"
    (Bc, Br, Ec, Er) = _get_total_range(sheet, series, invert(orient))
    Nseries = (Er - Br) if orient == 'H' else (Ec - Bc)
    B = Bc if orient == 'V' else Br
    E = Ec if orient == 'V' else Er
    logger.debug("(Bc=%d,Br=%d,Ec=%d,Er=%d,B=%d,E=%d)",
                 Bc, Br, Ec, Er, B, E)

    # Series names.
    name = kw.xget('NAME', 'TS')
    logger.debug("{NAME} %s", name)
    if ' ' not in name and ':' in name:
        # A single cell range: one name per cell.
        logger.debug("{NAME} with : %s", name)
        _name = [name]
        m = rx_range.search(name)
        if m:
            (_bc, _br, _ec, _er) = _get_range4(m.group(0))
            logger.debug("%s", (_bc, _br, _ec, _er))
            cells = [
                str(v) if len(unicode(v)) > 0 else None
                for v in _get_values_list(sheet, _bc, _br, _ec, _er, kw)
            ]
            _name = [
                ("%s" % c) if c is not None else "_${NUM}_!"
                for c in cells
            ]
    elif ' ' in name:
        # Space-separated components, each "text", "=RANGE" or
        # "item|converter|format"; components are concatenated per series.
        from functools import reduce
        from itertools import cycle, islice

        parts = []
        width = 0
        for token in name.split(' '):
            conv = str
            fmt = "%s"
            if '|' in token:
                (token, conv, fmt) = token.split('|')
                # NOTE(review): eval() on part of the NAME option; confirm
                # NAME can never come from untrusted input.
                conv = eval(conv.lower())
            if '=' in token:
                m = rx_range_eq.search(token)
                if m:
                    (_bc, _br, _ec, _er) = _get_range4(m.group(1))
                    logger.debug("%s", (_bc, _br, _ec, _er))
                    cells = [
                        conv(v) if len(unicode(v)) > 0 else None
                        for v in _get_values_list(
                            sheet, _bc, _br, _ec, _er, kw)
                    ]
                    cells = [
                        (fmt.lower() % c) if c is not None else "_${NUM}_!"
                        for c in cells
                    ]
                    parts.append(cells)
                    width = max(width, len(cells))
            else:
                parts.append([conv(token)])
                width = max(width, 1)

        # Repeat shorter components cyclically up to the longest one.
        parts = [
            list(islice(cycle(p), width)) if len(p) < width else p
            for p in parts
        ]
        # Fold with operator.add so any number of components is joined;
        # a bare map(operator.add, ...) only accepts exactly two.
        _name = [reduce(operator.add, chunk) for chunk in zip(*parts)]
    else:
        _name = ["%s${NUM}" % name]

    # The loop below visits E - B + 1 positions; make sure every one of
    # them has a name so that _name[i] cannot run past the end.
    wanted = max(Nseries, E - B + 1)
    _name.extend(["TS${NUM}"] * (wanted - len(_name)))

    for i, x in enumerate(range(B, E + 1)):
        _n = _name[i].strip()
        if _n in TS:
            # Disambiguate a name that is already taken.
            _n += "_${NUM}_?"
        N = Template(_n).safe_substitute({'NUM': i})
        N = unicode(N.upper().strip())

        if orient == 'H':
            _tv = _get_values(sheet, BC, x, EC, x, kw)
        elif orient == 'V':
            _tv = _get_values(sheet, x, BR, x, ER, kw)
            _tv = np.array([_v[0] for _v in _tv])
        else:
            logger.error('ORIENTTION ERROR')
            raise ValueError('ORIENT')

        if index is None:
            # Time series: align the values with the period index.
            TS[N] = Series(_tv[0:len(date_array)], index=date_array)
        else:
            # Plain array.
            TS[N] = _tv

    logger.debug("Read %d series from excel: %s",
                 Nseries, ','.join(sorted(TS.keys())))

    # Rename phase.
    if 'RENAME' in kw:
        for r in get_list(kw['RENAME']):
            (_f, _t) = r.split('>')
            if _f in TS:
                if _t not in TS:
                    TS[_t] = TS[_f]
                    del TS[_f]
                else:
                    logger.error('La serie %s giĆ esiste nell\'IS', _t)
            else:
                logger.error('La serie %s non esiste nell\'IS', _f)

    # Drop phase.
    logger.debug("Read XLS : ")
    return TS