Example #1
0
    def _read_xls(self,fname,ext,kw):
        """Read one or more series from an Excel workbook.

        The workbook ``fname`` is opened with ``xlrd``; a sheet is selected,
        an index (dates or plain integers) is read from one range and the
        series values from another, and everything is returned in a dict.

        Parameters:
            fname: path of the workbook, handed to ``xlrd.open_workbook``.
            ext:   not used by this method.
            kw:    mapping of options; it is wrapped in ``udict`` before use.
                   Options read here:

                   SHEET     sheet name (looked up with ``exls.get_sheet``).
                   SHEETNUM  sheet position, used when SHEET is empty
                             (default 0).
                   ORIENT    'H' or 'V' (default 'V'); any other value is
                             logged and replaced by 'V'.
                   DATES     range holding the dates (default "A2:").
                   INDEX     range holding a plain integer index; when set
                             it is used instead of DATES.
                   FREQ      mandatory when INDEX is not set; one of
                             'A', 'Y', 'Q', 'M', 'D'.
                   YEAR      only for FREQ 'Q': expression giving the base
                             year (evaluated with ``eval``).
                   DFORMAT   only for FREQ 'M': 'INT' (default) or 'XL_DATE'.
                   SERIES    range holding the series values (default "B2:").
                   NAME      naming rule (default 'TS'): a cell range
                             (contains ':' and no blank), a blank-separated
                             list of components, or a plain prefix.
                   RENAME    list of 'OLD>NEW' renames applied at the end.

        Returns:
            dict mapping upper-cased series names to a ``Series`` indexed by
            the computed ``PeriodIndex`` (INDEX not given) or to the raw
            values array (INDEX given).

        Raises:
            ValueError: the sheet cannot be obtained, FREQ is missing or
                unknown, or DFORMAT is not supported.
        """
        logger.debug('Read_XLS from %s',fname)
        TS = {}
        kw = udict(kw)

        book = xlrd.open_workbook(fname)

        def _get_sheet(book,kw):
            # Select the sheet by name (SHEET) or, failing that, by position.
            sheetname = kw.xget('SHEET')
            if sheetname:
                sheet = exls.get_sheet(book,sheetname)
            else:
                sheetname = kw.xget('SHEETNUM',0)
                sheet = book.sheet_by_index(sheetname)
            if sheet is None:
                logger.error('Non posso acquisire il foglio %s',sheetname)
                raise ValueError("Sheet")
            return sheet

        sheet = _get_sheet(book,kw)

        def _get_orient(kw):
            # Data orientation (H -> horizontal, V -> vertical)
            orient = kw.xget('ORIENT','V')
            if orient not in ('H','V'):
                logger.warning('ORIENT deve essere tra H o V, non %s. Uso V!',
                               orient)
                orient = 'V'
            return orient

        orient = _get_orient(kw)

        # Dates or plain index: INDEX, when set, wins over DATES.
        dates = kw.xget('DATES',"A2:")
        index = kw.xget('INDEX')
        Aindex = index if index else dates

        logger.debug("{PARMS}-%s %s %s %s",
                     'IDX' if index else 'DAT',
                     sheet.name,
                     Aindex,
                     orient)

        (BC,BR,EC,ER) = _get_total_range(sheet,Aindex,orient)

        logger.debug("{RANGE} %s:%s",_Aref(BC,BR),_Aref(EC,ER))

        date_values = _get_values(sheet,BC,BR,EC,ER,kw)

        # Builders of the PeriodIndex, one per supported frequency.  All of
        # them skip NaN cells.
        def _get_tseriesA(freq,date_values,kw):
            # Annual: every cell holds the year.
            v = [ int(d) for d in date_values.flatten() if not np.isnan(d) ]
            D = [ Period(freq=str(freq),year=_v) for _v in v ]
            return PeriodIndex(D)

        def _get_tseriesQ(freq,date_values,kw):
            # Quarterly: every cell holds the quarter number; the position of
            # the cell selects the year offset, YEAR the base year.
            by = 0
            if 'YEAR' in kw:
                # NOTE(review): eval() executes arbitrary code; confirm that
                # kw never carries untrusted input.
                by = eval(kw['YEAR'])-1
            v = [ (int(d)-1)%4+1+((_i//4)*4)+by*4
                  for _i,d in enumerate(date_values.flatten())
                  if not np.isnan(d) ]
            D = [ Period(freq=str(freq),value=_v) for _v in v ]
            return PeriodIndex(D)

        def _get_tseriesM(freq,date_values,kw):
            # Monthly: cells hold either the year ('INT') or an Excel date
            # serial ('XL_DATE').
            dformat = kw['DFORMAT'] if 'DFORMAT' in kw else 'INT'
            if dformat == 'INT':
                v = [ (int(d)-1)*12+i%12+1
                      for i,d in enumerate(date_values.flatten())
                      if not np.isnan(d) ]
                D = [ Period(freq=str(freq),value=_v) for _v in v ]
            elif dformat == 'XL_DATE':
                v = [ xlrd.xldate_as_tuple(d,0)
                      for d in date_values.flatten()
                      if not np.isnan(d) ]
                D = [ Period(freq=str(freq),year=_v[0],month=_v[1]) for _v in v ]
            else:
                logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
                raise ValueError(dformat)
            return PeriodIndex(D)

        def _get_tseriesD(freq,date_values,kw):
            # Daily: every cell holds an Excel date serial.
            v = [ xlrd.xldate_as_tuple(int(d),0)
                  for d in date_values.flatten()
                  if not np.isnan(d) ]
            D = [ Period(freq=str(freq),year=_v[0],month=_v[1],day=_v[2])
                  for _v in v ]
            return PeriodIndex(D)

        # Find index as vector or date range
        if index:
            date_array = np.array([ int(d) for d in date_values.flatten() ])
        else:
            # TIMESERIES
            op = {
                'A': _get_tseriesA,
                'Y': _get_tseriesA,
                'Q': _get_tseriesQ,
                'M': _get_tseriesM,
                'D': _get_tseriesD,
                }

            if 'FREQ' not in kw:
                freq = 'D'
                logger.error('ABSENT FREQ %s',freq)
                raise ValueError('ABSENT FREQ %s' % freq)

            freq = kw['FREQ'].strip()
            if freq not in op:
                logger.error('UNKNOWN FREQ %s',freq)
                raise ValueError('UNKNOWN FREQ %s' % freq)

            date_array = op[freq](freq,date_values,kw)

        logger.debug("IND: %s", ','.join([ str(x) for x in date_array ]))

        # Series range: it develops along the axis opposite to the index.
        series = kw['SERIES'] if 'SERIES' in kw else "B2:"
        (Bc,Br,Ec,Er) = _get_total_range(sheet,series,invert(orient))

        B = Bc if orient == 'V' else Br
        E = Ec if orient == 'V' else Er

        # The read loop below walks range(B, E+1), both ends included, so
        # that is the number of series (and of names) actually needed.
        Nseries = max(E - B + 1, 0)

        logger.debug("(Bc=%d,Br=%d,Ec=%d,Er=%d,B=%d,E=%d)",Bc,Br,Ec,Er,B,E)

        # Series-name handling
        name = kw.xget('NAME','TS')
        logger.debug("{NAME} %s",name)
        if ' ' not in name and ':' in name:
            # NAME is a cell range: one name per cell; empty cells get a
            # numbered placeholder.
            logger.debug("{NAME} with : %s",name)
            _name = [name,]
            m = rx_range.search(name)
            if m:
                (_bc,_br,_ec,_er) = _get_range4(m.group(0))
                logger.debug("%s",(_bc,_br,_ec,_er))
                _name = [ str(v) if len(unicode(v))>0 else "_${NUM}_!"
                          for v in _get_values_list(sheet,_bc,_br,_ec,_er,kw) ]

        elif ' ' in name:
            # NAME is a blank-separated list of components.  A component is
            # either a literal or '=<range>'; it may carry '|<func>|<format>'
            # to convert and format the values.
            from itertools import cycle, islice

            _cmp = []
            M = 0
            for _n in name.split(' '):
                _f = str
                _fmt = "%s"

                if '|' in _n:
                    (_n,_f,_fmt) = _n.split('|')
                    # NOTE(review): eval() executes arbitrary code; confirm
                    # that kw never carries untrusted input.
                    _f = eval(_f.lower())
                if '=' in _n:
                    m = rx_range_eq.search(_n)
                    if m:
                        (_bc,_br,_ec,_er) = _get_range4(m.group(1))
                        logger.debug("%s",(_bc,_br,_ec,_er))
                        F = [ _f(v) if len(unicode(v))>0 else None
                              for v in _get_values_list(sheet,_bc,_br,_ec,_er,kw) ]
                        F = [ (_fmt.lower() % _p) if _p is not None else "_${NUM}_!"
                              for _p in F ]
                        _cmp.append(F)
                        M = max(M,len(F))
                else:
                    _cmp.append([_f(_n),])
                    M = max(M,1)

            # Repeat the shorter components cyclically up to the longest one.
            _cmp = [ list(islice(cycle(_P),M)) if len(_P)<M else _P
                     for _P in _cmp ]

            # Concatenate the components position by position; this works
            # for any number of components, not only two.  zip() stops at
            # the shortest component, which after the cycling above can only
            # be an empty one.
            _name = []
            for parts in zip(*_cmp):
                joined = parts[0]
                for part in parts[1:]:
                    joined = joined + part
                _name.append(joined)
        else:
            # NAME is a plain prefix followed by the series number.
            _name = [ "%s${NUM}" % name , ]

        # Make sure there is a name for every series that will be read.
        _name.extend(["TS${NUM}"] * (Nseries - len(_name)))

        def _series_key(template,num):
            # Expand ${NUM} and normalise the result into the dict key.
            key = Template(template).safe_substitute({'NUM': num})
            return unicode( key.upper().strip() )

        for i,x in enumerate(range(B,E+1)):
            _n = _name[i].strip()
            N = _series_key(_n,i)
            if N in TS:
                # Duplicates are detected on the final key, so that names
                # differing only in case or padding do not overwrite an
                # already-read series.
                N = _series_key(_n + "_${NUM}_?",i)

            if orient == 'H':
                _tv = _get_values(sheet,BC,x,EC,x,kw)
            elif orient == 'V':
                _tv = _get_values(sheet,x,BR,x,ER,kw)
                # keep the first element of every row of the column block
                _tv = np.array([ _v[0] for _v in _tv ])
            else:
                logger.error('ORIENTTION ERROR')
                raise ValueError('ORIENT')

            if index is None: # it is a time series
                TS[N] = Series(_tv[0:len(date_array)],index=date_array)
            else: # it is a plain array
                TS[N] = _tv

        logger.debug("Read %d series from excel: %s",Nseries,','.join( sorted(TS.keys())))

        # Rename Phase
        if 'RENAME' in kw:
            for r in get_list(kw['RENAME']):
                (_f,_t) = r.split('>')
                if _f not in TS:
                    logger.error('La serie %s non esiste nell\'IS',_f)
                elif _t in TS:
                    logger.error('La serie %s già esiste nell\'IS',_t)
                else:
                    TS[_t] = TS[_f]
                    del TS[_f]

        # Drop Phase
        logger.debug("Read XLS : ")

        return TS