コード例 #1
0
ファイル: __init__.py プロジェクト: exedre/e4t
class DataProvider(object):
    """
    General access class to Data Providers addressed with URI notation, e.g.:
    
    * ``dstream://Datastream/AGINDEMIF`` or
    * ``flinp://DB22/ECB_STS1/M.BG.N.PROD.NS0020.4.000?name=IPBUL&start=$NOW-24M&end=$NOW&proc=weighted_avg&(IPBUL2000AVG)check``

    Request URLs are queued with ``append`` and resolved with ``request``.
    """
    # Library of inline functions: every entry maps a function name to the
    # callable implementing it ('def') and to the names of its arguments
    # ('args').  The table is handed to `_replace_funcs` when series names
    # and query strings are expanded (see info() and request()).
    # NOTE(review): the ISO* entries presumably translate country/area codes
    # through `ndc` -- confirm against that module.
    _func = { 
        'ISO2' :     { 'def' : ndc.get_country_alpha2,  'args' : ( 'code', ) },
        'ISO3' :     { 'def' : ndc.get_country_alpha3,  'args' : ( 'code', ) },
        'ISON' :     { 'def' : ndc.get_country_numeric, 'args' : ( 'code', ) },
        'ISOAREA':   { 'def' : ndc.get_area_name,       'args' : ( 'code', ) },
        'ISOREGION': { 'def' : ndc.get_region_name,     'args' : ( 'code', ) }
        }

    
    def __init__(self,name,profile=None,options=Options()):
        """
        Create a provider with an empty request queue and an empty result set.

        :param name: provider name, stored as ``self.name`` (used as a prefix
            for the accounting keys written by ``request``).
        :param profile: connection profile, stored as ``self._profile``.
        :param options: options object, stored as ``self._options``.
            NOTE(review): the default ``Options()`` is evaluated once, when
            the method is defined, so every instance created without an
            explicit ``options`` shares that same object.
        """
        # The value is not used further in this method.
        # NOTE(review): possibly read for a side effect -- confirm in Options.
        counter = options.counter 
        # logger.debug('CLASS=%s,COUNTER=%s',self.__class__.__name__, counter)

        self._options = options 
        self.name = name
        self._retries = 3 # TODO: to get from options
        
        # Queue of parsed request URLs (lists built from urlparse results),
        # filled by append()
        self.requesting = []

        # Hook to get data values; left unset here
        # (presumably assigned by concrete providers -- TODO confirm)
        self.get_values = None

        # The Information Set (results collected by request()/mget())
        self._res = DataSet()

        # Names of the variables that could not be retrieved
        self._missing = []

        # Connection Profile
        self._profile=profile

        # Options for request:
        #  - _opt_request_req: when true, request() hands the whole parsed
        #    request to mk_request() instead of the list of series
        #  - _opt_delete_base_kvars: when true, request() deletes the merged
        #    keys from the parsed query-string dictionary
        #  - _opt_discipline_inline_function_single: when True, mget() runs
        #    the inline processor after every series instead of once at the end
        self._opt_request_req = False
        self._opt_delete_base_kvars = False
        self._opt_discipline_inline_function_single = True
        
        # Base parameters for data providers
        self._append_param('LASTYEAR','(($THISYEAR-$YUPD))')
        self._append_param('PREVYEAR','(($LASTYEAR-1))')

        # Password violation (don't make other requests)
        self._password_violation = False

    def append(self,url):
        """
        Append a data request URL to the list of pending requests.

        The URL is stripped of comments and trailing newlines/blanks and
        upper-cased.  URLs whose scheme is ``option`` are handed to
        ``_append_option`` and not queued; for any other scheme the ``$``
        placeholders are expanded from ``self._options.define_set`` and the
        parsed URL (a list of the six urlparse components, with the original
        scheme restored) is appended to ``self.requesting``.

        :param url: request URL, e.g. ``dstream://Datastream/AGINDEMIF``.
            Empty URLs (after comment stripping) are ignored.

        >>> dp = DataProvider('test')
        >>> dp.append('dstream://Datastream/AGINDEMIF')
        >>> dp.append('dstream://Datastream/PCH#(AGINDEMIF,1Y)')
        >>> dp.append('option://param/TEST?VALUE')
        """
        url = stripcomments(url.rstrip('\n ').upper())
        if not url:
            return

        # urlparse cannot parse queries such as (?...&...) when the scheme is
        # not http, so the URL is parsed twice: a first pass to learn the real
        # scheme and a second pass behind a fake http:// scheme.

        # '#' would be taken as the start of a fragment: hide it while parsing
        placeholder = '__A~~~~A__'

        # first pass of parsing, only to get the scheme
        scheme = urlparse(url.replace('#', placeholder)).scheme
        is_option = re.search('^option$', scheme, re.IGNORECASE) is not None

        # substitute parameters when scheme is not option
        if not is_option and '$' in url:
            url = Template(url).safe_substitute(self._options.define_set)

        url = url.replace('#', placeholder)

        # ...second urlparse using a fake http: scheme.  The scheme is escaped
        # because '+' and '.' are legal in a scheme but special in a regex.
        h_url = re.sub("^%s://" % re.escape(scheme), "http://", url, flags=re.I)
        parsed = list(urlparse(h_url))
        parsed[0] = unicode(scheme)

        # the '#' is restored in the path component only, and backslashes in
        # the path are normalised to forward slashes
        parsed[2] = parsed[2].replace(placeholder, '#').replace('\\', '/')

        # for option URI insert in the option list
        if is_option:
            self._append_option(parsed)
            return

        # otherwise add to requesting list
        self.requesting.append(parsed)

        
    def info(self):
        """
        Return a textual summary of the queued requests.

        Every request contributes ``HOSTNAME|[series]|query``, where the host
        name is upper-cased, ``series`` is the one-element list holding the
        URL path (without its leading character) after ``_replace_funcs``
        expansion, and ``query`` is the raw query string.  The entries are
        concatenated without a separator; an empty queue gives ``""``.
        """
        # The accumulator must not share a name with a comprehension
        # variable: Python 2 list comprehensions leak their loop variable
        # into the enclosing scope, which would overwrite the summary.
        chunks = []
        for req in self.requesting:
            hostname = req[1].upper()
            series = [_replace_funcs(req[2][1:], self._func)]
            xparams = req[4]
            chunks.append("%s|%s|%s" % (hostname, series, xparams))
        return "".join(chunks)

            
    def mk_request(self,sources,serie):
        """
        Transform a request into the structure understood by the provider driver.

        This implementation returns ``sources`` unchanged and ignores ``serie``
        (presumably concrete providers override it -- TODO confirm).

        :param sources: the request built by ``request``: the list of series,
            or the whole parsed request when ``_opt_request_req`` is set.
        :param serie: not used here; ``request`` passes the upper-cased host
            name of the request in this position.
        :returns: ``sources``, as received.
        """
        return sources


    def request(self,profile=None):
        """
        Run every queued request and return the collected result set.

        The provider is opened first (when it has an ``open`` method) and is
        always closed again (when it has a ``close`` method), even if one of
        the requests raises.  For each entry of ``self.requesting`` the query
        string is merged over the base variables NAME, START, END, PROC and
        CHECK, and -- unless ``self._options.only_options`` is set -- the
        data are fetched through ``mk_request`` and ``mget``.  Timings,
        requests and results are recorded in ``_accounting``.

        :param profile: only recorded in ``_accounting`` under
            ``datareq.profile``.
        :returns: ``self._res``, after the names collected in
            ``self._missing`` have been registered with ``add_missing``.
        """
        if hasattr(self.provider,'open'):
            self.provider.open()

        try:
            self._res = DataSet() # Base Dataset is empty

            # accounting stuff
            _accounting['datareq.profile'] = profile

            for req in self.requesting:

                # base variables, overridden by the query string if present
                kvars = udict({ 'name':  None,
                                'start': None,
                                'end':   None,
                                'proc':  None,
                                'check': None })

                hostname = req[1].upper()
                series = [_replace_funcs(req[2][1:], self._func)]
                xparams = req[4]

                _accounting["datareq.series.%s" % ','.join(series)] = '%s | %s (%s%s)' % (
                    hostname, str(xparams),
                    'R' if self._opt_request_req else "S",
                    'K' if self._opt_delete_base_kvars else "-" )

                # Expand the library functions in the query string as well
                xparams = _replace_funcs(xparams, self._func)
                if len(xparams) > 0:
                    kvars2 = udict(parse_qsl(xparams))
                    # NOTE(review): the check runs on the base variables,
                    # before the query-string values are merged in -- confirm
                    # it was not meant for kvars2.
                    accepted(kvars,
                             'NAME',
                             'START',
                             'END',
                             'PROC',
                             'CHECK')

                    kvars.update(kvars2)

                    # NAME stays None when the query string does not carry
                    # it: strip only an actual value
                    if 'NAME' in kvars and kvars['NAME'] is not None:
                        kvars['NAME'] = kvars['NAME'].strip()

                    if self._opt_delete_base_kvars:
                        # NOTE(review): kvars2 is not read again in this
                        # method, so this has no visible effect here.
                        for _k in kvars.keys():
                            if _k in kvars2:
                                del kvars2[_k]

                if self._options.only_options:
                    continue

                if self._opt_request_req:
                    reqs = self.mk_request(req, hostname)
                else:
                    reqs = self.mk_request(series, hostname)

                with Timer() as t:
                    res = self.mget(reqs, **kvars)

                acct_key = '%s.request.%s' % (self.name, kvars['NAME'])
                _accounting[acct_key + '.time'] = t.msecs
                _accounting[acct_key + '.req'] = req
                if res:
                    _accounting[acct_key + '.res'] = res
        finally:
            # release the provider even when a request failed
            if hasattr(self.provider,'close'):
                self.provider.close()

        _accounting['%s.request.missing' % (self.name)] = ','.join(self._missing)

        self._res.add_missing(*self._missing)

        return self._res

    def mget(self,reqs,**kw):
        """
        Fetch every series of ``reqs`` and merge it into the result set.

        Each series is retrieved with ``self.get(serie, **kw)``.  A truthy
        result updates ``self._res``; otherwise the comma-separated names in
        ``kw['NAME']`` are added to ``self._missing``.  A ``ValueError``
        raised by ``get`` is logged and the series is treated as missing.
        The inline processor runs after every series when
        ``_opt_discipline_inline_function_single`` is True, otherwise once
        after the last one.

        :param reqs: iterable of series identifiers.
        :param kw: request variables passed on to ``get``; when ``NAME`` is
            absent or empty the series identifier is used as the name.
        :returns: the value returned by ``get`` for the last series, or
            ``None`` when that series is missing or ``reqs`` is empty.
        """
        # keeps the return value defined when reqs is empty
        _ts = None

        for serie in reqs:
            name = kw.get('NAME')
            if not name:
                # unnamed request: the series identifier doubles as its name.
                # NOTE(review): the name stays in kw, so it is reused for any
                # following series of the same call -- confirm this is wanted.
                name = kw['NAME'] = serie

            _ts = None
            try:
                # gets data from provider
                _ts = self.get(serie,**kw)
            except ValueError:
                logger.debug('Not saving %s in information set - series missing',serie)

            if _ts:
                # ...save in results
                self._res.update(_ts)
            elif kw.get('NAME') is not None:
                # ...or in missing list
                self._missing.extend(kw['NAME'].split(','))
            else:
                logger.warn('Anonymous MISSNG found')

            _accounting['load.inline.processors.discipline']=self._opt_discipline_inline_function_single

            # the comparisons with True are kept on purpose: a truthy value
            # other than True selects the single final pass below
            if self._opt_discipline_inline_function_single==True:
                self._res = self._inline_processor(self._res,name,kw)

        if self._opt_discipline_inline_function_single!=True:
            self._res = self._inline_processor(self._res,None,kw)

        return _ts