class DataProvider(object):
    """General access class to Data Providers addressed with a URI notation.

    Examples of request URIs:

    * ``dstream://Datastream/AGINDEMIF``
    * ``flinp://DB22/ECB_STS1/M.BG.N.PROD.NS0020.4.000?name=IPBUL&start=$NOW-24M&end=$NOW&proc=weighted_avg&(IPBUL2000AVG)check``

    Requests are queued with :meth:`append` and executed with
    :meth:`request`.  The methods below also rely on ``self.provider``,
    ``self.get``, ``self._append_param``, ``self._append_option`` and
    ``self._inline_processor``, which are not defined in this part of the
    class (presumably supplied further down or by subclasses).
    """

    # library functions
    _func = {
        'ISO2': {'def': ndc.get_country_alpha2, 'args': ('code',)},
        'ISO3': {'def': ndc.get_country_alpha3, 'args': ('code',)},
        'ISON': {'def': ndc.get_country_numeric, 'args': ('code',)},
        'ISOAREA': {'def': ndc.get_area_name, 'args': ('code',)},
        'ISOREGION': {'def': ndc.get_region_name, 'args': ('code',)},
    }

    # NOTE(review): the ``Options()`` default is evaluated once, when the
    # class body is executed, so every provider built without an explicit
    # ``options`` shares that single instance.  Kept as is because other
    # code may rely on the sharing -- confirm before changing.
    def __init__(self, name, profile=None, options=Options()):
        """Initialise an empty provider.

        :param name: provider name, used as prefix of the accounting keys
        :param profile: connection profile, stored in ``self._profile``
        :param options: options object; its ``define_set`` and
            ``only_options`` attributes are read by the other methods
        """
        # Only the disabled debug log below uses the value; the attribute
        # access itself is kept unchanged.
        counter = options.counter
        # logger.debug('CLASS=%s,COUNTER=%s',self.__class__.__name__, counter)
        self._options = options
        self.name = name
        self._retries = 3  # TODO: to get from options

        # parsed request URLs (lists built from the urlparse result)
        self.requesting = []
        # get data values
        self.get_values = None
        # The Information Set
        self._res = DataSet()
        # Missing Variables
        self._missing = []
        # Connection Profile
        self._profile = profile

        # Options for request
        self._opt_request_req = False
        self._opt_delete_base_kvars = False
        self._opt_discipline_inline_function_single = True

        # Base parameters for data providers
        self._append_param('LASTYEAR', '(($THISYEAR-$YUPD))')
        self._append_param('PREVYEAR', '(($LASTYEAR-1))')

        # Password violation (dont make other requests)
        self._password_violation = False

    def append(self, url):
        """Append a data request URL to the list of pending requests.

        The URL is right-stripped of newlines and spaces, upper-cased and
        passed through ``stripcomments``; an empty result is ignored.
        URLs whose scheme is ``option`` are handed to
        ``self._append_option`` instead of being queued.

        >>> dp = DataProvider('example')
        >>> dp.append('dstream://Datastream/AGINDEMIF')
        >>> dp.append('dstream://Datastream/PCH#(AGINDEMIF,1Y)')
        >>> dp.append('option://param/TEST?VALUE')

        :param url: request URI
        """
        url = stripcomments(url.rstrip('\n ').upper())
        if not url:
            return

        # Double parsing is needed because urlparse cannot parse
        # (?...&...) when the scheme is not http.
        # First pass: mask the '#' character and parse only to learn the
        # scheme.
        masked = url.replace('#', '__A~~~~A__')
        up = urlparse(masked)
        is_option = re.search('^option$', up.scheme, re.IGNORECASE)

        # substitute parameters when scheme is not option (CHECK)
        if not is_option and '$' in url:
            url = Template(url).safe_substitute(self._options.define_set)

        # reset
        # url = url.replace('#','__A~~~~A__')

        # ...second urlparse using a fake http: scheme.  The scheme is
        # escaped so that regex metacharacters in it ('+', '.') are
        # matched literally.
        h_url = re.sub("^%s://" % re.escape(up.scheme), "http://", url,
                       flags=re.I)
        parsed = list(urlparse(h_url))
        parsed[0] = unicode(up.scheme)
        parsed[2] = parsed[2].replace('__A~~~~A__', '#')  # ?!?
        parsed[2] = parsed[2].replace('\\', '/')

        # for option URI insert in the option list
        if is_option:
            self._append_option(parsed)
            return

        # logger.debug('%s (%d)', parsed, len(self.requesting))
        # otherwise add to requesting list
        self.requesting.append(parsed)

    def info(self):
        """Return a one-line description of the queued requests.

        Each request contributes ``HOST|[series]|query`` and the pieces
        are concatenated without a separator.
        """
        # The comprehension variable must not reuse the accumulator name:
        # in Python 2 it leaks into the method scope and would overwrite
        # the text collected so far.
        chunks = []
        for req in self.requesting:
            hostname = req[1].upper()
            series = [_replace_funcs(item, self._func)
                      for item in [req[2][1:]]]  # .split('+')
            xparams = req[4]
            chunks.append("%s|%s|%s" % (hostname, series, xparams))
        return "".join(chunks)

    def mk_request(self, sources, serie):
        """Transform the request into the structure understood by the
        provider driver.

        This base implementation returns ``sources`` unchanged and
        ignores ``serie``.
        """
        return sources

    def request(self, profile=None):
        """Run every queued request and collect the results.

        :param profile: value recorded under the ``datareq.profile``
            accounting key
        :return: ``self._res``, a fresh ``DataSet`` filled through
            :meth:`mget`, with the missing names added via ``add_missing``
        """
        if hasattr(self.provider, 'open'):
            self.provider.open()

        try:
            self._res = DataSet()  # Base Dataset is empty

            # accounting stuff
            _accounting['datareq.profile'] = profile

            for req in self.requesting:
                hostname = req[1].upper()
                series = [_replace_funcs(item, self._func)
                          for item in [req[2][1:]]]
                xparams = req[4]

                _accounting["datareq.series.%s" % ','.join(series)] = \
                    '%s | %s (%s%s)' % (
                        hostname, str(xparams),
                        'R' if self._opt_request_req else "S",
                        'K' if self._opt_delete_base_kvars else "-")

                # Replace self._funcs
                xparams = _replace_funcs(xparams, self._func)

                # Base keyword variables, overridden by the query string.
                kvars = udict({'name': None, 'start': None, 'end': None,
                               'proc': None, 'check': None})
                if len(xparams) > 0:
                    kvars2 = udict(parse_qsl(xparams))
                    accepted(kvars, 'NAME', 'START', 'END', 'PROC', 'CHECK')
                    kvars.update(kvars2)
                    # NAME may still hold the None default when the query
                    # string does not provide it.
                    if 'NAME' in kvars and kvars['NAME'] is not None:
                        kvars['NAME'] = kvars['NAME'].strip()
                    # kvars2 exists only in this branch, so the pruning
                    # lives here as well.
                    if self._opt_delete_base_kvars:
                        for _k in list(kvars.keys()):
                            if _k in kvars2:
                                del kvars2[_k]

                if self._options.only_options:
                    continue

                if self._opt_request_req:
                    reqs = self.mk_request(req, hostname)
                else:
                    reqs = self.mk_request(series, hostname)

                # if self._options.switch_verbose:
                #     for k,v in kvars.items():
                #         logger.info('K:%s=%s',k,v)
                with Timer() as t:
                    res = self.mget(reqs, **kvars)

                prefix = '%s.request.%s' % (self.name, kvars['NAME'])
                _accounting[prefix + '.time'] = t.msecs
                _accounting[prefix + '.req'] = req
                if res:
                    _accounting[prefix + '.res'] = res
        finally:
            # Release the provider even when a request raised.
            if hasattr(self.provider, 'close'):
                self.provider.close()

        _accounting['%s.request.missing' % (self.name)] = \
            ','.join(self._missing)
        self._res.add_missing(*self._missing)
        return self._res

    def mget(self, reqs, **kw):
        """Fetch every series in ``reqs`` and merge it into ``self._res``.

        :param reqs: iterable of series identifiers passed to ``self.get``
        :param kw: keyword variables forwarded to ``self.get``; a missing
            or empty ``NAME`` is replaced by the series identifier
        :return: the value returned by ``self.get`` for the last series,
            or ``None`` when ``reqs`` is empty or the last series failed
        """
        _ts = None  # defined even when reqs is empty
        for serie in reqs:
            name = kw.get('NAME')
            if not name:
                name = serie
                kw['NAME'] = serie

            _ts = None
            try:
                # gets data from provider
                _ts = self.get(serie, **kw)
            except ValueError:
                logger.debug('Not saving %s in information set - series missing', serie)

            if _ts:
                # ...save in results
                self._res.update(_ts)
            elif kw.get('NAME') is not None:
                # or in missing list
                self._missing.extend(kw['NAME'].split(','))
            else:
                logger.warn('Anonymous MISSNG found')

            _accounting['load.inline.processors.discipline'] = \
                self._opt_discipline_inline_function_single
            if self._opt_discipline_inline_function_single == True:
                # logger.debug("_inline_processor with discipline True")
                self._res = self._inline_processor(self._res, name, kw)

        if self._opt_discipline_inline_function_single != True:
            # logger.debug("_inline_processor with discipline False")
            self._res = self._inline_processor(self._res, None, kw)

        return _ts