def load(self,dataset,report_func=None):
    """Load a previously cached DataSet from its pickle file.

    :param dataset: path of the dataset/timeseries file whose cache is
        wanted; falsy values or non-existing paths are silently skipped.
    :param report_func: optional callable invoked as
        ``report_func("load", cache_file, result, self, _accounting)``
        after a successful load; the accounting dict is cleared afterwards.
    :returns: a ``DataSet`` holding the cached series and missing list,
        or ``None`` when caching is disabled, the dataset is absent, or
        no cache file exists (and no explicit cache date was requested).
    """
    options = self._options
    # Nothing to do when caching is off or the source dataset is absent.
    if not dataset or not options.switch_cache or not exists(dataset):
        return
    # get profile from timeseries file if exists
    profile = self._get_file_profile(dataset, options.profile)
    _accounting['cache.load.profile'] = profile
    # Cache date: explicit option, or today's ISO date (YYYY-MM-DD).
    d = options.cache_date
    if not d:
        d = datetime.today().isoformat()[:10]
    # Called for its side effect only (ensures the cache dir exists);
    # the returned path was previously bound to an unused local.
    self._make_cachedir()
    f = self.filename(dataset)
    _accounting['cache.load.file'] = f
    result = None
    if exists(f):
        result = DataSet()
        # FIX: close the pickle file deterministically instead of leaking
        # the handle, and read in binary mode as pickle data requires.
        with open(f, 'rb') as fh:
            p = pickle.load(fh)
        # Cache entries are tagged tuples: index 0 is a type marker.
        for k, v in p.items():
            if k == "_MISSING":
                result.add_missing(*v)
            elif v[0] == 0:   # 0 -> Timeseries (data, metadata)
                result[k] = Timeseries(data=v[1], metadata=v[2], name=k)
            elif v[0] == 1:   # 1 -> Numpy array, stored verbatim
                result[k] = v[1]
        _accounting['cache.load.missing'] = ','.join(result.missing)
        _accounting['cache.load.series'] = ','.join(result.keys())
        # Report back to the caller, then reset the accounting bucket.
        if report_func:
            report_func("load", f, result, self, _accounting)
            _accounting.clear()
    else:
        # An explicitly requested cache date with no cache file is fatal.
        if options.cache_date:
            logger.error('Requested date (%s) does not exists', d)
            sys.exit(-1)
    return result
class DataProvider(object):
    """
    General access class to Data Providers with URI notation like:

    * ``dstream://Datastream/AGINDEMIF`` o
    * ``flinp://DB22/ECB_STS1/M.BG.N.PROD.NS0020.4.000?name=IPBUL&start=$NOW-24M&end=$NOW&proc=weighted_avg&(IPBUL2000AVG)check``
    """

    # library functions: inline-function names resolvable inside request
    # strings, mapped to their implementation and argument names.
    _func = {
        'ISO2'     : { 'def' : ndc.get_country_alpha2,  'args' : ( 'code', ) },
        'ISO3'     : { 'def' : ndc.get_country_alpha3,  'args' : ( 'code', ) },
        'ISON'     : { 'def' : ndc.get_country_numeric, 'args' : ( 'code', ) },
        'ISOAREA'  : { 'def' : ndc.get_area_name,       'args' : ( 'code', ) },
        'ISOREGION': { 'def' : ndc.get_region_name,     'args' : ( 'code', ) }
    }

    def __init__(self,name,profile=None,options=Options()):
        # NOTE(review): ``options=Options()`` is a mutable default shared
        # across all instances constructed without an explicit options
        # argument — confirm this sharing is intended.
        counter = options.counter  # NOTE(review): bound but never used here
        # logger.debug('CLASS=%s,COUNTER=%s',self.__class__.__name__, counter)
        self._options = options
        self.name = name
        self._retries = 3  # TODO: to get from options
        # tuple extracting from urlparse
        self.requesting = []
        # get data values
        self.get_values = None
        # The Information Set
        self._res = DataSet()
        # Missing Variables
        self._missing = []
        # Connection Profile
        self._profile=profile
        # Options for request
        self._opt_request_req = False
        self._opt_delete_base_kvars = False
        self._opt_discipline_inline_function_single = True
        # Base parameters for data providers
        self._append_param('LASTYEAR','(($THISYEAR-$YUPD))')
        self._append_param('PREVYEAR','(($LASTYEAR-1))')
        # Password violation (dont make other requests)
        self._password_violation = False

    def append(self,url):
        """
        Append a data request URL to the list

        >>> dp = DataProvider()
        >>> dp.append('dstream://Datastream/AGINDEMIF')
        >>> dp.append('dstream://Datastream/PCH#(AGINDEMIF,1Y)')
        >>> dp.append('option://param/TEST?VALUE')
        """
        # Strip comments/trailing whitespace and normalize to upper case;
        # blank lines are ignored.
        url=stripcomments(url.rstrip('\n ').upper())
        if len(url)==0:
            return
        # Needs double parsing because urlparse function
        # cant parse (?...&...)
        # if schema is not http:
        # management of # character: '#' would be read as a fragment
        # separator by urlparse, so mask it with a placeholder first.
        URL = url.replace('#','__A~~~~A__')
        # first pass of parsing ot get scheme
        up = urlparse(URL)
        # substitute parameters when scheme is not option (CHECK)
        if not re.search('^option$',up.scheme,re.IGNORECASE) and '$' in url:
            url = Template(url).safe_substitute(self._options.define_set)
            # reset
            # url = url.replace('#','__A~~~~A__')
        # ...second urlparse using a fake http: schema
        h_url = re.sub("^%s://" % up.scheme, "http://" , url,flags=re.I)
        h_pa = urlparse(h_url)
        parsed = list(h_pa)
        # restore the real scheme and unmask '#' in the path
        parsed[0] = unicode(up.scheme)
        A = parsed[2]  # NOTE(review): bound but never used
        parsed[2] = parsed[2].replace('__A~~~~A__','#')  # ?!?
        parsed[2] = parsed[2].replace('\\','/')
        # for option URI insert in the option list
        if re.search('^option$',up.scheme,re.I):
            self._append_option(parsed)
            return
        # logger.debug('%s (%d)', parsed, len(self.requesting))
        # otherwise add to requesting list
        self.requesting.append(parsed)

    def info(self):
        # Build a "HOST|[series]|params" summary string over all pending
        # requests.
        # NOTE(review): in Python 2 the list-comprehension variable ``s``
        # leaks into the enclosing scope and clobbers the accumulator
        # ``s``, so each iteration restarts from the last series string
        # instead of appending — confirm and fix upstream.
        s = ""
        for req in self.requesting:
            hostname=req[1].upper()
            series = [_replace_funcs(s,self._func) for s in [req[2][1:],]]  # .split('+')
            xparams = req[4]
            s += "%s|%s|%s" % (hostname,series,xparams)
        return s

    def mk_request(self,sources,serie):
        """The mk_request transform the request string in the structure
        understood by the provider driver.

        Identity implementation: subclasses override this to adapt the
        request to their driver; ``serie`` is unused here.
        """
        return sources

    def request(self,profile=None):
        """
        Request data

        :rtype TimeSeriesResultSet: resultset di timeseries
        """
        global acct  # NOTE(review): declared but never assigned here
        # Open the driver connection if the provider supports it.
        if hasattr(self.provider,'open'):
            self.provider.open()
        self._res = DataSet()  # Base Dataset is empty
        # accounting stuff
        acct_l = {}  # NOTE(review): filled only by commented-out code below
        _accounting['datareq.profile']=profile
        R = udict()  # NOTE(review): bound but never used
        for req in self.requesting:
            # Base keyword variables every request recognizes; udict keys
            # are case-insensitive (presumably — verify in udict).
            basevars = udict({ 'name': None, 'start': None, 'end': None,
                               'proc': None, 'check': None })
            hostname=req[1].upper()
            series = [ _replace_funcs(s,self._func) for s in [req[2][1:],]]
            xparams = req[4]
            #logger.debug('dataprovider requests %s from %s | %s (%s%s)',
            #             ','.join(series),hostname,str(xparams),
            #             'R' if self._opt_request_req else "S",
            #             'K' if self._opt_delete_base_kvars else "-" )
            _accounting["datareq.series.%s"%','.join(series)]='%s | %s (%s%s)' % (
                hostname,str(xparams),
                'R' if self._opt_request_req else "S",
                'K' if self._opt_delete_base_kvars else "-" )
            # Replace self._funcs
            xparams = _replace_funcs(xparams,self._func)
            kvars = basevars
            if len(xparams)>0:
                # Merge the querystring parameters over the base kvars.
                kvars2 = udict(parse_qsl(xparams))
                accepted(kvars, 'NAME', 'START', 'END', 'PROC', 'CHECK')
                kvars.update(kvars2)
                if 'NAME' in kvars:
                    kvars['NAME']=kvars['NAME'].strip()
                # Optionally drop from kvars2 every key already promoted
                # into the base set.
                if self._opt_delete_base_kvars:
                    for _k in kvars.keys():
                        if _k in kvars2:
                            del kvars2[_k]
            # Providers may want the full parsed request tuple rather
            # than just the series list.
            if self._opt_request_req:
                reqs = req
            else:
                reqs = series
            res = []
            if not self._options.only_options:
                if self._opt_request_req:
                    reqs = self.mk_request(req,hostname)
                else:
                    reqs = self.mk_request(series,hostname)
                # if self._options.switch_verbose:
                #     for k,v in kvars.items():
                #         logger.info('K:%s=%s',k,v)
                # Time the actual data fetch for accounting.
                with Timer() as t:
                    res = self.mget(reqs,**kvars)
                _accounting['%s.request.%s.time' % (self.name,kvars['NAME'])] = t.msecs
                _accounting['%s.request.%s.req' % (self.name,kvars['NAME'])] = req
                if res:
                    _accounting['%s.request.%s.res' % (self.name,kvars['NAME'])] = res
            # acct_l[kvars['NAME']] = (reqs,kvars,res)
            # if 'provider' not in acct:
            #     acct['provider']={}
            #acct['provider'][self.name]=acct_l
            kvars = {}
        if hasattr(self.provider,'close'):
            self.provider.close()
        # Record and attach everything that could not be retrieved.
        _accounting['%s.request.missing' % (self.name)] = ','.join(self._missing)
        self._res.add_missing(*self._missing)
        return self._res

    def mget(self,reqs,**kw):
        # Fetch each series in ``reqs``, accumulating results into
        # ``self._res`` and failures into ``self._missing``.
        # NOTE(review): returns only the *last* fetched ``_ts`` (or None),
        # and mutates ``kw['NAME']`` in place across iterations — both
        # look order-sensitive; confirm before refactoring.
        for serie in reqs:
            name = serie
            if kw['NAME']:
                name = kw['NAME']
            else:
                kw['NAME'] = serie
            _ts = None
            try:
                # gets data from provider
                _ts = self.get(serie,**kw)
            except ValueError, exc:
                # Missing series is an expected condition, not an error.
                logger.debug('Not saving %s in information set - series missing',serie)
            # ...save in results
            if _ts:
                self._res.update(_ts)
            # or in missing list
            else:
                if kw.has_key('NAME') and kw['NAME'] is not None:
                    self._missing.extend(kw['NAME'].split(','))
                else:
                    logger.warn('Anonymous MISSNG found')
            _accounting['load.inline.processors.discipline']=self._opt_discipline_inline_function_single
            # Run inline processors: with discipline, scoped to this
            # series name; without, applied globally (name=None).
            if self._opt_discipline_inline_function_single==True:
                # logger.debug("_inline_processor with discipline True")
                self._res = self._inline_processor(self._res,name,kw)
            if self._opt_discipline_inline_function_single!=True:
                # logger.debug("_inline_processor with discipline False")
                self._res = self._inline_processor(self._res,None,kw)
        return _ts