def update_missing(**kwargs):
    """
    Update number of trials for missing values

    Returns:
        dict
    """
    key = info_key(**kwargs)

    data_path = os.environ.get(BBG_ROOT, '').replace('\\', '/')
    empty_log = f'{data_path}/Logs/EmptyQueries.json'

    cur_miss = dict()
    if files.exists(empty_log):
        with open(empty_log, 'r') as fp:
            cur_miss = json.load(fp=fp)

    cur_miss[key] = cur_miss.get(key, 0) + 1
    if files.exists(empty_log):
        while not os.access(empty_log, os.W_OK):
            time.sleep(1)
    else:
        files.create_folder(empty_log, is_file=True)

    with open(empty_log, 'w') as fp:
        json.dump(cur_miss, fp=fp, indent=2)

    globals()[_MISSING_] = cur_miss
    return cur_miss


def load_info(cat):
    """
    Load parameters for assets

    Args:
        cat: category

    Returns:
        dict

    Examples:
        >>> import pandas as pd
        >>>
        >>> assets = load_info(cat='assets')
        >>> all(cat in assets for cat in ['Equity', 'Index', 'Curncy', 'Corp'])
        True
        >>> os.environ['BBG_ROOT'] = ''
        >>> exch = load_info(cat='exch')
        >>> pd.Series(exch['EquityUS']).allday
        [400, 2000]
        >>> test_root = f'{PKG_PATH}/tests'
        >>> os.environ['BBG_ROOT'] = test_root
        >>> ovrd_exch = load_info(cat='exch')
        >>> # Somehow os.environ is not set properly in doctest environment
        >>> ovrd_exch.update(_load_yaml_(f'{test_root}/markets/exch.yml'))
        >>> pd.Series(ovrd_exch['EquityUS']).allday
        [300, 2100]
    """
    yaml_file = f'{PKG_PATH}/markets/{cat}.yml'
    root = os.environ.get('BBG_ROOT', '').replace('\\', '/')
    yaml_ovrd = f'{root}/markets/{cat}.yml' if root else ''
    if not files.exists(yaml_ovrd):
        yaml_ovrd = ''

    pkl_file = f'{PKG_PATH}/markets/cached/{cat}.pkl'
    ytime = files.file_modified_time(yaml_file)
    if yaml_ovrd:
        ytime = max(ytime, files.file_modified_time(yaml_ovrd))
    if files.exists(pkl_file) and files.file_modified_time(pkl_file) > ytime:
        return pd.read_pickle(pkl_file).to_dict()

    res = _load_yaml_(yaml_file)
    if yaml_ovrd:
        for key, ovrd in _load_yaml_(yaml_ovrd).items():
            if isinstance(ovrd, dict):
                if key in res:
                    res[key].update(ovrd)
                else:
                    res[key] = ovrd
            if isinstance(ovrd, list) and isinstance(res.get(key), list):
                res[key] += ovrd

    if not hasattr(sys, 'pytest_call'):
        files.create_folder(pkl_file, is_file=True)
        pd.Series(res).to_pickle(pkl_file)

    return res


def load_config(cat: str) -> pd.DataFrame:
    """
    Load market info as a DataFrame, with each YAML entry
    expanded via `pd.Series`

    Args:
        cat: category name

    Returns:
        pd.DataFrame
    """
    cfg_files = config_files(cat=cat)
    cache_cfg = f'{PKG_PATH}/markets/cached/{cat}_cfg.pkl'
    last_mod = max(map(files.modified_time, cfg_files))
    if files.exists(cache_cfg) and files.modified_time(cache_cfg) > last_mod:
        return pd.read_pickle(cache_cfg)

    config = pd.concat([
        load_yaml(cf).apply(pd.Series)
        for cf in cfg_files
    ], sort=False)
    files.create_folder(cache_cfg, is_file=True)
    config.to_pickle(cache_cfg)
    return config


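# A minimal usage sketch for `load_config`, assuming the `markets/exch.yml`
# shipped with the package (expected values taken from the `load_info`
# doctest above; skipped since it needs the package data on disk):
#
#     >>> exch = load_config(cat='exch')  # doctest: +SKIP
#     >>> exch.loc['EquityUS', 'allday']
#     [400, 2000]

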
def asset_config(asset: str) -> pd.DataFrame:
    """
    Load info for given asset

    Args:
        asset: asset name

    Returns:
        pd.DataFrame
    """
    cfg_files = param.config_files('assets')
    cache_cfg = f'{PKG_PATH}/markets/cached/{asset}_cfg.pkl'
    last_mod = max(map(files.modified_time, cfg_files))
    if files.exists(cache_cfg) and files.modified_time(cache_cfg) > last_mod:
        return pd.read_pickle(cache_cfg)

    config = (
        pd.concat([
            explode(
                data=pd.DataFrame(param.load_yaml(cf).get(asset, [])),
                columns=ASSET_INFO[asset],
            )
            for cf in cfg_files
        ], sort=False)
        .drop_duplicates(keep='last')
        .reset_index(drop=True)
    )
    files.create_folder(cache_cfg, is_file=True)
    config.to_pickle(cache_cfg)
    return config


def bds(tickers, flds, **kwargs) -> pd.DataFrame:
    """
    Bloomberg block data

    Args:
        tickers: ticker(s)
        flds: field(s)
        **kwargs: other overrides for query

    Returns:
        pd.DataFrame: block data
    """
    logger = logs.get_logger(bds, **kwargs)

    if isinstance(tickers, str):
        data_file = storage.ref_file(
            ticker=tickers, fld=flds, has_date=True, ext='pkl', **kwargs)
        if files.exists(data_file):
            logger.debug(f'Loading Bloomberg data from: {data_file}')
            return pd.DataFrame(pd.read_pickle(data_file))

        service = conn.bbg_service(service='//blp/refdata', **kwargs)
        request = service.createRequest('ReferenceDataRequest')
        process.init_request(request=request, tickers=tickers, flds=flds, **kwargs)
        logger.debug(f'Sending request to Bloomberg ...\n{request}')
        conn.send_request(request=request, **kwargs)

        res = pd.DataFrame(process.rec_events(func=process.process_ref, **kwargs))
        if kwargs.get('raw', False):
            return res
        if res.empty or any(fld not in res for fld in ['ticker', 'field']):
            return pd.DataFrame()

        data = (
            res
            .set_index(['ticker', 'field'])
            .droplevel(axis=0, level=1)
            .rename_axis(index=None)
            .pipe(pipeline.standard_cols, col_maps=kwargs.get('col_maps', None))
        )
        if data_file:
            logger.debug(f'Saving Bloomberg data to: {data_file}')
            files.create_folder(data_file, is_file=True)
            data.to_pickle(data_file)
        return data

    return pd.DataFrame(pd.concat([
        bds(tickers=ticker, flds=flds, **kwargs) for ticker in tickers
    ], sort=False))


def current_missing(**kwargs) -> int:
    """
    Check number of trials for missing values

    Returns:
        int: number of trials already logged for this query
    """
    cur_miss = globals().get(_MISSING_, dict())
    if not cur_miss:
        data_path = os.environ.get(BBG_ROOT, '').replace('\\', '/')
        empty_log = f'{data_path}/Logs/EmptyQueries.json'
        if not files.exists(empty_log):
            return 0
        with open(empty_log, 'r') as fp:
            cur_miss = json.load(fp=fp)
        globals()[_MISSING_] = cur_miss

    return cur_miss.get(info_key(**kwargs), 0)


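# Hedged sketch of how `current_missing` and `update_missing` (above) fit
# together around a query. `_query_bloomberg_` is a hypothetical stand-in for
# the actual data request; the cap of 2 trials mirrors the `num_trials >= 2`
# check used in `bdib` further below.
def _example_missing_guard(**kwargs):
    if current_missing(**kwargs) >= 2:
        return None                         # repeatedly empty - skip query
    data = _query_bloomberg_(**kwargs)      # hypothetical query call
    if data is None or getattr(data, 'empty', False):
        update_missing(**kwargs)            # log one more empty response
    return data

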
def config_files(cat: str) -> list:
    """
    Category files

    Args:
        cat: category

    Returns:
        list of files that exist
    """
    return [
        f'{r}/markets/{cat}.yml'
        for r in [
            PKG_PATH,
            os.environ.get('BBG_ROOT', '').replace('\\', '/'),
        ]
        if files.exists(f'{r}/markets/{cat}.yml')
    ]


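# Usage sketch for `config_files`: the package YAML always comes first,
# followed by the override under BBG_ROOT when it exists (paths illustrative):
#
#     >>> config_files(cat='assets')  # doctest: +SKIP
#     ['<PKG_PATH>/markets/assets.yml', '/data/bbg/markets/assets.yml']

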
def bdp(tickers, flds, cache=False, **kwargs):
    """
    Get reference data and save to cache

    Args:
        tickers: tickers
        flds: fields to query
        cache: bool - use cache to store data
        **kwargs: overrides

    Returns:
        pd.DataFrame

    Examples:
        >>> bdp('IQ US Equity', 'Crncy', raw=True)
                 ticker  field value
        0  IQ US Equity  Crncy   USD
        >>> bdp('IQ US Equity', 'Crncy').reset_index()
                 ticker crncy
        0  IQ US Equity   USD
    """
    logger = logs.get_logger(bdp, level=kwargs.pop('log', logs.LOG_LEVEL))
    con, _ = create_connection()
    ovrds = assist.proc_ovrds(**kwargs)

    logger.info(
        f'loading reference data from Bloomberg:\n'
        f'{assist.info_qry(tickers=tickers, flds=flds)}'
    )
    data = con.ref(tickers=tickers, flds=flds, ovrds=ovrds)
    if not cache:
        return [data]

    qry_data = []
    for r, snap in data.iterrows():
        subset = [r]
        data_file = storage.ref_file(
            ticker=snap.ticker, fld=snap.field, ext='pkl', cache=cache, **kwargs
        )
        if data_file:
            if not files.exists(data_file):
                qry_data.append(data.iloc[subset])
            files.create_folder(data_file, is_file=True)
            data.iloc[subset].to_pickle(data_file)

    return qry_data


def bdp_bds_cache(func, tickers, flds, **kwargs) -> ToQuery:
    """
    Find cached `BDP` / `BDS` queries

    Args:
        func: function name - bdp or bds
        tickers: tickers
        flds: fields
        **kwargs: other kwargs

    Returns:
        ToQuery(tickers, flds, cached_data)
    """
    cache_data = []
    log_level = kwargs.get('log', logs.LOG_LEVEL)
    logger = logs.get_logger(bdp_bds_cache, level=log_level)
    has_date = kwargs.pop('has_date', func == 'bds')
    cache = kwargs.get('cache', True)

    tickers = utils.flatten(tickers)
    flds = utils.flatten(flds)
    loaded = pd.DataFrame(data=0, index=tickers, columns=flds)

    for ticker, fld in product(tickers, flds):
        data_file = storage.ref_file(
            ticker=ticker, fld=fld, has_date=has_date, cache=cache, ext='pkl',
            **{k: v for k, v in kwargs.items() if k not in EXC_COLS},
        )
        if not files.exists(data_file):
            continue
        logger.debug(f'reading from {data_file} ...')
        cache_data.append(pd.read_pickle(data_file))
        loaded.loc[ticker, fld] = 1

    to_qry = (
        loaded.where(loaded == 0)
        .dropna(how='all', axis=1)
        .dropna(how='all', axis=0)
    )
    return ToQuery(
        tickers=to_qry.index.tolist(),
        flds=to_qry.columns.tolist(),
        cached_data=cache_data,
    )


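# Hedged sketch of how a caller consumes `ToQuery`: cached frames are reused
# as-is, and only the still-missing ticker / field pairs go to Bloomberg.
# `_run_query_` is a hypothetical stand-in for the live request; the real
# consumption path is the `wrapper` function at the end of this section.
def _example_consume_to_qry(tickers, flds, **kwargs):
    to_qry = bdp_bds_cache(func='bdp', tickers=tickers, flds=flds, **kwargs)
    parts = list(to_qry.cached_data)
    if to_qry.tickers and to_qry.flds:
        parts.append(_run_query_(tickers=to_qry.tickers, flds=to_qry.flds))
    if not parts:
        return pd.DataFrame()
    return pd.concat(parts, sort=False).reset_index(drop=True)

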
def _bds_(
        ticker: str,
        fld: str,
        logger: logs.logging.Logger,
        use_port: bool = False,
        **kwargs,
) -> pd.DataFrame:
    """
    Get BDS data for a single ticker
    """
    if 'has_date' not in kwargs:
        kwargs['has_date'] = True
    data_file = storage.ref_file(ticker=ticker, fld=fld, ext='pkl', **kwargs)
    if files.exists(data_file):
        logger.debug(f'Loading Bloomberg data from: {data_file}')
        return pd.DataFrame(pd.read_pickle(data_file))

    request = process.create_request(
        service='//blp/refdata',
        request='PortfolioDataRequest' if use_port else 'ReferenceDataRequest',
        **kwargs,
    )
    process.init_request(request=request, tickers=ticker, flds=fld, **kwargs)
    logger.debug(f'Sending request to Bloomberg ...\n{request}')
    conn.send_request(request=request, **kwargs)

    res = pd.DataFrame(process.rec_events(func=process.process_ref, **kwargs))
    if kwargs.get('raw', False):
        return res
    if res.empty or any(col not in res for col in ['ticker', 'field']):
        return pd.DataFrame()

    data = (
        res
        .set_index(['ticker', 'field'])
        .droplevel(axis=0, level=1)
        .rename_axis(index=None)
        .pipe(pipeline.standard_cols, col_maps=kwargs.get('col_maps', None))
    )
    if data_file:
        logger.debug(f'Saving Bloomberg data to: {data_file}')
        files.create_folder(data_file, is_file=True)
        data.to_pickle(data_file)

    return data


def load_yaml(yaml_file: str) -> pd.Series:
    """
    Load yaml from cache

    Args:
        yaml_file: YAML file name

    Returns:
        pd.Series
    """
    cache_file = (
        yaml_file
        .replace('/markets/', '/markets/cached/')
        .replace('.yml', '.pkl')
    )
    cur_mod = files.modified_time(yaml_file)
    if files.exists(cache_file) and files.modified_time(cache_file) > cur_mod:
        return pd.read_pickle(cache_file)

    with open(yaml_file, 'r') as fp:
        data = pd.Series(YAML().load(fp))
    files.create_folder(cache_file, is_file=True)
    data.to_pickle(cache_file)
    return data


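# `load_yaml` follows a generic mtime-based cache-invalidation pattern:
# rebuild the pickle only when the source file is newer than the cache.
# A self-contained sketch of the same idea (all names here are illustrative):
def _cached_load(src_file: str, cache_file: str, loader):
    """Reload `src_file` via `loader` only if it is newer than `cache_file`."""
    import os
    import pandas as pd

    if os.path.exists(cache_file) \
            and os.path.getmtime(cache_file) > os.path.getmtime(src_file):
        return pd.read_pickle(cache_file)
    data = loader(src_file)                 # e.g., pd.Series of YAML content
    folder = os.path.dirname(cache_file)
    if folder:
        os.makedirs(folder, exist_ok=True)
    data.to_pickle(cache_file)
    return data

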
def bdib(
        ticker: str, dt, session='allday', typ='TRADE', **kwargs,
) -> pd.DataFrame:
    """
    Bloomberg intraday bar data

    Args:
        ticker: ticker name
        dt: date to download
        session: [allday, day, am, pm, pre, post]
        typ: [TRADE, BID, ASK, BID_BEST, ASK_BEST, BEST_BID, BEST_ASK]
        **kwargs:
            ref: reference ticker or exchange,
                 used as supplement if exchange info is not defined for `ticker`
            batch: whether is batch process to download data
            log: level of logs

    Returns:
        pd.DataFrame
    """
    from xbbg.core import trials

    logger = logs.get_logger(bdib, **kwargs)

    ex_info = const.exch_info(ticker=ticker, **kwargs)
    if ex_info.empty:
        raise KeyError(f'Cannot find exchange info for {ticker}')

    ss_rng = process.time_range(
        dt=dt, ticker=ticker, session=session, tz=ex_info.tz, **kwargs)
    data_file = storage.bar_file(ticker=ticker, dt=dt, typ=typ)
    if files.exists(data_file) and kwargs.get('cache', True) \
            and (not kwargs.get('reload', False)):
        res = (
            pd.read_parquet(data_file)
            .pipe(pipeline.add_ticker, ticker=ticker)
            .loc[ss_rng[0]:ss_rng[1]]
        )
        if not res.empty:
            logger.debug(f'Loading Bloomberg intraday data from: {data_file}')
            return res

    if not process.check_current(dt=dt, logger=logger, **kwargs):
        return pd.DataFrame()

    cur_dt = pd.Timestamp(dt).strftime('%Y-%m-%d')
    q_tckr = ticker
    if ex_info.get('is_fut', False):
        is_sprd = ex_info.get('has_sprd', False) \
            and (len(ticker[:-1]) != ex_info['tickers'][0])
        if not is_sprd:
            q_tckr = fut_ticker(gen_ticker=ticker, dt=dt, freq=ex_info['freq'])
            if q_tckr == '':
                logger.error(f'cannot find futures ticker for {ticker} ...')
                return pd.DataFrame()

    info_log = f'{q_tckr} / {cur_dt} / {typ}'
    trial_kw = dict(ticker=ticker, dt=dt, typ=typ, func='bdib')
    num_trials = trials.num_trials(**trial_kw)
    if num_trials >= 2:
        if kwargs.get('batch', False):
            return pd.DataFrame()
        logger.info(f'{num_trials} trials with no data {info_log}')
        return pd.DataFrame()

    while conn.bbg_session(**kwargs).tryNextEvent():
        pass
    time_rng = process.time_range(dt=dt, ticker=ticker, session='allday', **kwargs)
    request = process.create_request(
        service='//blp/refdata',
        request='IntradayBarRequest',
        settings=[
            ('security', q_tckr),
            ('eventType', typ),
            ('interval', kwargs.get('interval', 1)),
            ('startDateTime', time_rng[0]),
            ('endDateTime', time_rng[1]),
        ],
        **kwargs,
    )
    logger.debug(f'Sending request to Bloomberg ...\n{request}')
    conn.send_request(request=request, **kwargs)

    res = pd.DataFrame(process.rec_events(func=process.process_bar, **kwargs))
    if res.empty or ('time' not in res):
        logger.warning(f'No data for {info_log} ...')
        trials.update_trials(cnt=num_trials + 1, **trial_kw)
        return pd.DataFrame()

    data = (
        res
        .set_index('time')
        .rename_axis(index=None)
        .rename(columns={'numEvents': 'num_trds'})
        .tz_localize('UTC')
        .tz_convert(ex_info.tz)
        .pipe(pipeline.add_ticker, ticker=ticker)
    )
    if kwargs.get('cache', True):
        storage.save_intraday(
            data=data[ticker], ticker=ticker, dt=dt, typ=typ, **kwargs)

    return data.loc[ss_rng[0]:ss_rng[1]]


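# Hedged usage sketch for `bdib` (ticker and date are illustrative): the
# result carries the ticker as a top-level column via `pipeline.add_ticker`,
# with Bloomberg's `numEvents` renamed to `num_trds` as in the pipe above.
#
#     >>> bars = bdib(ticker='SPY US Equity', dt='2025-01-06')  # doctest: +SKIP
#     >>> 'num_trds' in bars['SPY US Equity'].columns
#     True

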
def bds(tickers, flds, cache=False, **kwargs):
    """
    Download block data from Bloomberg

    Args:
        tickers: ticker(s)
        flds: field(s)
        cache: whether read from cache
        **kwargs: other overrides for query
            -> raw: raw output from `pdblp` library, default False

    Returns:
        pd.DataFrame: block data

    Examples:
        >>> import os
        >>>
        >>> pd.options.display.width = 120
        >>> s_dt, e_dt = '20180301', '20181031'
        >>> dvd = bds(
        ...     'NVDA US Equity', 'DVD_Hist_All',
        ...     DVD_Start_Dt=s_dt, DVD_End_Dt=e_dt, raw=True,
        ... )
        >>> dvd.loc[:, ['ticker', 'name', 'value']].head(8)
                   ticker                name         value
        0  NVDA US Equity       Declared Date    2018-08-16
        1  NVDA US Equity             Ex-Date    2018-08-29
        2  NVDA US Equity         Record Date    2018-08-30
        3  NVDA US Equity        Payable Date    2018-09-21
        4  NVDA US Equity     Dividend Amount          0.15
        5  NVDA US Equity  Dividend Frequency       Quarter
        6  NVDA US Equity       Dividend Type  Regular Cash
        7  NVDA US Equity       Declared Date    2018-05-10
        >>> dvd = bds(
        ...     'NVDA US Equity', 'DVD_Hist_All',
        ...     DVD_Start_Dt=s_dt, DVD_End_Dt=e_dt,
        ... )
        >>> dvd.reset_index().loc[:, ['ticker', 'ex_date', 'dividend_amount']]
                   ticker     ex_date  dividend_amount
        0  NVDA US Equity  2018-08-29             0.15
        1  NVDA US Equity  2018-05-23             0.15
        >>> if not os.environ.get('BBG_ROOT', ''):
        ...     os.environ['BBG_ROOT'] = f'{files.abspath(__file__, 1)}/tests/data'
        >>> idx_kw = dict(End_Dt='20181220', cache=True)
        >>> idx_wt = bds('DJI Index', 'Indx_MWeight_Hist', **idx_kw)
        >>> idx_wt.round(2).tail().reset_index(drop=True)
          index_member  percent_weight
        0         V UN            3.82
        1        VZ UN            1.63
        2       WBA UW            2.06
        3       WMT UN            2.59
        4       XOM UN            2.04
        >>> idx_wt = bds('DJI Index', 'Indx_MWeight_Hist', **idx_kw)
        >>> idx_wt.round(2).head().reset_index(drop=True)
          index_member  percent_weight
        0      AAPL UW            4.65
        1       AXP UN            2.84
        2        BA UN            9.29
        3       CAT UN            3.61
        4      CSCO UW            1.26
    """
    logger = logs.get_logger(bds, level=kwargs.pop('log', logs.LOG_LEVEL))
    has_date = kwargs.pop('has_date', True)
    con, _ = create_connection()
    ovrds = assist.proc_ovrds(**kwargs)

    logger.info(
        f'loading block data from Bloomberg:\n'
        f'{assist.info_qry(tickers=tickers, flds=flds)}'
    )
    data = con.bulkref(tickers=tickers, flds=flds, ovrds=ovrds)
    if not cache:
        return [data]

    qry_data = []
    for (ticker, fld), grp in data.groupby(['ticker', 'field']):
        data_file = storage.ref_file(
            ticker=ticker, fld=fld, has_date=has_date, ext='pkl', cache=cache,
            **kwargs,
        )
        if data_file:
            if not files.exists(data_file):
                qry_data.append(grp)
            files.create_folder(data_file, is_file=True)
            grp.reset_index(drop=True).to_pickle(data_file)

    return qry_data


def ref_file(
        ticker: str, fld: str, has_date=False, cache=False, ext='parq', **kwargs,
) -> str:
    """
    Data file location for Bloomberg reference data

    Args:
        ticker: ticker name
        fld: field
        has_date: whether add current date to data file
        cache: if has_date is True, whether to load from the latest cached file
        ext: file extension
        **kwargs: other overrides passed to ref function

    Returns:
        str: file location

    Examples:
        >>> import shutil
        >>>
        >>> os.environ['BBG_ROOT'] = ''
        >>> ref_file('BLT LN Equity', fld='Crncy') == ''
        True
        >>> os.environ['BBG_ROOT'] = '/data/bbg'
        >>> ref_file('BLT LN Equity', fld='Crncy', cache=True)
        '/data/bbg/Equity/BLT LN Equity/Crncy/ovrd=None.parq'
        >>> ref_file('BLT LN Equity', fld='Crncy')
        ''
        >>> cur_dt_ = utils.cur_time(tz=utils.DEFAULT_TZ)
        >>> ref_file(
        ...     'BLT LN Equity', fld='DVD_Hist_All', has_date=True, cache=True,
        ... ).replace(cur_dt_, '[cur_date]')
        '/data/bbg/Equity/BLT LN Equity/DVD_Hist_All/asof=[cur_date], ovrd=None.parq'
        >>> ref_file(
        ...     'BLT LN Equity', fld='DVD_Hist_All', has_date=True,
        ...     cache=True, DVD_Start_Dt='20180101',
        ... ).replace(cur_dt_, '[cur_date]')[:-5]
        '/data/bbg/Equity/BLT LN Equity/DVD_Hist_All/asof=[cur_date], DVD_Start_Dt=20180101'
        >>> sample = 'asof=2018-11-02, DVD_Start_Dt=20180101, DVD_End_Dt=20180501.pkl'
        >>> root_path = 'xbbg/tests/data'
        >>> sub_path = f'{root_path}/Equity/AAPL US Equity/DVD_Hist_All'
        >>> os.environ['BBG_ROOT'] = root_path
        >>> for tmp_file in files.all_files(sub_path): os.remove(tmp_file)
        >>> files.create_folder(sub_path)
        >>> sample in shutil.copy(f'{root_path}/{sample}', sub_path)
        True
        >>> new_file = ref_file(
        ...     'AAPL US Equity', 'DVD_Hist_All', DVD_Start_Dt='20180101',
        ...     has_date=True, cache=True, ext='pkl'
        ... )
        >>> new_file.split('/')[-1] == f'asof={cur_dt_}, DVD_Start_Dt=20180101.pkl'
        True
        >>> old_file = 'asof=2018-11-02, DVD_Start_Dt=20180101, DVD_End_Dt=20180501.pkl'
        >>> old_full = '/'.join(new_file.split('/')[:-1] + [old_file])
        >>> updated_file = old_full.replace('2018-11-02', cur_dt_)
        >>> updated_file in shutil.copy(old_full, updated_file)
        True
        >>> exist_file = ref_file(
        ...     'AAPL US Equity', 'DVD_Hist_All', DVD_Start_Dt='20180101',
        ...     has_date=True, cache=True, ext='pkl'
        ... )
        >>> exist_file == updated_file
        False
        >>> exist_file = ref_file(
        ...     'AAPL US Equity', 'DVD_Hist_All', DVD_Start_Dt='20180101',
        ...     DVD_End_Dt='20180501', has_date=True, cache=True, ext='pkl'
        ... )
        >>> exist_file == updated_file
        True
    """
    data_path = os.environ.get(overrides.BBG_ROOT, '').replace('\\', '/')
    if (not data_path) or (not cache):
        return ''

    proper_ticker = ticker.replace('/', '_')
    cache_days = kwargs.pop('cache_days', 10)
    root = f'{data_path}/{ticker.split()[-1]}/{proper_ticker}/{fld}'

    ref_kw = {k: v for k, v in kwargs.items() if k not in overrides.PRSV_COLS}
    if len(ref_kw) > 0:
        info = utils.to_str(ref_kw)[1:-1].replace('|', '_')
    else:
        info = 'ovrd=None'

    # Check date info
    if has_date:
        cache_file = f'{root}/asof=[cur_date], {info}.{ext}'
        cur_dt = utils.cur_time()
        start_dt = pd.date_range(end=cur_dt, freq=f'{cache_days}D', periods=2)[0]
        for dt in pd.date_range(start=start_dt, end=cur_dt, normalize=True)[1:][::-1]:
            cur_file = cache_file.replace('[cur_date]', dt.strftime('%Y-%m-%d'))
            if files.exists(cur_file):
                return cur_file
        return cache_file.replace('[cur_date]', cur_dt)

    return f'{root}/{info}.{ext}'


def wrapper(*args, **kwargs):
    scope = utils.func_scope(func=func)
    param = inspect.signature(func).parameters
    port = kwargs.pop('port', _PORT_)
    timeout = kwargs.pop('timeout', _TIMEOUT_)
    restart = kwargs.pop('restart', False)
    all_kw = {
        k: args[n] if n < len(args) else v.default
        for n, (k, v) in enumerate(param.items())
        if k != 'kwargs'
    }
    all_kw.update(kwargs)
    log_level = kwargs.get('log', logs.LOG_LEVEL)

    for to_list in ['tickers', 'flds']:
        conv = all_kw.get(to_list, None)
        if hasattr(conv, 'tolist'):
            all_kw[to_list] = getattr(conv, 'tolist')()
        if isinstance(conv, str):
            all_kw[to_list] = [conv]

    cached_data = []
    if scope in ['xbbg.blp.bdp', 'xbbg.blp.bds']:
        to_qry = cached.bdp_bds_cache(func=func.__name__, **all_kw)
        cached_data += to_qry.cached_data
        if not (to_qry.tickers and to_qry.flds):
            if not cached_data:
                return pd.DataFrame()
            res = pd.concat(cached_data, sort=False).reset_index(drop=True)
            if not all_kw.get('raw', False):
                res = assist.format_output(
                    data=res, source=func.__name__,
                    col_maps=all_kw.get('col_maps', dict()),
                )
            return res
        all_kw['tickers'] = to_qry.tickers
        all_kw['flds'] = to_qry.flds

    if scope in ['xbbg.blp.bdib']:
        data_file = storage.hist_file(
            ticker=all_kw['ticker'], dt=all_kw['dt'], typ=all_kw['typ'],
        )
        if files.exists(data_file):
            logger = logs.get_logger(func, level=log_level)
            if all_kw.get('batch', False):
                return
            logger.debug(f'reading from {data_file} ...')
            return assist.format_intraday(data=pd.read_parquet(data_file), **all_kw)

    _, new = create_connection(port=port, timeout=timeout, restart=restart)
    res = func(**{k: v for k, v in all_kw.items() if k not in ['raw', 'col_maps']})
    if new:
        delete_connection()

    if scope.startswith('xbbg.blp.') and isinstance(res, list):
        final = cached_data + res
        if not final:
            return pd.DataFrame()
        res = pd.DataFrame(pd.concat(final, sort=False))
    if (scope in ['xbbg.blp.bdp', 'xbbg.blp.bds']) \
            and (not all_kw.get('raw', False)):
        res = assist.format_output(
            data=res.reset_index(drop=True),
            source=func.__name__,
            col_maps=all_kw.get('col_maps', dict()),
        )

    return res


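# Hedged sketch of the decorator `wrapper` is assumed to live in - the
# standard closure-over-`func` idiom (the enclosing name `with_bloomberg` is
# illustrative, not verified against the module):
#
#     def with_bloomberg(func):
#         @functools.wraps(func)
#         def wrapper(*args, **kwargs):
#             ...  # body as above
#         return wrapper
#
# Connection kwargs are popped before the query runs, so callers can write
# (port value illustrative of the Bloomberg desktop default):
#
#     >>> blp.bdp('SPY US Equity', 'Crncy', port=8194, timeout=3000)  # doctest: +SKIP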