def update_missing(**kwargs):
    """
    Update number of trials for missing values

    Returns:
        dict: mapping of query key -> number of failed attempts so far
    """
    key = info_key(**kwargs)
    data_path = os.environ.get(BBG_ROOT, '').replace('\\', '/')
    empty_log = f'{data_path}/Logs/EmptyQueries.json'

    # Start from whatever tallies are already on disk, if any
    tally = dict()
    if files.exists(empty_log):
        with open(empty_log, 'r') as handle:
            tally = json.load(fp=handle)
    tally[key] = tally.get(key, 0) + 1

    # Existing log: block until it becomes writable.
    # Missing log: make sure its parent folder exists first.
    if files.exists(empty_log):
        while not os.access(empty_log, os.W_OK):
            time.sleep(1)
    else:
        files.create_folder(empty_log, is_file=True)

    with open(empty_log, 'w') as handle:
        json.dump(tally, fp=handle, indent=2)

    # Keep an in-process copy alongside the on-disk log
    globals()[_MISSING_] = tally
    return tally
def load_config(cat: str) -> pd.DataFrame:
    """
    Load market info that can apply pd.Series directly

    Args:
        cat: category name

    Returns:
        pd.DataFrame
    """
    cfg_files = config_files(cat=cat)
    cache_cfg = f'{PKG_PATH}/markets/cached/{cat}_cfg.pkl'

    # Serve the pickle cache when it is newer than every source YAML
    newest_src = max(files.modified_time(cf) for cf in cfg_files)
    if files.exists(cache_cfg):
        if files.modified_time(cache_cfg) > newest_src:
            return pd.read_pickle(cache_cfg)

    # Rebuild from YAML sources and refresh the cache
    frames = [load_yaml(cf).apply(pd.Series) for cf in cfg_files]
    config = pd.concat(frames, sort=False)

    files.create_folder(cache_cfg, is_file=True)
    config.to_pickle(cache_cfg)
    return config
def asset_config(asset: str) -> pd.DataFrame:
    """
    Load info for given asset

    Args:
        asset: asset name

    Returns:
        pd.DataFrame
    """
    cfg_files = param.config_files('assets')
    cache_cfg = f'{PKG_PATH}/markets/cached/{asset}_cfg.pkl'

    # Reuse the cached pickle when no source YAML changed since it was built
    newest_src = max(files.modified_time(cf) for cf in cfg_files)
    if files.exists(cache_cfg) and files.modified_time(cache_cfg) > newest_src:
        return pd.read_pickle(cache_cfg)

    # One exploded frame per source file, later entries win on duplicates
    parts = [
        explode(
            data=pd.DataFrame(param.load_yaml(cf).get(asset, [])),
            columns=ASSET_INFO[asset],
        )
        for cf in cfg_files
    ]
    config = (
        pd.concat(parts, sort=False)
        .drop_duplicates(keep='last')
        .reset_index(drop=True)
    )

    files.create_folder(cache_cfg, is_file=True)
    config.to_pickle(cache_cfg)
    return config
def bds(tickers, flds, **kwargs) -> pd.DataFrame:
    """
    Bloomberg block data

    Args:
        tickers: ticker(s)
        flds: field
        **kwargs: other overrides for query

    Returns:
        pd.DataFrame: block data
    """
    logger = logs.get_logger(bds, **kwargs)

    # Fan out per ticker and stack results for non-string input.
    # Checked first so no Bloomberg service / request object is created
    # at this level — the original built one per recursion and never used it.
    if not isinstance(tickers, str):
        return pd.DataFrame(pd.concat(
            [bds(tickers=ticker, flds=flds, **kwargs) for ticker in tickers],
            sort=False,
        ))

    # Cache hit: return stored pickle without touching Bloomberg
    data_file = storage.ref_file(
        ticker=tickers, fld=flds, has_date=True, ext='pkl', **kwargs)
    if files.exists(data_file):
        logger.debug(f'Loading Bloomberg data from: {data_file}')
        return pd.DataFrame(pd.read_pickle(data_file))

    # Cache miss: build and send the reference data request
    service = conn.bbg_service(service='//blp/refdata', **kwargs)
    request = service.createRequest('ReferenceDataRequest')
    process.init_request(request=request, tickers=tickers, flds=flds, **kwargs)
    logger.debug(f'Sending request to Bloomberg ...\n{request}')
    conn.send_request(request=request, **kwargs)

    res = pd.DataFrame(process.rec_events(func=process.process_ref, **kwargs))
    if kwargs.get('raw', False):
        return res
    if res.empty or any(fld not in res for fld in ['ticker', 'field']):
        return pd.DataFrame()

    data = (
        res
        .set_index(['ticker', 'field'])
        .droplevel(axis=0, level=1)
        .rename_axis(index=None)
        .pipe(pipeline.standard_cols, col_maps=kwargs.get('col_maps', None))
    )
    if data_file:
        logger.debug(f'Saving Bloomberg data to: {data_file}')
        files.create_folder(data_file, is_file=True)
        data.to_pickle(data_file)

    return data
def load_info(cat):
    """
    Load parameters for assets

    Args:
        cat: category

    Returns:
        dict

    Examples:
        >>> import pandas as pd
        >>>
        >>> assets = load_info(cat='assets')
        >>> all(cat in assets for cat in ['Equity', 'Index', 'Curncy', 'Corp'])
        True
        >>> os.environ['BBG_PATH'] = ''
        >>> exch = load_info(cat='exch')
        >>> pd.Series(exch['EquityUS']).allday
        [400, 2000]
        >>> test_root = f'{PKG_PATH}/tests'
        >>> os.environ['BBG_PATH'] = test_root
        >>> ovrd_exch = load_info(cat='exch')
        >>> # Somehow os.environ is not set properly in doctest environment
        >>> ovrd_exch.update(_load_yaml_(f'{test_root}/markets/exch.yml'))
        >>> pd.Series(ovrd_exch['EquityUS']).allday
        [300, 2100]
    """
    yaml_file = f'{PKG_PATH}/markets/{cat}.yml'
    # NOTE(review): docstring examples set BBG_PATH but this reads BBG_ROOT —
    # confirm which env var is the intended override root
    root = os.environ.get('BBG_ROOT', '').replace('\\', '/')
    yaml_ovrd = f'{root}/markets/{cat}.yml' if root else ''
    if not files.exists(yaml_ovrd):
        yaml_ovrd = ''

    # Serve the pickle cache when it is newer than base (and override) YAML
    pkl_file = f'{PKG_PATH}/markets/cached/{cat}.pkl'
    ytime = files.file_modified_time(yaml_file)
    if yaml_ovrd:
        ytime = max(ytime, files.file_modified_time(yaml_ovrd))
    if files.exists(pkl_file) and files.file_modified_time(pkl_file) > ytime:
        return pd.read_pickle(pkl_file).to_dict()

    res = _load_yaml_(yaml_file)
    if yaml_ovrd:
        # BUG FIX: loop variable renamed `key` — it used to shadow the
        # `cat` parameter; dict overrides merge, list overrides extend
        for key, ovrd in _load_yaml_(yaml_ovrd).items():
            if isinstance(ovrd, dict):
                if key in res:
                    res[key].update(ovrd)
                else:
                    res[key] = ovrd
            # BUG FIX: res.get avoids KeyError when a list override
            # has no corresponding base entry
            if isinstance(ovrd, list) and isinstance(res.get(key), list):
                res[key] += ovrd

    # Skip cache writes during pytest runs
    if not hasattr(sys, 'pytest_call'):
        files.create_folder(pkl_file, is_file=True)
        pd.Series(res).to_pickle(pkl_file)

    return res
def update_missing(**kwargs):
    """ Update number of trials for missing values """
    data_path = root_path()
    # Nothing to record without a data root or without query info
    if not data_path or not kwargs:
        return

    log_path = f'{data_path}/Logs/{missing_info(**kwargs)}'
    # Next trial number = number of existing marker files + 1
    cnt = len(files.all_files(log_path)) + 1
    files.create_folder(log_path)
    # Touch an empty marker file recording this attempt
    with open(f'{log_path}/{cnt}.log', 'a'):
        pass
def update_missing(**kwargs):
    """ Update number of trials for missing values """
    data_path = os.environ.get(BBG_ROOT, '').replace('\\', '/')
    # Nothing to record without a data root or without query info
    if not data_path or not kwargs:
        return

    log_path = f'{data_path}/Logs/{missing_info(**kwargs)}'
    # Next trial number = number of existing marker files + 1
    cnt = len(files.all_files(log_path)) + 1
    files.create_folder(log_path)
    # Touch an empty marker file recording this attempt
    with open(f'{log_path}/{cnt}.log', 'a'):
        pass
def save_intraday(data: pd.DataFrame, ticker: str, dt, typ='TRADE', **kwargs):
    """
    Check whether data is done for the day and save

    Args:
        data: data
        ticker: ticker
        dt: date
        typ: [TRADE, BID, ASK, BID_BEST, ASK_BEST, BEST_BID, BEST_ASK]

    Examples:
        >>> os.environ['BBG_ROOT'] = 'xbbg/tests/data'
        >>> sample = pd.read_parquet('xbbg/tests/data/aapl.parq')
        >>> save_intraday(sample, 'AAPL US Equity', '2018-11-02')
        >>> # Invalid exchange
        >>> save_intraday(sample, 'AAPL XX Equity', '2018-11-02')
        >>> # Invalid empty data
        >>> save_intraday(pd.DataFrame(), 'AAPL US Equity', '2018-11-02')
        >>> # Invalid date - too close
        >>> cur_dt = utils.cur_time()
        >>> save_intraday(sample, 'AAPL US Equity', cur_dt)
    """
    cur_dt = pd.Timestamp(dt).strftime('%Y-%m-%d')
    logger = logs.get_logger(save_intraday, level='debug')
    info = f'{ticker} / {cur_dt} / {typ}'

    # No target file (e.g. data root not configured) => nothing to do
    data_file = bar_file(ticker=ticker, dt=dt, typ=typ)
    if not data_file:
        return

    if data.empty:
        logger.warning(f'data is empty for {info} ...')
        return

    # Unknown exchange => cannot determine market close, skip silently
    exch = const.exch_info(ticker=ticker, **kwargs)
    if exch.empty:
        return

    end_time = pd.Timestamp(
        const.market_timing(ticker=ticker, dt=dt, timing='FINISHED', **kwargs)
    ).tz_localize(exch.tz)
    now = pd.Timestamp('now', tz=exch.tz) - pd.Timedelta('1H')

    if end_time > now:
        # BUG FIX: message previously printed `<` although this branch fires
        # when market close is LATER than now - 1H (market not closed long
        # enough for the day's data to be considered final)
        logger.debug(
            f'skip saving cause market close ({end_time}) > now - 1H ({now}) ...'
        )
        return

    logger.info(f'saving data to {data_file} ...')
    files.create_folder(data_file, is_file=True)
    data.to_parquet(data_file)
def update_trials(**kwargs):
    """ Update number of trials for missing values """
    data_path = root_path()
    if not data_path:
        return

    # Bump the stored count by one when the caller did not supply it
    if 'cnt' not in kwargs:
        kwargs['cnt'] = num_trials(**kwargs) + 1

    database = f'{data_path}/Logs/xbbg.db'
    files.create_folder(database, is_file=True)
    with db.SQLite(database) as cur:
        cur.execute(TRIALS_TABLE)  # ensure the trials table exists
        cur.execute(db.replace_into(table='trials', **trail_info(**kwargs)))
def bdp(tickers, flds, cache=False, **kwargs):
    """
    Get reference data and save to

    Args:
        tickers: tickers
        flds: fields to query
        cache: bool - use cache to store data
        **kwargs: overrides

    Returns:
        pd.DataFrame

    Examples:
        >>> bdp('IQ US Equity', 'Crncy', raw=True)
                 ticker  field value
        0  IQ US Equity  Crncy   USD
        >>> bdp('IQ US Equity', 'Crncy').reset_index()
                 ticker crncy
        0  IQ US Equity   USD
    """
    logger = logs.get_logger(bdp, level=kwargs.pop('log', logs.LOG_LEVEL))
    con, _ = create_connection()
    ovrds = assist.proc_ovrds(**kwargs)

    logger.info(
        f'loading reference data from Bloomberg:\n'
        f'{assist.info_qry(tickers=tickers, flds=flds)}'
    )
    data = con.ref(tickers=tickers, flds=flds, ovrds=ovrds)
    # NOTE(review): returns the frame wrapped in a list here, while the
    # docstring examples show a bare DataFrame — confirm the intended contract
    if not cache:
        return [data]

    # Persist one single-row pickle per (ticker, field); collect only the
    # rows that were not already cached on disk
    qry_data = []
    for r, snap in data.iterrows():
        subset = [r]
        data_file = storage.ref_file(
            ticker=snap.ticker, fld=snap.field, ext='pkl', cache=cache, **kwargs
        )
        if data_file:
            if not files.exists(data_file):
                qry_data.append(data.iloc[subset])
            # NOTE(review): `r` is an index LABEL from iterrows but is used
            # positionally via `.iloc` — only safe on a default RangeIndex;
            # verify `con.ref` never returns a custom index
            files.create_folder(data_file, is_file=True)
            data.iloc[subset].to_pickle(data_file)
    return qry_data
def num_trials(**kwargs) -> int:
    """
    Check number of trials for missing values

    Returns:
        int: number of trials already tried
    """
    data_path = root_path()
    if not data_path:
        return 0

    database = f'{data_path}/Logs/xbbg.db'
    files.create_folder(database, is_file=True)
    with db.SQLite(database) as cur:
        cur.execute(TRIALS_TABLE)  # ensure the trials table exists
        rows = cur.execute(db.select(
            table='trials',
            **trail_info(**kwargs),
        )).fetchall()

    # Trial count lives in the last column of the first matching row
    return rows[0][-1] if rows else 0
def _bds_(
        ticker: str,
        fld: str,
        logger: logs.logging.Logger,
        use_port: bool = False,
        **kwargs,
) -> pd.DataFrame:
    """ Get data of BDS of single ticker """
    # Block data defaults to date-stamped cache files
    kwargs.setdefault('has_date', True)

    # Cache hit: return stored pickle without touching Bloomberg
    data_file = storage.ref_file(ticker=ticker, fld=fld, ext='pkl', **kwargs)
    if files.exists(data_file):
        logger.debug(f'Loading Bloomberg data from: {data_file}')
        return pd.DataFrame(pd.read_pickle(data_file))

    req_type = 'PortfolioDataRequest' if use_port else 'ReferenceDataRequest'
    request = process.create_request(
        service='//blp/refdata',
        request=req_type,
        **kwargs,
    )
    process.init_request(request=request, tickers=ticker, flds=fld, **kwargs)
    logger.debug(f'Sending request to Bloomberg ...\n{request}')
    conn.send_request(request=request, **kwargs)

    res = pd.DataFrame(process.rec_events(func=process.process_ref, **kwargs))
    if kwargs.get('raw', False):
        return res
    if res.empty or any(col not in res for col in ['ticker', 'field']):
        return pd.DataFrame()

    # Collapse the (ticker, field) index and normalize column names
    indexed = res.set_index(['ticker', 'field'])
    indexed = indexed.droplevel(axis=0, level=1)
    indexed = indexed.rename_axis(index=None)
    data = pipeline.standard_cols(
        indexed, col_maps=kwargs.get('col_maps', None))

    if data_file:
        logger.debug(f'Saving Bloomberg data to: {data_file}')
        files.create_folder(data_file, is_file=True)
        data.to_pickle(data_file)

    return data
def load_yaml(yaml_file: str) -> pd.Series:
    """
    Load yaml from cache

    Args:
        yaml_file: YAML file name

    Returns:
        pd.Series
    """
    cache_file = (
        yaml_file
        .replace('/markets/', '/markets/cached/')
        .replace('.yml', '.pkl')
    )

    # Cache hit: pickle strictly newer than the YAML source
    source_mtime = files.modified_time(yaml_file)
    if files.exists(cache_file):
        if files.modified_time(cache_file) > source_mtime:
            return pd.read_pickle(cache_file)

    with open(yaml_file, 'r') as handle:
        parsed = pd.Series(YAML().load(handle))

    files.create_folder(cache_file, is_file=True)
    parsed.to_pickle(cache_file)
    return parsed
def bds(tickers, flds, cache=False, **kwargs):
    """
    Download block data from Bloomberg

    Args:
        tickers: ticker(s)
        flds: field(s)
        cache: whether read from cache
        **kwargs: other overrides for query
            -> raw: raw output from `pdbdp` library, default False

    Returns:
        pd.DataFrame: block data

    Examples:
        >>> import os
        >>>
        >>> pd.options.display.width = 120
        >>> s_dt, e_dt = '20180301', '20181031'
        >>> dvd = bds(
        ...     'NVDA US Equity', 'DVD_Hist_All',
        ...     DVD_Start_Dt=s_dt, DVD_End_Dt=e_dt, raw=True,
        ... )
        >>> dvd.loc[:, ['ticker', 'name', 'value']].head(8)
                   ticker                name         value
        0  NVDA US Equity       Declared Date    2018-08-16
        1  NVDA US Equity             Ex-Date    2018-08-29
        2  NVDA US Equity         Record Date    2018-08-30
        3  NVDA US Equity        Payable Date    2018-09-21
        4  NVDA US Equity     Dividend Amount          0.15
        5  NVDA US Equity  Dividend Frequency       Quarter
        6  NVDA US Equity       Dividend Type  Regular Cash
        7  NVDA US Equity       Declared Date    2018-05-10
        >>> dvd = bds(
        ...     'NVDA US Equity', 'DVD_Hist_All',
        ...     DVD_Start_Dt=s_dt, DVD_End_Dt=e_dt,
        ... )
        >>> dvd.reset_index().loc[:, ['ticker', 'ex_date', 'dividend_amount']]
                   ticker     ex_date  dividend_amount
        0  NVDA US Equity  2018-08-29             0.15
        1  NVDA US Equity  2018-05-23             0.15
        >>> if not os.environ.get('BBG_ROOT', ''):
        ...     os.environ['BBG_ROOT'] = f'{files.abspath(__file__, 1)}/tests/data'
        >>> idx_kw = dict(End_Dt='20181220', cache=True)
        >>> idx_wt = bds('DJI Index', 'Indx_MWeight_Hist', **idx_kw)
        >>> idx_wt.round(2).tail().reset_index(drop=True)
          index_member  percent_weight
        0         V UN            3.82
        1        VZ UN            1.63
        2       WBA UW            2.06
        3       WMT UN            2.59
        4       XOM UN            2.04
        >>> idx_wt = bds('DJI Index', 'Indx_MWeight_Hist', **idx_kw)
        >>> idx_wt.round(2).head().reset_index(drop=True)
          index_member  percent_weight
        0      AAPL UW            4.65
        1       AXP UN            2.84
        2        BA UN            9.29
        3       CAT UN            3.61
        4      CSCO UW            1.26
    """
    logger = logs.get_logger(bds, level=kwargs.pop('log', logs.LOG_LEVEL))
    has_date = kwargs.pop('has_date', True)
    con, _ = create_connection()
    ovrds = assist.proc_ovrds(**kwargs)

    logger.info(
        f'loading block data from Bloomberg:\n'
        f'{assist.info_qry(tickers=tickers, flds=flds)}'
    )
    data = con.bulkref(tickers=tickers, flds=flds, ovrds=ovrds)
    # NOTE(review): returns the frame wrapped in a list here, while the
    # docstring says pd.DataFrame — confirm the intended contract with callers
    if not cache:
        return [data]

    # Persist one pickle per (ticker, field) group; collect only groups
    # that were not already cached on disk
    qry_data = []
    for (ticker, fld), grp in data.groupby(['ticker', 'field']):
        data_file = storage.ref_file(
            ticker=ticker, fld=fld, has_date=has_date, ext='pkl',
            cache=cache, **kwargs
        )
        if data_file:
            if not files.exists(data_file):
                qry_data.append(grp)
            files.create_folder(data_file, is_file=True)
            grp.reset_index(drop=True).to_pickle(data_file)
    return qry_data