from os import listdir
from os.path import isfile, join

import numpy as np
import pandas as pd
from sqlalchemy import func, select, update

# db (the session), Base, the Corporate/Entity models, get_tickers, and the
# VALID_SCRTY_DS / INVALID_DEBT_TYPES / INVALID_SUB_PRDCT_TYPES code lists
# are assumed to come from the project's own modules; their import paths are
# not shown in the source.


def insert_entities(cls, entity_path, tickers_path):
    # load raw entity data
    target_cols = ['CIK', 'SEC_Name', 'CUSIP6']
    dtypes = {'CIK': str, 'SEC_Name': str, 'CUSIP6': str}
    df = pd.read_csv(entity_path, usecols=target_cols, dtype=dtypes)

    ticks_target_cols = [
        'ticker', 'sic_code', 'naics', 'cik_code', 'SICGroupMinorGroupTitle'
    ]
    ticks_dtypes = {k: str for k in ticks_target_cols}
    dfticks = pd.read_csv(tickers_path,
                          usecols=ticks_target_cols,
                          dtype=ticks_dtypes)

    # join entity and ticker data on CIK, drop overlong tickers
    df = df.merge(dfticks, left_on='CIK', right_on='cik_code')
    df = df.drop(labels=['cik_code'], axis=1)
    df = df[df.ticker.str.len() <= 10]

    # update column names, filter valid cusips and drop duplicates
    colnames = {
        'CIK': 'cik',
        'SEC_Name': 'name',
        'CUSIP6': 'cusip6',
        'sic_code': 'sic',
        'SICGroupMinorGroupTitle': 'sic_mtitle'
    }
    df = df.rename(columns=colnames)
    df = df[df.cusip6.str.contains('^[A-Z0-9]{6}$')]
    df = df.drop_duplicates(subset=['cusip6'])

    # insert data into db
    db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
    db.commit()

def update_corporate_fk():
    # corporates matching 1st 6 digits of CUSIP-9 with entity CUSIP-6
    s = select([
        Corporate.id, Entity.id, Corporate.entity_id, Corporate.cusip9,
        Entity.cusip6
    ]).where(Corporate.entity_id.is_(None)).where(
        func.left(Corporate.cusip9, 6) == Entity.cusip6)
    rows = db.execute(s).fetchall()

    # update entity_id for CUSIP-6 matches
    for cid, eid, *_ in rows:
        db.query(Corporate).filter(Corporate.id == cid).update(
            {Corporate.entity_id: eid}, synchronize_session=False)
    db.commit()

    # corporates matching company_symbol with entity ticker
    s = select([
        Corporate.id, Entity.id, Corporate.entity_id,
        Corporate.company_symbol, Entity.ticker
    ]).where(Corporate.entity_id.is_(None)).where(
        Corporate.company_symbol == Entity.ticker)
    rows = db.execute(s).fetchall()

    # update entity_id for ticker matches
    for cid, eid, *_ in rows:
        db.query(Corporate).filter(Corporate.id == cid).update(
            {Corporate.entity_id: eid}, synchronize_session=False)
    db.commit()

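# Note: the loops above issue one UPDATE per matched corporate. A set-based
# alternative in the same correlated-update style as update_equity_px_fk()
# below is sketched here, assuming the 'corporate' and 'entity' Core tables
# mirror the ORM models (a sketch; not used by the code above):
#
#   corporate = Base.metadata.tables['corporate']
#   entity = Base.metadata.tables['entity']
#   s = update(corporate).where(corporate.columns.entity_id.is_(None)).where(
#       func.left(corporate.columns.cusip9, 6) == entity.columns.cusip6
#   ).values(entity_id=entity.columns.id)
#   db.execute(s)
#   db.commit()
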
def insert_corporates(cls, corps_path, nrows=None):
    # load data
    df = pd.read_csv(corps_path,
                     parse_dates=['trd_rpt_efctv_dt', 'mtrty_dt'],
                     nrows=nrows)
    cmap = {'bond_sym_id': 'finra_symbol', 'cusip_id': 'cusip9'}
    df = df.rename(columns=cmap)

    # drop duplicate records and records missing required fields
    df = df.drop_duplicates(subset=['finra_symbol'])
    df = df.drop_duplicates(subset=['cusip9'])
    df = df.drop_duplicates(subset=['bsym_id'])
    df = df.dropna(subset=[
        'finra_symbol', 'cusip9', 'bsym_id', 'cpn_rt', 'cpn_type_cd'
    ])

    # only records with valid CUSIP-9 values
    df = df[df.cusip9.str.contains('^[A-Z0-9]{9}$')]

    # remove converts
    df = df[df.cnvrb_fl != 'Y']

    # remove unused fields, convert NaN to None, convert booleans
    df = df.drop(labels=['cnvrb_fl', 'dissem', 'grade'], axis=1)
    debt_type_cd = df.debt_type_cd.where(df.debt_type_cd.notnull(), None)
    df.loc[:, 'debt_type_cd'] = debt_type_cd
    df.loc[:, 'ind_144a'] = df.ind_144a == 'Y'

    # insert cleaned data into db table
    db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
    db.commit()

def update_equity_px_fk():
    # equity pxs matching ticker with entity ticker
    equity_px = Base.metadata.tables['equity_px']
    entity = Base.metadata.tables['entity']
    s = update(equity_px).where(equity_px.columns.entity_id.is_(None)).where(
        equity_px.columns.ticker == entity.columns.ticker).values(
            entity_id=entity.columns.id)
    db.execute(s)
    db.commit()

def update_financial_fk():
    # financials matching ticker with entity ticker
    financial = Base.metadata.tables['financial']
    entity = Base.metadata.tables['entity']
    s = update(financial).where(financial.columns.entity_id.is_(None)).where(
        financial.columns.ticker == entity.columns.ticker).values(
            entity_id=entity.columns.id)
    db.execute(s)
    db.commit()

def update_corp_tx_fk():
    corp_tx = Base.metadata.tables['corp_tx']
    corporate = Base.metadata.tables['corporate']

    # corp_tx cusip_id matches corporate cusip9
    s = update(corp_tx).where(corp_tx.columns.corporate_id.is_(None)).where(
        corp_tx.columns.cusip_id == corporate.columns.cusip9).values(
            corporate_id=corporate.columns.id)
    db.execute(s)
    db.commit()

def insert_financials(cls, fin_dir, nrows=None):
    tickers = get_tickers(fin_dir)
    fpaths = [join(fin_dir, f'{t}.csv') for t in tickers]
    for ticker, fin_path in zip(tickers, fpaths):
        df = pd.read_csv(fin_path, nrows=nrows).dropna(
            subset=['earnings_release_date', 'filing_date'])
        if df.shape[0] > 0:
            df = df.replace(to_replace={np.nan: None})
            df['ticker'] = ticker
            db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
            db.commit()

def insert_interest_rates(cls, rdir):
    pends = [p for p in listdir(rdir) if isfile(join(rdir, p))]
    targets = [(p.split('.csv')[0], join(rdir, p)) for p in pends]

    # outer-join each rate series on date, one column per series
    df = None
    for field, rates_path in targets:
        dftmp = pd.read_csv(rates_path, na_values=['.']).dropna()
        if dftmp.shape[0] > 0:
            dftmp = dftmp.rename(columns={'interest_rate': field}) \
                         .set_index('date')
            if df is None:
                df = dftmp.copy()
            else:
                df = df.join(dftmp, how='outer')

    df = df.reset_index().dropna()
    db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
    db.commit()

def insert_corp_txs(cls, txs_path, nrows=None):
    df = pd.read_csv(txs_path, nrows=nrows).dropna()
    if nrows is None:
        step_size = 100000
    else:
        step_size = min(nrows, 100000)

    # insert in chunks of step_size rows; iloc slicing past the end of the
    # frame is safe, so the final (possibly partial) chunk is covered by the
    # same slice, e.g. 250,000 rows with step_size 100,000 yields chunks
    # [0:100000], [100000:200000], [200000:250000]
    for idx in range(0, df.shape[0], step_size):
        dftxs = df.iloc[idx:idx + step_size]
        db.bulk_insert_mappings(cls, dftxs.to_dict(orient='records'))
        db.commit()

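# An alternative sketch for insert_corp_txs using pandas' chunked reader,
# which avoids loading the whole CSV before inserting (assumes dropping NaN
# rows per chunk is acceptable; not used by the code above):
#
#   for dftxs in pd.read_csv(txs_path, chunksize=100000):
#       dftxs = dftxs.dropna()
#       db.bulk_insert_mappings(cls, dftxs.to_dict(orient='records'))
#       db.commit()
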
def insert_equity_pxs(cls, equities_dir, nrows=None):
    tickers = get_tickers(equities_dir)
    pxpaths = [join(equities_dir, f'{t}.csv') for t in tickers]
    cmap = {
        'Date': 'date',
        'Volume': 'volume',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Adj Close': 'adj_close'
    }
    for ticker, pxpath in zip(tickers, pxpaths):
        df = pd.read_csv(pxpath, nrows=nrows).dropna()
        if df.shape[0] > 0:
            df = df.rename(columns=cmap)
            df['ticker'] = ticker
            db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
            db.commit()

def del_zero_cpn(table):
    # remove zero coupon bonds
    db.query(table).filter(table.cpn_rt == 0).delete(
        synchronize_session=False)
    db.commit()

def del_no_corporate(table):
    # remove rows not linked to a corporate
    db.query(table).filter(
        table.corporate_id.is_(None)).delete(synchronize_session=False)
    db.commit()

def del_no_entity(table):
    # remove rows not linked to an entity
    db.query(table).filter(
        table.entity_id.is_(None)).delete(synchronize_session=False)
    db.commit()

def del_high_cpn(table):
    # remove high coupon bonds
    db.query(table).filter(table.cpn_rt >= 15).delete(
        synchronize_session=False)
    db.commit()

def update_scrty_ds(table):
    # standardize all remaining security descriptions to 'Senior Unsecured'
    db.query(table).update({table.scrty_ds: 'Senior Unsecured'},
                           synchronize_session=False)
    db.commit()

def del_invalid_scrty_ds(table):
    # only senior unsecured notes
    db.query(table).filter(table.scrty_ds.notin_(VALID_SCRTY_DS)).delete(
        synchronize_session=False)
    db.commit()

def del_invalid_debt_types(table):
    # delete rows with invalid debt type codes
    db.query(table).filter(table.debt_type_cd.in_(INVALID_DEBT_TYPES)).delete(
        synchronize_session=False)
    db.commit()

def del_invalid_sub_prdct_types(table):
    # delete rows with invalid sub product type codes
    db.query(table).filter(
        table.sub_prdct_type.in_(INVALID_SUB_PRDCT_TYPES)).delete(
            synchronize_session=False)
    db.commit()

def del_invalid_cpn_types(table):
    # only vanilla fixed coupon bonds
    db.query(table) \
        .filter(table.cpn_type_cd != 'FXPV') \
        .delete(synchronize_session=False)
    db.commit()

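# A minimal driver sketch for the load order implied above. Entity and
# Corporate are the models referenced in this module; the remaining class
# names and all file paths are hypothetical placeholders:
#
#   insert_entities(Entity, 'data/entity.csv', 'data/tickers.csv')
#   insert_corporates(Corporate, 'data/corporates.csv')
#   insert_corp_txs(CorpTx, 'data/corp_txs.csv')        # CorpTx: hypothetical
#   insert_equity_pxs(EquityPx, 'data/equities/')       # EquityPx: hypothetical
#   insert_financials(Financial, 'data/financials/')    # Financial: hypothetical
#   insert_interest_rates(InterestRate, 'data/rates/')  # InterestRate: hypothetical
#   update_corporate_fk()
#   update_corp_tx_fk()
#   update_equity_px_fk()
#   update_financial_fk()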