Example #1
0
    def insert_entities(cls, entity_path, tickers_path):
        """Load, merge, clean, and bulk-insert entity reference data.

        Reads the SEC entity file and the tickers file, inner-joins them
        on CIK, keeps only rows with a well-formed CUSIP-6, and inserts
        the result into this table.

        Parameters
        ----------
        entity_path : str
            CSV with CIK, SEC_Name and CUSIP6 columns.
        tickers_path : str
            CSV with ticker, sic_code, naics, cik_code and
            SICGroupMinorGroupTitle columns.
        """
        # load raw entity data; force str dtype so codes keep leading zeros
        target_cols = ['CIK', 'SEC_Name', 'CUSIP6']
        dtypes = {'CIK': str, 'SEC_Name': str, 'CUSIP6': str}
        df = pd.read_csv(entity_path, usecols=target_cols, dtype=dtypes)
        ticks_target_cols = [
            'ticker', 'sic_code', 'naics', 'cik_code',
            'SICGroupMinorGroupTitle'
        ]
        ticks_dtypes = {k: str for k in ticks_target_cols}
        dfticks = pd.read_csv(tickers_path,
                              usecols=ticks_target_cols,
                              dtype=ticks_dtypes)
        # inner join: entities without a ticker record are dropped
        df = df.merge(dfticks, left_on='CIK', right_on='cik_code')
        df = df.drop(labels=['cik_code'], axis=1)
        df = df[df.ticker.str.len() <= 10]

        # update column names, filter valid cusips and drop duplicates
        colnames = {
            'CIK': 'cik',
            'SEC_Name': 'name',
            'CUSIP6': 'cusip6',
            'sic_code': 'sic',
            'SICGroupMinorGroupTitle': 'sic_mtitle'
        }
        df = df.rename(columns=colnames)
        # BUG FIX: na=False drops rows with a missing CUSIP6 outright;
        # previously str.contains returned NA for them and boolean
        # indexing with an NA mask raises in pandas. Raw string keeps
        # the regex free of escape-sequence pitfalls.
        df = df[df.cusip6.str.contains(r'^[A-Z0-9]{6}$', na=False)]
        df = df.drop_duplicates(subset=['cusip6'])

        # insert cleaned rows into the db
        db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
        db.commit()
Example #2
0
def update_corporate_fk():
    """Backfill Corporate.entity_id from CUSIP-6 and ticker matches."""
    # pass 1: first 6 characters of the corporate CUSIP-9 must equal
    # the entity CUSIP-6
    stmt = select([
        Corporate.id, Entity.id, Corporate.entity_id, Corporate.cusip9,
        Entity.cusip6
    ]).where(Corporate.entity_id.is_(None)).where(
        func.left(Corporate.cusip9, 6) == Entity.cusip6)
    for corp_id, ent_id, _, _, _ in db.execute(stmt).fetchall():
        db.query(Corporate).filter(Corporate.id == corp_id).update(
            {Corporate.entity_id: ent_id}, synchronize_session=False)
        db.commit()

    # pass 2: corporate company_symbol must equal the entity ticker
    stmt = select([
        Corporate.id, Entity.id, Corporate.entity_id, Corporate.company_symbol,
        Entity.ticker
    ]).where(Corporate.entity_id.is_(None)).where(
        Corporate.company_symbol == Entity.ticker)
    for corp_id, ent_id, _, _, _ in db.execute(stmt).fetchall():
        db.query(Corporate).filter(Corporate.id == corp_id).update(
            {Corporate.entity_id: ent_id}, synchronize_session=False)
        db.commit()
Example #3
0
    def insert_corporates(cls, corps_path, nrows=None):
        """Load the corporates CSV, clean it, and bulk-insert the rows.

        Parameters
        ----------
        corps_path : str
            Path to the raw corporates CSV.
        nrows : int, optional
            Cap on the number of rows to read (all rows when None).
        """
        # load data and rename identifier columns to our schema names
        df = pd.read_csv(corps_path,
                         parse_dates=['trd_rpt_efctv_dt', 'mtrty_dt'],
                         nrows=nrows)
        df = df.rename(columns={
            'bond_sym_id': 'finra_symbol',
            'cusip_id': 'cusip9'
        })

        # dedupe on each identifier independently, then drop rows
        # missing any required field
        for key in ('finra_symbol', 'cusip9', 'bsym_id'):
            df = df.drop_duplicates(subset=[key])
        df = df.dropna(subset=['finra_symbol', 'cusip9', 'bsym_id',
                               'cpn_rt', 'cpn_type_cd'])

        # keep only well-formed 9-character CUSIP values
        df = df[df.cusip9.str.contains('^[A-Z0-9]{9}$')]

        # drop convertible bonds
        df = df[df.cnvrb_fl != 'Y']

        # drop unused fields, map NaN debt type codes to None, and turn
        # the 144A indicator into a boolean
        df = df.drop(labels=['cnvrb_fl', 'dissem', 'grade'], axis=1)
        df.loc[:, 'debt_type_cd'] = df.debt_type_cd.where(
            df.debt_type_cd.notnull(), None)
        df.loc[:, 'ind_144a'] = df.ind_144a == 'Y'

        # insert cleaned data into db table
        db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
        db.commit()
Example #4
0
def update_equity_px_fk():
    """Backfill equity_px.entity_id by joining on ticker."""
    equity_px = Base.metadata.tables['equity_px']
    entity = Base.metadata.tables['entity']
    # only rows still missing the FK are touched
    stmt = (
        update(equity_px)
        .where(equity_px.columns.entity_id.is_(None))
        .where(equity_px.columns.ticker == entity.columns.ticker)
        .values(entity_id=entity.columns.id)
    )
    db.execute(stmt)
    db.commit()
Example #5
0
def update_financial_fk():
    """Backfill financial.entity_id by joining on ticker."""
    financial = Base.metadata.tables['financial']
    entity = Base.metadata.tables['entity']
    # only rows still missing the FK are touched
    stmt = (
        update(financial)
        .where(financial.columns.entity_id.is_(None))
        .where(financial.columns.ticker == entity.columns.ticker)
        .values(entity_id=entity.columns.id)
    )
    db.execute(stmt)
    db.commit()
Example #6
0
def update_corp_tx_fk():
    """Backfill corp_tx.corporate_id by matching CUSIP-9 identifiers."""
    corp_tx = Base.metadata.tables['corp_tx']
    corporate = Base.metadata.tables['corporate']

    # only rows still missing the FK are touched
    stmt = (
        update(corp_tx)
        .where(corp_tx.columns.corporate_id.is_(None))
        .where(corp_tx.columns.cusip_id == corporate.columns.cusip9)
        .values(corporate_id=corporate.columns.id)
    )
    db.execute(stmt)
    db.commit()
Example #7
0
 def insert_financials(cls, fin_dir, nrows=None):
     """Insert per-ticker financial statement CSVs into the table.

     Parameters
     ----------
     fin_dir : str
         Directory containing one <TICKER>.csv per ticker.
     nrows : int, optional
         Cap on rows read per file (all rows when None).
     """
     for ticker in get_tickers(fin_dir):
         fin_path = join(fin_dir, f'{ticker}.csv')
         # rows missing either key date are unusable
         df = pd.read_csv(fin_path, nrows=nrows).dropna(
             subset=['earnings_release_date', 'filing_date'])
         if df.shape[0] == 0:
             continue
         df = df.replace(to_replace={np.nan: None})
         df['ticker'] = ticker
         db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
         db.commit()
Example #8
0
 def insert_interest_rates(cls, rdir):
     """Join per-field interest-rate CSVs on date and bulk-insert them.

     Each file in *rdir* is named <field>.csv and is expected to have a
     'date' column plus an 'interest_rate' column, which is renamed to
     the field name before an outer join across all files.

     Parameters
     ----------
     rdir : str
         Directory containing one CSV per rate field ('.' means NA).
     """
     fnames = [p for p in listdir(rdir) if isfile(join(rdir, p))]
     targets = [(p.split('.csv')[0], join(rdir, p)) for p in fnames]
     df = None
     for field, rates_path in targets:
         dftmp = pd.read_csv(rates_path, na_values=['.']).dropna()
         if dftmp.shape[0] > 0:
             dftmp = dftmp.rename(columns={'interest_rate': field}) \
                         .set_index('date')
             if df is None:
                 df = dftmp.copy()
             else:
                 df = df.join(dftmp, how='outer')
     # BUG FIX: previously df stayed None when the directory had no
     # usable rate files and df.reset_index() raised AttributeError;
     # now the call is a no-op in that case.
     if df is None:
         return
     # only keep dates present in every joined series
     df = df.reset_index().dropna()
     db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
     db.commit()
Example #9
0
 def insert_corp_txs(cls, txs_path, nrows=None):
     """Bulk-insert corporate transaction records in bounded chunks.

     Parameters
     ----------
     txs_path : str
         Path to the transactions CSV.
     nrows : int, optional
         Cap on the number of rows to read (all rows when None).
     """
     df = pd.read_csv(txs_path, nrows=nrows).dropna()
     # chunk inserts so a huge file doesn't build one giant statement;
     # max(1, ...) guards against nrows=0 (old code divided by zero)
     if nrows is None:
         step_size = 100000
     else:
         step_size = max(1, min(nrows, 100000))
     # BUG FIX: the old loop iterated range(steps) with a dead
     # `step == steps` branch, so the final partial chunk
     # (df.shape[0] % step_size rows) was silently never inserted.
     # Striding by index covers every row exactly once.
     for idx in range(0, df.shape[0], step_size):
         dftxs = df.iloc[idx:idx + step_size]
         db.bulk_insert_mappings(cls, dftxs.to_dict(orient='records'))
         db.commit()
Example #10
0
 def insert_equity_pxs(cls, equities_dir, nrows=None):
     """Insert per-ticker daily price CSVs into the table.

     Parameters
     ----------
     equities_dir : str
         Directory containing one <TICKER>.csv per ticker.
     nrows : int, optional
         Cap on rows read per file (all rows when None).
     """
     # map vendor-style headers to our snake_case schema
     cmap = {
         'Date': 'date',
         'Volume': 'volume',
         'Open': 'open',
         'High': 'high',
         'Low': 'low',
         'Close': 'close',
         'Adj Close': 'adj_close'
     }
     for ticker in get_tickers(equities_dir):
         pxpath = join(equities_dir, f'{ticker}.csv')
         df = pd.read_csv(pxpath, nrows=nrows).dropna()
         if df.shape[0] == 0:
             continue
         df = df.rename(columns=cmap)
         df['ticker'] = ticker
         db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
         db.commit()
Example #11
0
def del_zero_cpn(table):
    """Delete zero-coupon bond rows from *table*."""
    zero_cpn = db.query(table).filter(table.cpn_rt == 0)
    zero_cpn.delete(synchronize_session=False)
    db.commit()
Example #12
0
def del_no_corporate(table):
    """Delete rows whose corporate_id foreign key is still unset."""
    orphans = db.query(table).filter(table.corporate_id.is_(None))
    orphans.delete(synchronize_session=False)
    db.commit()
Example #13
0
def del_no_entity(table):
    """Delete rows whose entity_id foreign key is still unset."""
    orphans = db.query(table).filter(table.entity_id.is_(None))
    orphans.delete(synchronize_session=False)
    db.commit()
Example #14
0
def del_high_cpn(table):
    """Delete high-coupon bond rows (coupon rate >= 15)."""
    high_cpn = db.query(table).filter(table.cpn_rt >= 15)
    high_cpn.delete(synchronize_session=False)
    db.commit()
Example #15
0
def update_scrty_ds(table):
    # standardize Senior Unsecured
    # NOTE(review): no filter is applied, so this overwrites scrty_ds on
    # EVERY row of the table -- confirm a preceding cleanup step (e.g.
    # del_invalid_scrty_ds) guarantees only senior unsecured variants
    # remain before this runs.
    db.query(table).update({table.scrty_ds: 'Senior Unsecured'},
                           synchronize_session=False)
    db.commit()
Example #16
0
def del_invalid_scrty_ds(table):
    """Delete rows whose scrty_ds is outside VALID_SCRTY_DS."""
    # only senior unsecured notes survive this filter
    invalid = db.query(table).filter(table.scrty_ds.notin_(VALID_SCRTY_DS))
    invalid.delete(synchronize_session=False)
    db.commit()
Example #17
0
def del_invalid_debt_types(table):
    """Delete rows with a debt type code listed in INVALID_DEBT_TYPES."""
    invalid = db.query(table).filter(
        table.debt_type_cd.in_(INVALID_DEBT_TYPES))
    invalid.delete(synchronize_session=False)
    db.commit()
Example #18
0
def del_invalid_sub_prdct_types(table):
    """Delete rows with a sub product type in INVALID_SUB_PRDCT_TYPES."""
    invalid = db.query(table).filter(
        table.sub_prdct_type.in_(INVALID_SUB_PRDCT_TYPES))
    invalid.delete(synchronize_session=False)
    db.commit()
Example #19
0
def del_invalid_cpn_types(table):
    """Keep only vanilla fixed-coupon bonds (cpn_type_cd == 'FXPV')."""
    non_vanilla = db.query(table).filter(table.cpn_type_cd != 'FXPV')
    non_vanilla.delete(synchronize_session=False)
    db.commit()