Esempio n. 1
0
    def insert_entities(cls, entity_path, tickers_path):
        """Load SEC entity and ticker CSVs, join them on CIK, clean the
        result, and bulk-insert the records into this table.

        Parameters are paths to the raw entity CSV (CIK / SEC_Name /
        CUSIP6) and the ticker CSV (ticker / sic_code / naics /
        cik_code / SICGroupMinorGroupTitle).
        """
        entity_cols = ['CIK', 'SEC_Name', 'CUSIP6']
        entities = pd.read_csv(entity_path,
                               usecols=entity_cols,
                               dtype={c: str for c in entity_cols})

        ticker_cols = [
            'ticker', 'sic_code', 'naics', 'cik_code',
            'SICGroupMinorGroupTitle'
        ]
        tickers = pd.read_csv(tickers_path,
                              usecols=ticker_cols,
                              dtype=dict.fromkeys(ticker_cols, str))

        # join on CIK, drop the redundant join key, and keep only
        # plausibly-sized ticker symbols
        merged = entities.merge(tickers, left_on='CIK', right_on='cik_code')
        merged = merged.drop(labels=['cik_code'], axis=1)
        merged = merged[merged.ticker.str.len() <= 10]

        # normalize column names, keep rows whose CUSIP6 is exactly six
        # alphanumeric characters, then deduplicate on it
        merged = merged.rename(columns={
            'CIK': 'cik',
            'SEC_Name': 'name',
            'CUSIP6': 'cusip6',
            'sic_code': 'sic',
            'SICGroupMinorGroupTitle': 'sic_mtitle'
        })
        merged = merged[merged.cusip6.str.contains('^[A-Z0-9]{6}$')]
        merged = merged.drop_duplicates(subset=['cusip6'])

        # persist the cleaned records
        db.bulk_insert_mappings(cls, merged.to_dict(orient='records'))
        db.commit()
Esempio n. 2
0
    def insert_corporates(cls, corps_path, nrows=None):
        """Load the corporate bond reference CSV, clean it, and
        bulk-insert the records into this table.

        Parameters
        ----------
        corps_path : path to the bond reference CSV.
        nrows : optional row cap passed straight to ``pd.read_csv``.
        """
        bonds = pd.read_csv(corps_path,
                            parse_dates=['trd_rpt_efctv_dt', 'mtrty_dt'],
                            nrows=nrows)
        bonds = bonds.rename(columns={
            'bond_sym_id': 'finra_symbol',
            'cusip_id': 'cusip9'
        })

        # one record per identifier, and every required field present
        for ident in ('finra_symbol', 'cusip9', 'bsym_id'):
            bonds = bonds.drop_duplicates(subset=[ident])
        bonds = bonds.dropna(subset=['finra_symbol', 'cusip9', 'bsym_id',
                                     'cpn_rt', 'cpn_type_cd'])

        # keep only well-formed nine-character alphanumeric CUSIPs
        bonds = bonds[bonds.cusip9.str.contains('^[A-Z0-9]{9}$')]

        # exclude convertible issues
        bonds = bonds[bonds.cnvrb_fl != 'Y']

        # drop unused columns, map NaN -> None for the db layer, and
        # coerce the 144A indicator to a boolean
        bonds = bonds.drop(labels=['cnvrb_fl', 'dissem', 'grade'], axis=1)
        bonds.loc[:, 'debt_type_cd'] = bonds.debt_type_cd.where(
            bonds.debt_type_cd.notnull(), None)
        bonds.loc[:, 'ind_144a'] = bonds.ind_144a == 'Y'

        db.bulk_insert_mappings(cls, bonds.to_dict(orient='records'))
        db.commit()
Esempio n. 3
0
 def insert_financials(cls, fin_dir, nrows=None):
     """Load each ticker's financials CSV from fin_dir and bulk-insert
     the dated records into this table, tagged with the ticker symbol.

     Rows missing either earnings_release_date or filing_date are
     dropped; tickers with no remaining rows are skipped entirely.
     """
     for symbol in get_tickers(fin_dir):
         fin_path = join(fin_dir, f'{symbol}.csv')
         records = pd.read_csv(fin_path, nrows=nrows)
         records = records.dropna(
             subset=['earnings_release_date', 'filing_date'])
         if len(records) == 0:
             continue
         # NaN is not a valid db value; substitute None
         records = records.replace(to_replace={np.nan: None})
         records['ticker'] = symbol
         db.bulk_insert_mappings(cls, records.to_dict(orient='records'))
         db.commit()
Esempio n. 4
0
 def insert_interest_rates(cls, rdir):
     """Load every per-field rate CSV in rdir, outer-join the series on
     date, and bulk-insert the dates where every series has a value.

     Each file name (minus '.csv') becomes the column name for that
     file's 'interest_rate' series. Files whose rows are all NaN are
     skipped; if no file yields any data, nothing is inserted.
     """
     fnames = [p for p in listdir(rdir) if isfile(join(rdir, p))]
     targets = [(p.split('.csv')[0], join(rdir, p)) for p in fnames]
     df = None
     for field, rates_path in targets:
         # '.' marks a missing observation in the source files
         dftmp = pd.read_csv(rates_path, na_values=['.']).dropna()
         if dftmp.shape[0] > 0:
             dftmp = dftmp.rename(columns={'interest_rate': field}) \
                         .set_index('date')
             if df is None:
                 df = dftmp.copy()
             else:
                 df = df.join(dftmp, how='outer')
     # BUG FIX: previously df.reset_index() raised AttributeError when
     # the directory produced no usable rows (df still None); bail out
     # instead of crashing.
     if df is None:
         return
     # the final dropna keeps only dates present in every series
     df = df.reset_index().dropna()
     db.bulk_insert_mappings(cls, df.to_dict(orient='records'))
     db.commit()
Esempio n. 5
0
 def insert_corp_txs(cls, txs_path, nrows=None):
     """Load cleaned corporate bond transactions and bulk-insert them in
     chunks so very large files do not build one huge mappings list.

     Parameters
     ----------
     txs_path : str
         Path to the transactions CSV.
     nrows : int, optional
         If given, read at most this many rows (also caps chunk size).
     """
     df = pd.read_csv(txs_path, nrows=nrows).dropna()
     if df.empty:
         return
     chunk_size = 100000 if nrows is None else min(nrows, 100000)
     # BUG FIX: the old loop ran `for step in range(steps)` where
     # steps = len // step_size, so the `step == steps` tail branch was
     # unreachable and the final `len % step_size` rows (or the whole
     # file, when shorter than one chunk) were silently never inserted.
     # Stepping the start index by chunk_size covers every row.
     for start in range(0, df.shape[0], chunk_size):
         chunk = df.iloc[start:start + chunk_size]
         db.bulk_insert_mappings(cls, chunk.to_dict(orient='records'))
         db.commit()
Esempio n. 6
0
 def insert_equity_pxs(cls, equities_dir, nrows=None):
     """Load each ticker's daily price CSV from equities_dir, normalize
     the column names, and bulk-insert the rows into this table.

     Rows containing any NaN are dropped; tickers with no remaining
     rows are skipped entirely.
     """
     column_map = {
         'Date': 'date',
         'Volume': 'volume',
         'Open': 'open',
         'High': 'high',
         'Low': 'low',
         'Close': 'close',
         'Adj Close': 'adj_close'
     }
     for symbol in get_tickers(equities_dir):
         pxpath = join(equities_dir, f'{symbol}.csv')
         prices = pd.read_csv(pxpath, nrows=nrows).dropna()
         if len(prices) == 0:
             continue
         prices = prices.rename(columns=column_map)
         prices['ticker'] = symbol
         db.bulk_insert_mappings(cls, prices.to_dict(orient='records'))
         db.commit()