Ejemplo n.º 1
0
    def historical():
        """Return paths to the most recent historical EOD data files.

        Returns
        -------
        dict
            Keys 'combined', 'combined_all', 'combined_year', each mapped
            to the most recent fpath under StockEOD/<key>.
        """
        bpath = Path(baseDir().path, 'StockEOD')

        historical = ({
            'combined':
            get_most_recent_fpath(bpath.joinpath('combined')),
            'combined_all':
            get_most_recent_fpath(bpath.joinpath('combined_all')),
            'combined_year':
            get_most_recent_fpath(bpath.joinpath('combined_year'))
        })

        # Bug fix: the dict was built but never returned, so callers always
        # received None. Every sibling accessor (externals, sec, ...) returns
        # its dict — this one should too.
        return historical
Ejemplo n.º 2
0
    def externals():
        """External information sources.

        Returns a dict of the most recent file paths for the short-sale
        circuit breaker and trading-halt feeds.
        """
        short_dir = Path(baseDir().path).joinpath('short')

        return {
            'daily_breaker': get_most_recent_fpath(
                short_dir.joinpath('daily_breaker'), f_pre='nasdaq'),
            'halts': get_most_recent_fpath(short_dir.joinpath('halts')),
        }
Ejemplo n.º 3
0
    def company_stats():
        """Company stats file paths (analyst recs, meta, stats)."""
        base = Path(baseDir().path, 'company_stats')

        paths = {}
        paths['analyst_recs'] = base.joinpath('analyst_recs', '_2022.parquet')
        paths['meta'] = get_most_recent_fpath(base.joinpath('meta', 'combined'))
        paths['stats'] = get_most_recent_fpath(
            base.joinpath('stats', 'combined'))

        return paths
Ejemplo n.º 4
0
    def sec():
        """SEC feed file paths (rss, daily index) for the current year."""
        base = Path(baseDir().path, 'sec')
        year = str(getDate.query('iex_eod').year)

        return {
            'rss': get_most_recent_fpath(base.joinpath('rss', year)),
            'daily_idx': get_most_recent_fpath(
                base.joinpath('daily_index', year)),
            'daily_idx_combined': base.joinpath(
                'daily_index', '_all_combined.parquet'),
        }
Ejemplo n.º 5
0
    def scans():
        """Scans for stocks/other items."""
        top_vol_dir = Path(baseDir().path, 'scans').joinpath('top_vol')

        return {'top_vol': get_most_recent_fpath(top_vol_dir)}
Ejemplo n.º 6
0
    def _get_missing_dates_df(cls, self, key):
        """Load the most recent missing-dates frame for *key* onto self.

        Reads StockEOD/missing_dates/<key>, filters out rows recorded as
        null dates (when the null-dates file exists), then stores the
        merged/missing/single/multiple frames as attributes on *self*.

        Bug fix: previously `df` was only assigned inside the
        `null_path.exists()` branch, so a missing null-dates file raised
        NameError at `self.merged_df = df`. Now defaults to the raw frame.
        """
        key_options = ['previous', 'all', 'less_than_20']
        if str(key) not in key_options:
            self.proceed = False  # If provided key not in options

        bpath = Path(baseDir().path, 'StockEOD/missing_dates', key)
        path = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(path)

        # Default to the unfiltered frame; only replace it when the
        # null-dates file exists AND the anti-join keeps some rows.
        df = df_dates

        # Define path of null dates
        null_path = Path(baseDir().path, 'StockEOD/missing_dates/null_dates',
                         '_null_dates.parquet')
        if null_path.exists():
            null_df = pd.read_parquet(null_path)
            # Anti-join: keep only dates not present in the null-dates file
            merged = (pd.merge(df_dates, null_df, how='left', indicator=True)
                      .query('_merge == "left_only"')
                      .drop(columns=['_merge'])
                      .copy())
            # If the merge filtered everything out, fall back to the raw frame
            if not merged.empty:
                df = merged

        self.null_dates = []
        self.merged_df = df
        self.missing_df = self._clean_process_missing(self, df)
        self.single_df, self.multiple_df = self._get_single_multiple_dfs(
            self, self.missing_df)
Ejemplo n.º 7
0
    def stocktwits():
        """Stocktwits data."""
        trending_dir = Path(baseDir().path, 'stocktwits').joinpath('trending')

        return {'trending': get_most_recent_fpath(trending_dir, f_pre='_')}
Ejemplo n.º 8
0
    def intraday_tick():
        """Intraday tick data paths (sector performance, treasuries)."""
        sectors_dir = Path(baseDir().path, 'tickers', 'sectors')
        treasuries_fpath = Path(baseDir().path, 'economic_data',
                                'treasuries.parquet')

        return {
            'sector_perf': get_most_recent_fpath(sectors_dir,
                                                 f_pre='performance'),
            'treasuries': treasuries_fpath,
        }
def get_yf_loop_missing_hist(key='less_than_20', cs=False, sym_list=None, verb=False, refresh_missing_dates=True):
    """Get less_than_20 syms and call GetYfMissingDates.

    Parameters
    ----------
    key : str
        Missing-dates bucket to read (e.g. 'less_than_20'), or
        'get_ignore_ytd' to derive symbols missing >1 day this year.
    cs : bool
        When True, read the 'all' bucket (common stock / ADRs path).
    sym_list : list, optional
        Explicit symbol list; skips all lookups when provided.
    verb : bool
        Print progress messages via help_print_arg.
    refresh_missing_dates : bool
        Re-run MissingHistDates before reading the bucket.
    """
    if sym_list:
        # Caller supplied symbols directly — nothing to look up.
        # (Removed a dead `pass` statement that preceded this check.)
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list assumed')
    elif key == 'get_ignore_ytd':
        # Symbols whose YTD row count is more than one day short of the max
        df_all = read_clean_combined_all()
        dt = getDate.query('iex_eod')
        df_year = df_all[df_all['date'].dt.year == dt.year].copy(deep=True)
        vc = df_year.value_counts(subset='symbol', ascending=False)
        syms_one_miss = vc[(vc < (vc.max() - 1)) & (vc > 0)].index
        sym_list = syms_one_miss.tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: key==get_ignore_ytd : syms_one_miss')
    elif cs is True:
        if refresh_missing_dates:
            MissingHistDates(cs=True)
        bpath = Path(baseDir().path, "StockEOD/missing_dates/all")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        # Get all symbols, reduce to common stock and adr's
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: cs=True')
    else:
        if refresh_missing_dates:
            MissingHistDates()
        bpath = Path(baseDir().path, f"StockEOD/missing_dates/{key}")
        fpath = get_most_recent_fpath(bpath)
        df_dates = pd.read_parquet(fpath)
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list from missing_dates/key')

    # Best-effort loop: one symbol failing shouldn't stop the rest.
    for sym in tqdm(sym_list):
        try:
            GetYfMissingDates(sym=sym)
        except Exception as e:
            help_print_arg(f"get_yf_loop_missing_hist error: {str(e)}")
Ejemplo n.º 10
0
def get_cboe_ref(ymaster=False):
    """Get cboe reference data for use on yfinance.

    Parameters
    ----------
    ymaster : bool
        When True, reduce the frame to the unique underlying symbols
        (single 'symbol' column).

    Returns
    -------
    pd.DataFrame
        Most recent symref parquet with 'Underlying' renamed to 'symbol'.
    """
    # Cleanup: removed a dead `df = None` assignment (immediately
    # overwritten) and stale commented-out column-drop code.
    path = Path(baseDir().path, 'derivatives/cboe_symref')
    fpath = get_most_recent_fpath(path, f_pre='symref')
    df = pd.read_parquet(fpath)
    df = df.rename(columns={'Underlying': 'symbol'})

    if ymaster:
        df = pd.DataFrame(df['symbol'].unique(), columns=['symbol']).copy()

    return df
Ejemplo n.º 11
0
    def last_bus_day_syms():
        """Read all symbols from the last business day.

        Falls back to the combined all_symbols parquet, and finally to the
        server API (writing the result to the fallback path) when no file
        exists locally.

        Returns
        -------
        pd.DataFrame of symbols.
        """
        # Cleanup: removed the dead `sym_df = False` sentinel — every
        # branch below assigns sym_df, and False was a misleading type.
        sdir = Path(baseDir().path, 'tickers', 'new_symbols')
        fpath = get_most_recent_fpath(sdir, f_pre='_')

        if fpath.exists():
            sym_df = pd.read_parquet(fpath)
        else:
            fpath = sdir.parent.joinpath('symbol_list', 'all_symbols.parquet')
            if fpath.exists():
                sym_df = pd.read_parquet(fpath)
            else:
                # Last resort: pull from the server and cache locally
                sym_df = serverAPI('all_symbols').df
                write_to_parquet(sym_df, fpath)

        return sym_df
Ejemplo n.º 12
0
    def warrants():
        """Warrant information/records.

        Returns a dict mapping each warrant category to the most recent
        file path under tickers/warrants/<category>.
        """
        base = Path(baseDir().path, 'tickers', 'warrants')

        categories = ['all', 'all_hist', 'cheapest', 'newest', 'top_perf']
        warrants = {cat: get_most_recent_fpath(base.joinpath(cat))
                    for cat in categories}
        # NOTE(review): the 'worst_pef' key looks like a typo for
        # 'worst_perf' (the directory it reads), but callers may depend on
        # the existing key — preserved as-is.
        warrants['worst_pef'] = get_most_recent_fpath(
            base.joinpath('worst_perf'))

        return warrants
Ejemplo n.º 13
0
import numpy as np
import requests

try:
    from scripts.dev.multiuse.help_class import baseDir, getDate, write_to_parquet, help_print_arg
    from scripts.dev.multiuse.path_helpers import get_most_recent_fpath
except ModuleNotFoundError:
    from multiuse.help_class import baseDir, getDate, write_to_parquet, help_print_arg
    from multiuse.path_helpers import get_most_recent_fpath

# %% codecell

# %% codecell
# def cboe_clean_symbol_ref
# Script body: clean the raw CBOE symbol-reference file into symbol/suffix
# columns. (Per the comment above, candidate for a cboe_clean_symbol_ref
# function.)
bpath = Path(baseDir().path, 'ref_data/yoptions_ref/cboe_ref_raw')
fpath = get_most_recent_fpath(bpath)
# Only the two columns needed to derive the option suffix
cols_to_read = ['OSI Symbol', 'Underlying']

# NOTE(review): `dd` is presumably dask.dataframe (the `meta=` kwarg below
# is a dask apply signature) — import not visible in this chunk; confirm.
df = dd.read_parquet(fpath, columns=cols_to_read)

# Normalize: strip spaces from OSI symbols; drop '.' from underlyings
df['OSI Symbol'] = df['OSI Symbol'].str.replace(' ', '')
df['Underlying'] = df['Underlying'].str.replace('.', '', regex=False)

# Drop rows whose underlying is literally 'C' — presumably a file
# artifact; verify against the raw symref data.
df = df[df['Underlying'] != 'C']

# Option suffix = OSI symbol with its underlying prefix removed
df['sym_suf'] = df.apply(
    lambda row: row['OSI Symbol'].replace(row['Underlying'], ''),
    axis=1,
    meta=('sym_suf', 'object'))
df = df.assign(suf_temp=df['sym_suf'].str.replace(' ', '').str.replace(
Ejemplo n.º 14
0
def read_clean_combined_all(local=False, dt=None, filter_syms=True):
    """Read, clean, and add columns to StockEOD combined all.

    Parameters
    ----------
    local : bool
        Read combined_all from local StockEOD storage; otherwise pull
        from the server API and append 2015-2020 historical data.
    dt : date-like, optional
        Cutoff; rows dated before Jan 1 of dt.year are dropped.
        Defaults to getDate.query('iex_eod').
    filter_syms : bool
        Server path only: restrict to symbols from remove_funds_spacs().

    Returns
    -------
    pd.DataFrame
        Sorted by symbol/date, with derived columns (fRange, vol/mil,
        prev_close, fChangeP, gap, gRange, cumPerc, perc5, vol_avg_2m,
        fCP5, RSI, moving averages, fHighMax).
    """
    df_all = None

    if local:
        bpath = Path(baseDir().path, 'StockEOD/combined_all')
        fpath = get_most_recent_fpath(bpath)
        cols_to_read = [
            'date', 'symbol', 'fOpen', 'fHigh', 'fLow', 'fClose', 'fVolume'
        ]
        df_all = pd.read_parquet(fpath, columns=cols_to_read)
        # Parquet may store date as strings — coerce to datetime
        if df_all['date'].dtype == 'object':
            df_all['date'] = pd.to_datetime(df_all['date'])
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
    else:
        cols_to_read = [
            'date', 'symbol', 'fOpen', 'fHigh', 'fLow', 'fClose', 'fVolume'
        ]
        df_all = serverAPI('stock_close_cb_all').df
        df_all = df_all[cols_to_read]

        if filter_syms:
            # Keep only common-stock symbols (drop funds/SPACs)
            all_cs_syms = remove_funds_spacs()
            df_all = df_all[df_all['symbol'].isin(
                all_cs_syms['symbol'])].copy()

        df_all['date'] = pd.to_datetime(df_all['date'])

        # Define base bpath for 2015-2020 stock data
        bpath = Path(baseDir().path, 'historical/each_sym_all')
        # NOTE(review): 'each_sym_all' appears in both bpath and joinpath,
        # yielding historical/each_sym_all/each_sym_all/combined_all —
        # confirm this doubled segment is intended.
        path = get_most_recent_fpath(
            bpath.joinpath('each_sym_all', 'combined_all'))
        df_hist = pd.read_parquet(path)
        # Combine 2015-2020 stock data with ytd
        df_all = pd.concat([df_hist, df_all]).copy()

        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
        df_all.reset_index(drop=True, inplace=True)

    if not dt:
        dt = getDate.query('iex_eod')
    # Exclude all dates from before this year
    df_all = (df_all[df_all['date'] >= str(dt.year)].dropna(
        subset=['fVolume']).copy())

    # Get rid of all symbols that only have 1 day of data
    df_vc = df_all['symbol'].value_counts()
    df_vc_1 = df_vc[df_vc == 1].index.tolist()
    df_all = (df_all[~df_all['symbol'].isin(df_vc_1)].reset_index(
        drop=True).copy())
    # Sort by symbol, date ascending
    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    df_all['fRange'] = (df_all['fHigh'] - df_all['fLow'])
    df_all['vol/mil'] = (df_all['fVolume'].div(1000000))
    # NOTE(review): these shifts cross symbol boundaries; downstream
    # columns guard against that by comparing symbol == prev_symbol.
    df_all['prev_close'] = df_all['fClose'].shift(periods=1, axis=0)
    df_all['prev_symbol'] = df_all['symbol'].shift(periods=1, axis=0)

    # Add fChangeP col
    print('Fib funcs: adding fChangeP column')
    df_all = add_fChangeP_col(df_all)

    # Merge with df_all and resume

    # Add gap column
    print('Fib funcs: adding gap column')
    df_all = add_gap_col(df_all)

    # Add range of gap: when the day gapped above the prior close, measure
    # from prev_close; otherwise the plain high-low range
    df_all['gRange'] = (np.where(df_all['prev_close'] < df_all['fLow'],
                                 df_all['fHigh'] - df_all['prev_close'],
                                 df_all['fHigh'] - df_all['fLow']))

    # NOTE(review): np.NaN was removed in NumPy 2.0 — np.nan is the
    # forward-compatible spelling (same object on older versions).
    df_all['cumPerc'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                 df_all['fChangeP'].cumsum(), np.NaN)

    # Forward 5-day cumulative percent change
    df_all['perc5'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                               df_all['cumPerc'].shift(-5) - df_all['cumPerc'],
                               np.NaN)

    # ~2 month (60-row) rolling average volume
    df_all['vol_avg_2m'] = np.where(df_all['symbol'] == df_all['prev_symbol'],
                                    df_all['fVolume'].rolling(60).mean(),
                                    np.NaN)

    # Add cumulative sum of last 5 fChangeP rows
    df_all['fCP5'] = (np.where(
        df_all['symbol'] == df_all['prev_symbol'],
        df_all['fChangeP'].rolling(min_periods=1, window=5).sum(), 0))

    df_all = df_all.copy()
    # Calc RSI and moving averages
    print('Fib Funcs: calc_rsi')
    df_all = calc_rsi(df_all)
    print('Fib Funcs: making_moving_averages')
    df_all = make_moving_averages(df_all)

    # fHighMax funcs
    print('Fib funcs: fHighMax')
    df_all = add_fHighMax_col(df_all).copy()

    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    # Round float32 columns via float64 to avoid float32 repr noise
    float_32s = df_all.dtypes[df_all.dtypes == np.float32].index
    for col in float_32s:
        df_all[col] = df_all[col].astype(np.float64).round(3)

    df_all = dataTypes(df_all, parquet=True).df.copy()

    return df_all