Example #1
0
    def check_warrants(cls, self):
        """Record in ``self.sys_dict`` whether each local warrant file exists.

        One parquet file per warrant category is looked up under
        ``<self.base_dir>/tickers/warrants/<category>/``; the file name is
        built from the date returned by ``getDate.query('iex_eod')``.
        Each status label is set to True when the file exists, else False.
        """
        root = f"{self.base_dir}/tickers/warrants"
        stamp = getDate.query('iex_eod')

        # (status label written to sys_dict, sub-directory of the file)
        categories = (
            ('Warrants: cheapest', 'cheapest'),
            ('Warrants: newest', 'newest'),
            ('Warrants: top perf ytd', 'top_perf'),
            ('Warrants: worst perf ytd', 'worst_perf'),
            ('Warrants: all', 'all'),
            ('Warrants: all historical', 'all_hist'),
        )

        for label, subdir in categories:
            target = f"{root}/{subdir}/_{stamp}.parquet"
            self.sys_dict[label] = os.path.isfile(target)
Example #2
0
def get_nasdaq_symbol_changes():
    """Get symbol change history from nasdaq and write it to parquet.

    The rows of the ``symbolChangeHistoryTable`` in the JSON response are
    written to ``ref_data/symbol_ref/symbol_changes/_<dt>.parquet`` under
    the base directory, where ``dt`` is ``getDate.query('iex_close')``.

    Raises:
        Exception: when the request does not return status code 200. The
            message carries the url and status code.
    """
    sym_change_url = 'https://api.nasdaq.com/api/quote/list-type-extended/symbolchangehistory'

    nasdaq_headers = ({
        'Host': 'api.nasdaq.com',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:94.0) Gecko/20100101 Firefox/94.0',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Origin': 'https://www.nasdaq.com',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Referer': 'https://www.nasdaq.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'Sec-GPC': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache'
    })

    # Without a timeout, requests waits indefinitely on a stalled connection
    resp = requests.get(sym_change_url, headers=nasdaq_headers, timeout=30)

    if resp.status_code != 200:
        msg1 = 'get_nasdaq_symbol_changes failed with url'
        msg2 = f"and status code {str(resp.status_code)}"
        msg = f"{msg1} {sym_change_url} {msg2}"
        help_print_arg(msg)
        # Same exception type as before, but now it says what went wrong
        raise Exception(msg)

    df_sym_change = (pd.DataFrame(
        resp.json()['data']['symbolChangeHistoryTable']['rows']))

    dt = getDate.query('iex_close')
    path = (Path(baseDir().path, 'ref_data/symbol_ref/symbol_changes',
                 f'_{dt}.parquet'))

    write_to_parquet(df_sym_change, path)
Example #3
0
def combine_all_intraday_data(minute='minute_1'):
    """Combine all intraday data, write to file.

    Reads every parquet file found (recursively) under
    ``intraday/<minute>/<year>`` in the base directory, where the year
    comes from ``getDate.query('iex_eod')``, concatenates them and writes
    the result to ``intraday/combined_all/<minute>/_<dt>.parquet``.

    Files that fail to read are reported and skipped. When nothing could
    be read, a message is printed and no file is written.
    """
    dt = getDate.query('iex_eod')
    path = Path(baseDir().path, 'intraday', minute, str(dt.year))

    df_list = []
    for fpath in path.glob('**/*.parquet'):
        try:
            df_list.append(pd.read_parquet(fpath))
        except Exception as e:
            msg = f"fpath: {str(fpath)} reason: {str(e)}"
            help_print_arg(msg)

    # pd.concat raises ValueError on an empty list; report it instead
    if not df_list:
        help_print_arg(
            f"combine_all_intraday_data: no readable parquet files in {str(path)}")
        return

    df_all = pd.concat(df_list)
    # path is <base>/intraday/<minute>/<year>; write next to <minute>
    path_to_write = path.parent.parent.joinpath(
        f"combined_all/{minute}/_{dt}.parquet")

    write_to_parquet(df_all, path_to_write)
    def _get_clean_data(cls, self, sym, period, interval, proxy):
        """Download ``sym`` from yfinance, clean it, store it on ``self``.

        The downloaded frame gets a ``symbol`` column, its price/volume
        columns are renamed to the ``f``-prefixed names, and it is passed
        through ``dataTypes``. Rows dated on or before
        ``getDate.query('iex_eod')`` are assigned to ``self.df_yf``.
        """
        column_map = {
            'Date': 'date',
            'Open': 'fOpen',
            'High': 'fHigh',
            'Low': 'fLow',
            'Close': 'fClose',
            'Volume': 'fVolume',
        }

        raw = yf.download(tickers=sym, period=period, interval=interval,
                          group_by='ticker', auto_adjust=True,
                          prepost=False, proxy=proxy)

        frame = raw.reset_index()
        frame.insert(1, 'symbol', sym)
        frame = frame.rename(columns=column_map)
        frame = dataTypes(frame, parquet=True).df

        cutoff = getDate.query('iex_eod')
        on_or_before = frame['date'].dt.date <= cutoff
        self.df_yf = frame[on_or_before]
def get_yf_loop_missing_hist(key='less_than_20', cs=False, sym_list=None, verb=False, refresh_missing_dates=True):
    """Pick a list of symbols and run GetYfMissingDates on each one.

    The symbol list comes from, in order of precedence:
      1. ``sym_list`` when it is non-empty;
      2. ``key == 'get_ignore_ytd'``: symbols of the current year whose
         row count is positive but more than one below the maximum;
      3. ``cs is True``: the most recent ``missing_dates/all`` file;
      4. otherwise the most recent ``missing_dates/<key>`` file.

    For cases 3 and 4 ``MissingHistDates`` is run first when
    ``refresh_missing_dates`` is true. Errors raised for a single symbol
    are printed and the loop moves on.
    """
    if sym_list:
        note = 'get_yf_loop_missing_hist: sym_list assumed'
    elif key == 'get_ignore_ytd':
        df_all = read_clean_combined_all()
        dt = getDate.query('iex_eod')
        this_year = df_all['date'].dt.year == dt.year
        df_year = df_all[this_year].copy(deep=True)
        counts = df_year.value_counts(subset='symbol', ascending=False)
        behind = (counts < (counts.max() - 1)) & (counts > 0)
        sym_list = counts[behind].index.tolist()
        note = 'get_yf_loop_missing_hist: key==get_ignore_ytd : syms_one_miss'
    else:
        use_cs = cs is True
        if refresh_missing_dates:
            if use_cs:
                MissingHistDates(cs=True)
            else:
                MissingHistDates()
        # cs=True reads the "all" file; otherwise the file named by key
        subdir = 'all' if use_cs else key
        bpath = Path(baseDir().path, f"StockEOD/missing_dates/{subdir}")
        df_dates = pd.read_parquet(get_most_recent_fpath(bpath))
        sym_list = df_dates['symbol'].unique().tolist()
        if use_cs:
            note = 'get_yf_loop_missing_hist: cs=True'
        else:
            note = 'get_yf_loop_missing_hist: sym_list from missing_dates/key'

    if verb:
        help_print_arg(note)

    for sym in tqdm(sym_list):
        try:
            GetYfMissingDates(sym=sym)
        except Exception as e:
            help_print_arg(f"get_yf_loop_missing_hist error: {str(e)}")
Example #6
0
def get_missing_sec_master_idx(sma_df=False):
    """Request the sec master index for each business day not in ``sma_df``.

    ``sma_df`` is the master index of all dates. When it is not a
    DataFrame it is fetched with ``serverAPI('sec_master_all')`` and its
    ``date`` column is converted from epoch milliseconds.

    Every business day of this year, up to ``getDate.query('iex_eod')``,
    whose date is absent from ``sma_df`` is requested through
    ``secMasterIdx``. Failures are printed and do not stop the loop.
    """
    if not isinstance(sma_df, pd.DataFrame):
        sma_df = serverAPI('sec_master_all').df
        sma_df['date'] = pd.to_datetime(sma_df['date'], unit='ms')

    bus_days = getDate.get_bus_days(this_year=True)
    cutoff = getDate.query('iex_eod')
    bus_days = bus_days[bus_days['date'].dt.date <= cutoff].copy()

    known_dates = sma_df['date'].unique().tolist()
    missing = bus_days[~bus_days['date'].isin(known_dates)].copy()
    missing['dt_format'] = missing['date'].dt.strftime('%Y%m%d')

    for hist_date in tqdm(missing['dt_format']):
        try:
            secMasterIdx(hist_date=hist_date)
            # Pause between successful requests
            sleep(.5)
        except Exception as e:
            help_print_arg(f"get_missing_sec_master_idx: {str(e)}")
Example #7
0
    def _start_for_loop(cls, self, dt, verbose, ntests):
        """Request minute data for each symbol in ``self.syms``.

        ``dt`` may be falsy (defaults to ``getDate.query('iex_eod')``), a
        ``'%Y%m%d'`` or ``'%Y-%m-%dT%H:%M:%S'`` string, or a Timestamp; it
        is normalised to a date. Per-symbol failures are collected and
        handed to ``self._error_handling`` once the loop is done.

        A non-int ``ntests`` is replaced by 5. When ``ntests`` is truthy
        the loop stops once more than ``ntests`` symbols were processed.
        """
        dt = dt or getDate.query('iex_eod')

        if isinstance(dt, Timestamp):
            dt = dt.date()
        elif isinstance(dt, str):
            try:
                dt = datetime.strptime(dt, '%Y%m%d').date()
            except ValueError:
                dt = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S').date()

        bpath = Path(baseDir().path, 'intraday', 'minute_1', str(dt.year))

        # Anything that is not an int falls back to 5 test symbols
        if not isinstance(ntests, int):
            ntests = 5

        error_dict = {}
        for count, sym in enumerate(tqdm(self.syms), start=1):
            try:
                self._get_sym_min_data(self, sym, dt, bpath, verbose)
            except Exception as e:
                reason = str(e)
                error_dict[sym] = {
                    'symbol': sym,
                    'type': str(type(e)),
                    'reason': reason,
                }
                if verbose:
                    help_print_arg(f"{sym} get_sym_min_data. Reason: {reason}")
            # If testing, eventually break
            if ntests and count > ntests:
                break

        self._error_handling(self, error_dict, bpath)
Example #8
0
 def _construct_params(cls, self):
     """Build the request url and the local file paths on ``self``.

     Uses ``self.get_hist_date`` (a ``'%Y%m%d'`` string) when it is set,
     otherwise the date from ``getDate.query('sec_master')``. Sets
     ``self.fpath``, ``self.fpath_raw`` and ``self.url``.
     """
     if self.get_hist_date:
         # Historical request: the date arrives already formatted
         dt_fmt = self.get_hist_date
         ref_dt = datetime.datetime.strptime(dt_fmt, '%Y%m%d').date()
     else:
         ref_dt = getDate.query('sec_master')
         dt_fmt = ref_dt.strftime('%Y%m%d')

     yr = ref_dt.year
     # Financial quarter (1-4) that the reference date falls in
     quarter = (ref_dt.month - 1) // 3 + 1
     f_quart = f"QTR{quarter}"
     # Url suffix using the formatted date
     mast_suf = f"master.{dt_fmt}.idx"

     self.fpath = f"{self.baster}/{yr}/_{dt_fmt}.parquet"
     self.fpath_raw = f"{self.baster}/{yr}/raw/_{dt_fmt}.parquet"
     self.url = f"{self.sec_burl}/{yr}/{f_quart}/{mast_suf}"
Example #9
0
def return_yoptions_temp_all():
    """Return dataframe of all yoptions temp (today's data).

    When the base directory path contains ``'Algo'`` (local environment)
    the data is pulled through ``serverAPI('yoptions_temp')``. Otherwise
    (assumed production) every parquet file under
    ``derivatives/end_of_day/temp/<year>`` is read and concatenated.

    Returns:
        The combined DataFrame, or None when the ``api`` module cannot be
        imported or no parquet files are found.
    """
    # If local environment
    if 'Algo' in baseDir().path:
        try:
            from api import serverAPI
            return serverAPI('yoptions_temp').df
        except ModuleNotFoundError as me:
            help_print_arg(str(me))
            return None

    # Assume production environment
    dt = getDate.query('iex_eod')
    fpath = Path(baseDir().path, 'derivatives/end_of_day/temp', str(dt.year))

    df_list = [pd.read_parquet(path) for path in fpath.glob('**/*.parquet')]
    # pd.concat raises ValueError on an empty list; report it instead
    if not df_list:
        help_print_arg(
            f"return_yoptions_temp_all: no parquet files in {str(fpath)}")
        return None

    return pd.concat(df_list)
Example #10
0
def get_last_30_intradays():
    """Queue an intraday request for recent business days lacking data.

    Takes the business days within the 30 calendar days ending at
    ``getDate.query('iex_eod')``, drops those whose date already appears
    in ``serverAPI('iex_intraday_m1')``, and schedules an
    ``'iex_intraday'`` task per remaining date. Nothing is scheduled when
    ``app.tasks`` cannot be imported.
    """
    bsdays = getDate.get_bus_days()

    dt_today = getDate.query('iex_eod')
    dt_30 = dt_today - timedelta(days=30)

    not_too_old = bsdays['date'].dt.date >= dt_30
    not_future = bsdays['date'].dt.date <= dt_today
    days = bsdays[not_too_old & not_future]

    df_m1 = serverAPI('iex_intraday_m1').df
    dates_on_file = df_m1['date'].unique()
    days_tget = days[~days['date'].isin(dates_on_file)].copy()

    try:
        from app.tasks import execute_func
        for dt in days_tget['date']:
            execute_func.delay('iex_intraday', dt=dt)
    except ModuleNotFoundError:
        pass
Example #11
0
    def merge_dfs(cls, self):
        """Inner-join ``self.mmo_df`` with ``self.sym_df``.

        The join keys are ``Symbol`` and ``Underlying``, plus ``exchange``
        when both frames carry that column. The result gets a ``rptDate``
        column from ``getDate.query('cboe')`` and is passed through
        ``dataTypes``. An empty DataFrame is returned on TypeError.
        """
        join_cols = ['Symbol', 'Underlying']
        frames = (self.mmo_df, self.sym_df)
        if all('exchange' in frame.columns for frame in frames):
            join_cols.append('exchange')

        try:
            merged = pd.merge(self.mmo_df, self.sym_df,
                              on=join_cols, how='inner')
            merged = merged.reset_index(drop=True)
            merged['rptDate'] = getDate.query('cboe')
            # Change data types to reduce file size
            merged = dataTypes(merged, parquet=True).df
        except TypeError:
            merged = pd.DataFrame()

        return merged
Example #12
0
 def _clean_process_missing(cls, self, df):
     """Add request and file-path columns to the missing dates frame.

     Columns added to ``df`` (which is modified and returned):
       dt         date formatted as ``%Y%m%d``
       sym_lower  lower-cased symbol
       url        iex exact-date chart url (skipped on KeyError)
       path_parq  local parquet path for the symbol
     """
     def _chart_url(row):
         # Iex exact date url construction
         return f"/stock/{row['sym_lower']}/chart/date/{row['dt']}?chartByDay=true"

     # Format each distinct date once, then map it back onto every row
     uniq_dates = df['date'].drop_duplicates()
     date_to_str = pd.Series(uniq_dates.dt.strftime('%Y%m%d'))
     date_to_str.index = uniq_dates
     df['dt'] = df['date'].map(date_to_str)

     df['sym_lower'] = df['symbol'].str.lower()
     try:
         df['url'] = df.apply(_chart_url, axis=1)
     except KeyError:
         help_print_arg(
             f"_clean_process_missing: lambda KeyError: {str(df.columns)}")

     # Local data files live under StockEOD/<year>/<first letter>/
     prev_dt = getDate.query('iex_previous')
     year_dir = Path(baseDir().path, 'StockEOD', str(prev_dt.year))

     def _parquet_path(row):
         sym = row['symbol']
         return Path(year_dir, sym.lower()[0], f"_{sym}.parquet")

     df['path_parq'] = df.apply(_parquet_path, axis=1)
     return df
Example #13
0
def get_most_recent_fpath(fpath_dir, f_pre='', f_suf='', dt='', this_year=True, second=False):
    """Get the most recent fpath in a directory.

    Candidate file names are ``<f_pre><stamp><f_suf>.parquet`` where the
    stamp is built from each business day on or before ``dt``, newest
    first. Three stamp formats are tried in turn: ``_YYYY-MM-DD``,
    ``_YYYY`` and ``_YYYYMMDD``.

    Args:
        fpath_dir: directory to search.
        f_pre: text placed before the date stamp.
        f_suf: text placed between the date stamp and ``.parquet``.
        dt: latest date to accept; defaults to ``getDate.query('iex_close')``.
        this_year: passed to ``getDate.get_bus_days``.
        second: true on the internal retry; prevents further recursion.

    Returns:
        The first existing candidate path. When none exists the search is
        retried once with ``this_year=False``; failing that, the last
        (sorted) ``*.parquet`` file of the directory, or None when the
        directory has no parquet files.
    """
    if not dt:  # If no date passed, default to iex_close
        dt = getDate.query('iex_close')

    bus_days = getDate.get_bus_days(this_year=this_year)
    dates = (bus_days[bus_days['date'].dt.date <= dt]
             .sort_values(by=['date'], ascending=False)['date'])

    # Each format is exhausted over all dates before the next one is tried
    for stamp_fmt in ('_%Y-%m-%d', '_%Y', '_%Y%m%d'):
        for stamp in dates.dt.strftime(stamp_fmt):
            tpath = Path(fpath_dir, f"{f_pre}{stamp}{f_suf}.parquet")
            if tpath.exists():
                return tpath

    if not second:
        # Retry over all years, keeping the caller's prefix/suffix/date so
        # the retry looks for the same kind of file
        path_to_return = get_most_recent_fpath(
            fpath_dir, f_pre=f_pre, f_suf=f_suf, dt=dt,
            this_year=False, second=True)
        if path_to_return:
            help_print_arg(f"get_most_recent_fpath: first failed. Returning {str(path_to_return)}")
            return path_to_return

    msg_1 = "Directory empty or path doesn't follow format '_dt.parquet'. Returning first path"
    msg_2 = f": {fpath_dir}"
    help_print_arg(f"{msg_1} {msg_2}")

    # Sorted so the result does not depend on filesystem listing order
    paths = sorted(Path(fpath_dir).glob('*.parquet'))
    if paths:
        return paths[-1]
    return None
Example #14
0
    from api import serverAPI

# %% codecell
from missing_data.get_missing_hist_from_yf import get_yf_loop_missing_hist

# Run the missing-history loop for an explicit symbol list
sym_list = ['GENI']
get_yf_loop_missing_hist(sym_list=sym_list)
# %% codecell
# I'd like 2 dataframes:
# One with the data updated every 10 minutes that I can
# clean later on
# Another with the raw timestamps that
# I'll have to sort through later on
from multiuse.path_helpers import get_most_recent_fpath

# Locate the most recent '<date>_eod.parquet' file on or before dt
dt = getDate.query('iex_close')
bpath = Path(baseDir().path, 'derivatives/cboe_intraday/2021')
fpath = get_most_recent_fpath(bpath, f_suf='_eod', dt=dt)

# %% codecell
from workbooks.fib_funcs import read_clean_combined_all
df_all = read_clean_combined_all()

# Eagles golf club

# path_eod won't have the timestamps
# %% codecell

# %% codecell
# Pull the cboe intraday end-of-day frame from the server API
sapi_eod = serverAPI('cboe_intraday_eod')
df_cboe = sapi_eod.df
Example #15
0
def get_rows(df_sym, max_row, verb=False, calc_date_range=False):
    """Get rows adjacent to the max row to use for analysis.

    Starting from the index label of ``max_row``, the frame is scanned
    forward and then backward (up to 50 rows each way) while the
    neighbouring rows satisfy the price conditions below.

    Args:
        df_sym: frame looked up by ``.loc`` with labels computed as
            ``label +/- n`` — assumes a consecutive integer index for a
            single symbol; TODO confirm.
        max_row: frame whose first row is the anchor (read with
            ``.iloc[0]`` and ``.index[0]``).
        verb: print the decisions taken while scanning.
        calc_date_range: insert a ``date_range`` column holding the
            business-day count between the first and last date of the
            selected rows.

    Returns:
        ``max_row`` itself when the scan did not move in either
        direction, otherwise ``df_sym.loc[min_idx:max_idx]``.
    """
    # Rules - there should only be one fibonacci movement
    # Limit to major runs - one to two candles
    # Start with the highest volume for the time period (coincidentally the highest % change also)
    rows = None

    n_list = list(range(50))
    # Okay so this goes one row forward and sees if conditions are met
    min_idx = max_row.index[0]
    max_idx = max_row.index[0]

    # Moving forward
    # NOTE(review): max_idx is only advanced on the break paths; if all 50
    # forward rows qualify the loop ends with max_idx unchanged — confirm.
    for n in n_list:
        try:
            row = df_sym.loc[max_idx + (n + 1)]

            # Row qualifies: small/no drop and a higher high than the anchor
            if (row['fChangeP'] > -0.005) & (row['fHigh'] >
                                             max_row['fHigh'].iloc[0]):
                if verb:
                    print(f"Max idx: {str(max_idx + (n + 1))}")
            # Row qualifies: nearly flat and high still above the anchor low
            elif (abs(row['fChangeP']) < .0035) & (row['fHigh'] >
                                                   max_row['fLow'].iloc[0]):
                if verb:
                    print(f"Max idx: {str(max_idx + (n + 1))}")
            else:
                # First non-qualifying row: keep the rows before it
                if verb:
                    print(f"Max idx {str(row)}")
                max_idx = max_idx + n
                break
        except (KeyError,
                IndexError) as ke:  # When the index value isn't in the sym_df
            if verb:
                print(f"Max idx ke error: {str(ke)}")
            max_idx = max_idx + n
            break

    # Moving back
    for n in n_list:
        try:
            # row is the candidate; row_pre is the row one step closer to
            # the anchor (the anchor itself when n == 0)
            row = df_sym.loc[min_idx - (n + 1)]
            row_pre = df_sym.loc[min_idx - (n)]
            # If not, go one row back
            if (row['fChangeP'] >= -.005) & (row['fClose'] <
                                             max_row['fClose'].iloc[0]) & (
                                                 row['fClose'] > row['fOpen']):
                # rows = pd.concat([row, max_row]).sort_values(by=['fVolume'], ascending=True)
                if verb:
                    print(
                        f"Min idx: {str(min_idx - (n + 1))}: pos rowChangeP & fClose < max_row['fClose']"
                    )
            elif ((abs(row['fChangeP']) < .0035)
                  & (row['fHigh'] * .995 < max_row['fLow'].iloc[0])
                  & (row['fHigh'] > row_pre['fHigh'])
                  & (row['fLow'] > row_pre['fLow'])):
                if verb:
                    print(
                        f"Min idx: {str(min_idx - (n + 1))}: row['fHigh'] * .995 < max_row['fLow']"
                    )
            elif (((max_row['fLow'].iloc[0] > row['fLow'])
                   & (max_row['fClose'].iloc[0] > row['fHigh']))
                  & (row['fHigh'] > row_pre['fHigh'])
                  & (row['fLow'] > row_pre['fLow'])):
                if verb:
                    print(
                        f"Min idx: {str(min_idx - (n + 1))}: max_row['fLow'].iloc[0] > row['fLow']"
                    )
            # The next two branches stop the scan but include the candidate
            elif ((max_row['fLow'].iloc[0] > row['fLow']) &
                  (max_row['fHigh'].iloc[0] < row['fOpen'])):
                min_idx = min_idx - (n + 1)
                # NOTE(review): min_idx was already moved on the line above,
                # so the index printed below is offset a second time.
                if verb:
                    print(
                        f"Min idx condition reached: {str(min_idx - (n + 1))}: max_row['fHigh'] < row['fOpen']"
                    )
                break
            elif ((max_row['fLow'].iloc[0] - row['fHigh']) > .5):
                min_idx = min_idx - (n + 1)
                if verb:
                    print(
                        f"Min idx condition reached: {str(min_idx - (n + 1))}: max_row['fLow'] - row['fHigh'] > .5"
                    )
                break
            else:
                # Candidate rejected: keep the rows scanned so far
                min_idx = min_idx - n
                if verb:
                    print(f"Min idx condition reached: {str(row)}")
                break
        except (KeyError, IndexError) as ke:
            if verb:
                print(f"Min idx ke error: {str(ke)}")
            min_idx = min_idx - n
            break

    if min_idx == max_idx:
        rows = max_row
        if verb:
            print(f"Min idx == max_idx for symbol {max_row['symbol'].iloc[0]}")
    else:
        rows = df_sym.loc[min_idx:max_idx]
        if verb:
            print(f"Max row symbol == {max_row['symbol'].iloc[0]}")
            print(f"Max row index == {max_row.index}")
            print(f"Min Idx == {min_idx}")
            print(f"Max Idx == {max_idx}")
            print(f"Rows shape {rows.shape[0]}")

    if calc_date_range:
        # Holidays from this year up to the sec_master date are excluded
        # from the business-day count
        holidays_fpath = Path(baseDir().path, 'ref_data/holidays.parquet')
        holidays = pd.read_parquet(holidays_fpath)
        dt = getDate.query('sec_master')
        current_holidays = (holidays[(holidays['date'].dt.year >= dt.year)
                                     & (holidays['date'].dt.date <= dt)])
        hol_list = current_holidays['date'].dt.date.tolist()
        # The lambda ignores its row argument, so every row gets the same
        # count. NOTE(review): insert() modifies rows in place, and rows
        # may be max_row itself or a slice of df_sym — confirm intended.
        (rows.insert(
            2, "date_range",
            rows.apply(lambda row: np.busday_count(rows['date'].min().date(),
                                                   rows['date'].max().date(),
                                                   holidays=hol_list),
                       axis=1)))

    return rows
Example #16
0
    from scripts.dev.api import serverAPI
except ModuleNotFoundError:
    from multiuse.help_class import baseDir, getDate, write_to_parquet, help_print_arg
    from data_collect.iex_class import urlData
    from api import serverAPI

from importlib import reload
import sys
# Re-execute the helper module so edits to it are picked up in this session.
# Names already bound with "from ... import" are not rebound by reload.
reload(sys.modules['multiuse.help_class'])

# %% codecell
# Widen the pandas display limits
pd.set_option('display.max_columns', 65)
pd.set_option('display.max_rows', 150)

# %% codecell
# Directory of this year's 1-minute intraday files
dt = getDate.query('iex_eod')
bpath = Path(baseDir().path, 'intraday', 'minute_1', str(dt.year))

# Distinct symbols from the server's all_symbols frame
all_syms = serverAPI('all_symbols').df
syms = all_syms['symbol'].unique()


# %% codecell

minute = 'minute_1'


def combine_all_intraday_data(minute='minute_1'):
    """Combine all intraday data, write to file."""
    dt = getDate.query('iex_eod')
    path = Path(baseDir().path, 'intraday', minute, str(dt.year))
Example #17
0
# %% codecell

# fourWC: where all conditions hold, the row's index label minus 5; else 0.
# Conditions: same symbol as the previous row, fChangeP above 1%, and a
# chain of rising closes over the preceding rows.
# NOTE(review): the first close comparison uses shift(-1) (the following
# row) while the others use positive shifts — confirm this is intended.
# NOTE(review): a match at index label 5 also yields 0 and is then
# indistinguishable from "no match".
df_all['fourWC'] = np.where(
    ((df_all['symbol'] == df_all['prev_symbol'])
     & (df_all['fChangeP'] > .01)
     & (df_all['fClose'] > df_all['fClose'].shift(-1, axis=0))
     & (df_all['fClose'].shift(1, axis=0) > df_all['fClose'].shift(2, axis=0))
     & (df_all['fClose'].shift(2, axis=0) > df_all['fClose'].shift(3, axis=0))
     & (df_all['fClose'].shift(3, axis=0) > df_all['fClose'].shift(4, axis=0))
     &
     (df_all['fClose'].shift(4, axis=0) > df_all['fClose'].shift(5, axis=0))),
    (df_all.index - 5), 0)

# Holidays from this year up to the sec_master date, as plain dates
holidays_fpath = Path(baseDir().path, 'ref_data/holidays.parquet')
holidays = pd.read_parquet(holidays_fpath)
dt = getDate.query('sec_master')
current_holidays = (holidays[(holidays['date'].dt.year >= dt.year)
                             & (holidays['date'].dt.date <= dt)])
hol_list = current_holidays['date'].dt.date.tolist()

# Keep the flagged rows and pair each with the previous flagged row
df_four = df_all[(df_all['fourWC'] != 0)].copy()
df_four.insert(0, 'prevSymDate', df_four['date'].shift(1, axis=0))
df_four.insert(3, 'prevSymSub', df_four['symbol'].shift(1, axis=0))
cols_to_keep = ['prevSymDate', 'date', 'symbol', 'prevSymSub', 'fourWC']
# Only pairs where both flagged rows belong to the same symbol
df_four_sub = df_four[df_four['symbol'] ==
                      df_four['prevSymSub']][cols_to_keep].copy()

(df_four_sub.insert(
    2, "dayDiff",
    df_four_sub.apply(lambda row: np.busday_count(
        row['prevSymDate'].date(), row['date'].date(), holidays=hol_list),
Example #18
0
# Re-execute fib_funcs so the import below picks up the latest edits
importlib.reload(sys.modules['workbooks.fib_funcs'])
from workbooks.fib_funcs import read_clean_combined_all

# Local combined-all frame, then the 2021 rows only
df_all = read_clean_combined_all(local=True)
df_all['date'] = pd.to_datetime(df_all['date'])
df_2021 = df_all[df_all['date'].dt.year == 2021].copy(deep=True)
# %% codecell

# %% codecell
# get_yf_loop_missing_hist(key='get_ignore_ytd')
all_syms = serverAPI('all_symbols').df
all_syms.columns


# %% codecell
# Pieces of the CTA summary file location for the previous iex date
dt = getDate.query('iex_previous')
dt
url1 = 'ftp://ftp.nyxdata.com/'
url2 = 'cts_summary_files'
url3 = f"CTA.summary.{dt.strftime('%Y%m%d')}"

# NOTE(review): url does not include the file name held in url3
url = f"{url1}{url2}"
# NOTE(review): requests.get() is called without a url argument, which
# raises TypeError as written; the target is also an ftp:// address.
get = requests.get()


import shutil
import urllib.request as request
from contextlib import closing
from urllib.error import URLError

file = url3
Example #19
0
def read_clean_combined_all(local=False, dt=None, filter_syms=True):
    """Read, clean, and add columns to StockEOD combined all.

    Args:
        local: when true, read the most recent parquet file in
            ``StockEOD/combined_all``. Otherwise pull
            ``stock_close_cb_all`` from the server API and combine it with
            the most recent local historical file.
        dt: date whose year is the earliest year kept; defaults to
            ``getDate.query('iex_eod')``.
        filter_syms: server branch only — keep just the symbols returned
            by ``remove_funds_spacs()``.

    Returns:
        DataFrame sorted by symbol and date, without rows that lack
        ``fVolume`` or symbols with a single row, and with the derived
        columns added below (range, gap, cumulative/rolling change, RSI,
        moving averages, fHighMax), passed through ``dataTypes``.
    """
    cols_to_read = [
        'date', 'symbol', 'fOpen', 'fHigh', 'fLow', 'fClose', 'fVolume'
    ]

    if local:
        bpath = Path(baseDir().path, 'StockEOD/combined_all')
        fpath = get_most_recent_fpath(bpath)
        df_all = pd.read_parquet(fpath, columns=cols_to_read)
        if df_all['date'].dtype == 'object':
            df_all['date'] = pd.to_datetime(df_all['date'])
        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
    else:
        df_all = serverAPI('stock_close_cb_all').df
        df_all = df_all[cols_to_read]

        if filter_syms:
            all_cs_syms = remove_funds_spacs()
            df_all = df_all[df_all['symbol'].isin(
                all_cs_syms['symbol'])].copy()

        df_all['date'] = pd.to_datetime(df_all['date'])

        # Define base bpath for 2015-2020 stock data
        bpath = Path(baseDir().path, 'historical/each_sym_all')
        path = get_most_recent_fpath(
            bpath.joinpath('each_sym_all', 'combined_all'))
        df_hist = pd.read_parquet(path)
        # Combine 2015-2020 stock data with ytd
        df_all = pd.concat([df_hist, df_all]).copy()

        df_all.drop_duplicates(subset=['symbol', 'date'], inplace=True)
        df_all.reset_index(drop=True, inplace=True)

    if not dt:
        dt = getDate.query('iex_eod')
    # Exclude all dates from before this year
    df_all = (df_all[df_all['date'] >= str(dt.year)].dropna(
        subset=['fVolume']).copy())

    # Get rid of all symbols that only have 1 day of data
    df_vc = df_all['symbol'].value_counts()
    df_vc_1 = df_vc[df_vc == 1].index.tolist()
    df_all = (df_all[~df_all['symbol'].isin(df_vc_1)].reset_index(
        drop=True).copy())
    # Sort by symbol, date ascending
    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    df_all['fRange'] = (df_all['fHigh'] - df_all['fLow'])
    df_all['vol/mil'] = (df_all['fVolume'].div(1000000))
    df_all['prev_close'] = df_all['fClose'].shift(periods=1, axis=0)
    df_all['prev_symbol'] = df_all['symbol'].shift(periods=1, axis=0)

    # Add fChangeP col
    print('Fib funcs: adding fChangeP column')
    df_all = add_fChangeP_col(df_all)

    # Add gap column
    print('Fib funcs: adding gap column')
    df_all = add_gap_col(df_all)

    # Add range of gap
    df_all['gRange'] = (np.where(df_all['prev_close'] < df_all['fLow'],
                                 df_all['fHigh'] - df_all['prev_close'],
                                 df_all['fHigh'] - df_all['fLow']))

    # True where the row continues the previous row's symbol
    same_symbol = df_all['symbol'] == df_all['prev_symbol']

    # NOTE(review): the cumsum/rolling calls below run over the whole
    # frame, not per symbol; only each symbol's first row is masked.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    df_all['cumPerc'] = np.where(same_symbol,
                                 df_all['fChangeP'].cumsum(), np.nan)

    df_all['perc5'] = np.where(same_symbol,
                               df_all['cumPerc'].shift(-5) - df_all['cumPerc'],
                               np.nan)

    df_all['vol_avg_2m'] = np.where(same_symbol,
                                    df_all['fVolume'].rolling(60).mean(),
                                    np.nan)

    # Add cumulative sum of last 5 fChangeP rows
    df_all['fCP5'] = (np.where(
        same_symbol,
        df_all['fChangeP'].rolling(min_periods=1, window=5).sum(), 0))

    df_all = df_all.copy()
    # Calc RSI and moving averages
    print('Fib Funcs: calc_rsi')
    df_all = calc_rsi(df_all)
    print('Fib Funcs: making_moving_averages')
    df_all = make_moving_averages(df_all)

    # fHighMax funcs
    print('Fib funcs: fHighMax')
    df_all = add_fHighMax_col(df_all).copy()

    df_all = df_all.sort_values(by=['symbol', 'date'], ascending=True)

    # Widen float32 columns to float64 and round them to 3 decimals
    float_32s = df_all.dtypes[df_all.dtypes == np.float32].index
    for col in float_32s:
        df_all[col] = df_all[col].astype(np.float64).round(3)

    df_all = dataTypes(df_all, parquet=True).df.copy()

    return df_all