Ejemplo n.º 1
0
    def local_get_data(cls, self, bins_unique, testing):
        """Get historical data using imported function."""
        # Walk every symbol bin and request data for its members
        for bin_id in bins_unique:
            bin_syms = self.df[self.df['bins'] == bin_id]
            if testing:
                # Testing mode: only a handful of symbols per bin
                bin_syms = bin_syms.sample(n=5).copy(deep=True)

            # Request and store data locally for each symbol in the bin
            for sym in bin_syms['symbol'].tolist():
                if self.last_month:
                    HistPricesV2(sym, last_month=True)
                elif self.previous:
                    try:
                        HistPricesV2(sym, previous=True)
                    except NameError as ne:
                        msg = f"Master Hist Prices Error: symbol - {sym} - {str(ne)}"
                        help_print_arg(msg)
                        break
                else:
                    HistPricesV2(sym)

        return True
Ejemplo n.º 2
0
def cboe_symref_raw():
    """Read, concat, and write cboe symbol ref.

    Requests the symbol-reference CSV for each cboe options market,
    concatenates the results, drops unneeded columns and rows missing
    the OSI Symbol, and writes the combined frame to a dated parquet.
    """
    mkt_list = ['cone', 'opt', 'ctwo', 'exo']
    burl1 = 'https://www.cboe.com/us/options/'
    burl2 = 'market_statistics/symbol_reference/?mkt='
    url_end = '&listed=1&unit=1&closing=1'

    df_list = []
    for mkt in mkt_list:
        url = f"{burl1}{burl2}{mkt}{url_end}"
        get = requests.get(url)

        if get.status_code == 200:
            df_list.append(pd.read_csv(BytesIO(get.content)))
        else:
            help_print_arg(f"Symbol ref request failed for mkt {str(mkt)}")

    # Bug fix: pd.concat raises ValueError on an empty list - bail out
    # instead of crashing when every request failed.
    if not df_list:
        help_print_arg("cboe_symref_raw: no symbol ref data collected")
        return

    df = pd.concat(df_list)
    cols_to_drop = ['Matching Unit', 'Closing Only']
    df.drop(columns=cols_to_drop, inplace=True)
    # Rows without an OSI Symbol identifier are unusable
    if df['OSI Symbol'].isna().sum() != 0:
        df.dropna(subset=['OSI Symbol'], inplace=True)

    dt = getDate.query('iex_close')
    path_to_write = (Path(baseDir().path,
                     'ref_data/yoptions_ref/cboe_ref_raw', f'_{dt}.parquet'))

    write_to_parquet(df, path_to_write)
Ejemplo n.º 3
0
    def _get_sym_min_data(cls, self, sym, dt, bpath, verbose=False):
        """Get minute data for symbol. Write to file."""
        # Default to the most recent IEX end-of-day date
        dt = dt if dt else getDate.query('iex_eod')

        # Destination: <bpath>/<first letter>/_<sym>.parquet
        fpath = bpath.joinpath(sym[0].lower(), f"_{sym}.parquet")
        # IEX chart endpoint for the requested day, minute bars
        url_p1 = f"/stock/{sym.lower()}/chart/date/"
        url_p2 = f"{dt.strftime('%Y%m%d')}?chartByDay=false"
        url = f"{url_p1}{url_p2}"

        if verbose:  # If verbose print out key vars
            help_print_arg(
                f"Sym: {sym}, Date: {str(dt)}, fpath: {str(fpath)}, url: {url}")

        # Fetch and shape the dataframe
        df_ud = urlData(url).df
        # Combine the separate date + minute columns into one datetime
        df_ud['dtime'] = pd.to_datetime(df_ud['date'] + df_ud['minute'],
                                        format='%Y-%m-%d%H:%M')
        df_ud['date'] = pd.to_datetime(df_ud['date'], format='%Y-%m-%d')
        df_ud.insert(0, 'symbol', sym)
        df_ud['symbol'] = df_ud['symbol'].astype('category')
        df_ud.drop(columns=['minute', 'exchangeType'],
                   inplace=True, errors="ignore")

        # Write to parquet and exit function
        write_to_parquet(df_ud, fpath, combine=True)
Ejemplo n.º 4
0
def makedirs_with_permissions(path):
    """Create *path* (and parents) with mode 0o777 if it does not exist.

    The umask is cleared so mode 0o777 is applied exactly, then restored
    afterwards - the original left the process umask at 0 permanently,
    silently changing the permissions of every file created later.
    """
    if not os.path.isdir(path):
        old_umask = os.umask(0)
        try:
            os.makedirs(path, mode=0o777)
        finally:
            # Bug fix: restore the previous umask
            os.umask(old_umask)
    else:
        help_print_arg(f"Directory already exists: {str(path)}")
Ejemplo n.º 5
0
    def filter_my_stocks(cls, self):
        """Filter dataframe for my stocks.

        Reads the local list of my symbols, normalizes rss column names
        where needed, keeps only rows for held symbols, and builds a
        per-symbol message dict for later notification.
        """
        path = Path(baseDir().path, 'tickers', 'my_syms.parquet')
        my_df = pd.read_parquet(path)
        # Convert local dataframe to syms to look for
        inv_list = my_df['symbol'].tolist()

        # Bug fix: the original `('form' or 'cik') not in ...` evaluates
        # to `'form' not in ...` and never checked the 'cik' column.
        if 'form' not in self.df.columns or 'cik' not in self.df.columns:
            col_dict = {'description': 'form', 'CIK': 'cik'}
            self.df.rename(columns=col_dict, inplace=True)

        df_inv = self.df[self.df['symbol'].isin(inv_list)].copy()

        if (df_inv.shape[0] == 0) and self.testing:
            help_print_arg("AnalyzeSecRss: no matching stocks for rss feed")

        forms_to_watch = ['8-K', '3', '4']
        # df_forms = df_inv[df_inv['form'].isin(forms_to_watch)]

        # One message list per held symbol; only rows with a CIK produce one
        msg_dict = {sym: [] for sym in inv_list}
        for index, row in df_inv.iterrows():
            if row['cik']:
                msg = f"{row['symbol']} has just filed form {row['form']}"
                msg_dict[row['symbol']].append(msg)

        self.msg_dict = msg_dict
        self.df_inv = df_inv.copy()
Ejemplo n.º 6
0
def get_all_symbol_ref():
    """Get all common and OTC symbols."""
    load_dotenv()
    env = os.environ.get("env")

    df_all = None

    if env == "production":
        # Production: read both symbol lists straight from disk
        bpath = Path(baseDir().path, 'tickers', 'symbol_list')
        com_df = pd.read_parquet(bpath.joinpath('all_symbols.parquet'))
        otc_df = pd.read_parquet(bpath.joinpath('otc_syms.parquet'))

        otc_df.dropna(subset=['cik'], inplace=True)
        # Normalize CIK to a zero-padded 10-char categorical string
        otc_df['cik'] = (otc_df['cik'].astype('int64').astype('str').str.zfill(
            10).astype('category').reset_index(drop=True))
        df_all = pd.concat([com_df, otc_df]).reset_index(drop=True)
    else:
        # Everywhere else: pull both lists from the server API
        try:
            from api import serverAPI
            com_syms = serverAPI('all_symbols').df
            otc_syms = serverAPI('otc_syms').df
            df_all = pd.concat([com_syms, otc_syms]).reset_index(drop=True)
        except ModuleNotFoundError:
            help_print_arg('Tried import server api in get_all_symbols func')

    return df_all
Ejemplo n.º 7
0
    def get_last_range(cls, self, sym):
        """Get last month of data."""
        get = requests.get(self.url, params=self.payload)
        # If at first you don't succeed, try, try again.
        if get.status_code != 200:
            get = requests.get(self.url, params=self.payload)
        self.get = get

        # Guard clause: give up after the retry also failed
        if get.status_code != 200:
            help_print_arg(f"IexHistV2 for {sym} get request failed with status_code {get.status_code}")
            return

        try:
            df = pd.DataFrame(get.json())
        except ValueError:
            # Scalar payload: build a single-row frame from the dict
            df = pd.DataFrame.from_dict(get.json(), orient='index').T
        # self.df = dataTypes(df).df

        if os.path.isfile(self.fpath):
            # Append the new rows to the existing local history
            old_df = pd.read_parquet(self.fpath)
            df = pd.concat([old_df, df]).reset_index(drop=True)

        # Write dataframe to parquet file and keep it on the class
        write_to_parquet(df, self.fpath)
        self.df = df
Ejemplo n.º 8
0
    def clean_data(cls, self):
        """Clean dataframe - remove na columns.

        Drops columns that are at least 75% NaN, extracts the CIK from
        the title column, keeps only entries published within the last
        hour today, and hands those off to AnalyzeSecRss.
        """
        df = self.df
        na_cutoff = (.75 * df.shape[0])
        # Drop columns that are at least 3/4 nas
        cols_to_drop = [col for col in df.columns
                        if df[col].isna().sum() > na_cutoff]
        df.drop(columns=cols_to_drop, inplace=True)

        # Extract CIK from title column, e.g. "Company (0001234567)"
        try:
            # Bug fix: raw string - "\(" was an invalid escape sequence
            df['CIK'] = df['title'].str.extract(r"\((.*?)\)")
        except Exception as e:
            help_print_arg(f"SEC RSS CIK Error: {str(e)}")

        df['dt'] = pd.to_datetime(df['pubDate'])
        # NOTE(review): named prev_15 but actually looks back 60 minutes
        prev_15 = (datetime.now() - timedelta(minutes=60)).time()
        sec_df = (df[(df['dt'].dt.time > prev_15)
                     & (df['dt'].dt.date == date.today())].copy())

        self.df = df.copy()
        try:
            AnalyzeSecRss(latest=True, sec_df=sec_df)
        except Exception as e:
            help_print_arg(f"SecRss: AnalyzeSecRss Error {str(e)}")
Ejemplo n.º 9
0
    def _display_options(cls, self, category, keyword):
        """Display categorical options.

        Resolves *category*/*keyword* against the FpathDicts lookup. On a
        full match the file path is stored in ``self.fpath``; on a failed
        or partial match the available choices go in ``self.options``.
        """
        cat_dict = ({
            'peers': FpathDicts.get_peers(),
            'refs': FpathDicts.symbol_ref_data(),
            'ticks': FpathDicts.intraday_tick(),
            'warrants': FpathDicts.warrants(),
            'company_stats': FpathDicts.company_stats(),
            'scans': FpathDicts.scans(),
            'sec': FpathDicts.sec(),
            'externals': FpathDicts.externals(),
            'stocktwits': FpathDicts.stocktwits(),
            'historical': FpathDicts.historical()
        })
        self.cat_dict = cat_dict

        if category in cat_dict:
            if keyword:
                if keyword in cat_dict[category]:
                    self.fpath = cat_dict[category][keyword]
                else:
                    # Bug fix: the not-found message and the options
                    # fallback were attached to the wrong branches.
                    help_print_arg('Could not find your keyword')
                    self.options = cat_dict[category]
            else:
                # No keyword given - expose the category's choices
                self.options = cat_dict[category]
        else:
            help_print_arg('Could not find your category')
            self.options = cat_dict.keys()
Ejemplo n.º 10
0
def concat_and_or_write(df_all, path, path_parq=True, path_gz=False, to_parq=True,
                        to_gz=False, from_parq=True, from_gz=False, verb=False):
    """Concat and write to parquet and/or gzip file.

    If *path* already exists, the old data is read (parquet or gzip
    json), concatenated with *df_all*, and written back; otherwise
    *df_all* is written fresh in the requested format.

    path_parq / path_gz : optional paths used to infer the source format.
    to_parq / to_gz : output format when *path* does not exist yet.
    from_parq / from_gz : which reader to use when *path* exists.
    verb : print the path when True.
    """
    if '.parquet' in str(path_parq):
        from_parq = True
    # Bug fix: the original tested '.gz' in str(from_gz) - a bool - so
    # from_gz could never be inferred from the gz path argument.
    if '.gz' in str(path_gz):
        from_gz = True

    if verb:
        help_print_arg(path)

    if Path(path).exists() and from_parq:
        # Bug fix: read_parquet was called with no path argument
        df_old = pd.read_parquet(path)
        df_all = pd.concat([df_old, df_all]).reset_index(drop=True)
        df_all.to_parquet(path)
    elif Path(path).exists() and from_gz:
        df_old = pd.read_json(path, compression='gzip')
        df_all = pd.concat([df_old, df_all]).reset_index(drop=True)
        df_all.to_json(path, compression='gzip')
    else:
        if to_parq:
            df_all.to_parquet(path)
        elif to_gz:
            df_all.to_json(path, compression='gzip')
Ejemplo n.º 11
0
    def __init__(self,
                 followup=False,
                 testing=False,
                 options=True,
                 other=False):
        """Pick the symbol universe, then kick off the collection loop."""
        self.testing, self.options, self.other = testing, options, other
        self.proceed = True
        proxies = get_sock5_nord_proxies()

        if self.options:
            if followup:
                # Second pass: only symbols that still need data
                self.sym_df = get_yoptions_unfin()
            else:
                self.sym_df = get_cboe_ref(ymaster=True)
                if self.sym_df.empty:
                    # Nothing left to collect
                    self.proceed = False
        elif not followup and other == 'yinfo':
            self.sym_df = get_all_symbol_ref()
        else:
            help_print_arg('No SetUpYahooOptions __init__ condition satisfied')

        if self.proceed:  # Default True
            comb_df = self.get_bins_and_combine(self, proxies)
            self.initiate_for_loop(self, comb_df)
0
 def send_text_messages(cls, self):
     """Send text messages to myself with relevant data."""
     for sym, msg in self.msg_dict.items():
         if not msg:
             # Nothing to send - in testing mode note the empty message
             if self.testing:
                 help_print_arg("AnalyzeSecRss: testing msg send func")
                 help_print_arg(str(msg))
             continue
         send_twilio_message(msg=msg)
Ejemplo n.º 13
0
    def get_rss_feed(cls, self):
        """Request and retry to get data from sec."""
        get = None
        # Up to two attempts; warn only when both fail
        for _ in range(2):
            get = requests.get(self.url, headers=self.headers)
            if get.status_code < 400:
                break
        else:
            help_print_arg('SEC RSS Feed: 2nd get request failed')

        # Parse every <item> element out of the rss feed
        self.df = pd.read_xml(get.content, xpath='.//item')
Ejemplo n.º 14
0
    def get_data(cls, self, base_url, headers):
        """Get Alpaca symbol reference data."""
        resp = requests.get(f"{base_url}/assets", headers=headers)

        if resp.status_code >= 400:
            # Surface the error body for debugging
            help_print_arg(resp.content)
        else:
            df = pd.DataFrame(resp.json())
            self.df = dataTypes(df).df
Ejemplo n.º 15
0
    def apca_get_data(cls, self, testing):
        """Start long running apca historical data request."""
        syms = self.df['symbol']

        if testing:
            # Testing: only a small random sample of symbols
            kwargs = {'sym_list': syms.sample(n=10).tolist()}
            help_print_arg(kwargs)
        else:
            kwargs = {'sym_list': syms.tolist()}

        rate_limit(ApcaHist, testing=testing, **kwargs)
        return True
Ejemplo n.º 16
0
def execute_iex_stats(df, testing=False):
    """Task_functions loop for individual bin."""
    # The frame arrives json-encoded from the celery task
    df = pd.read_json(df)

    for _, row in df.iterrows():
        try:
            get_daily_stats(row)
        except Exception as e:
            msg = f"Daily Stats Error: symbol - {row['symbol']} - {str(e)}"
            help_print_arg(msg)
        if testing:
            # Only process the first row when testing
            break
Ejemplo n.º 17
0
def yoptions_drop_hist_dupes():
    """Cycle through yoptions hist and drop duplicates."""
    yr = getDate.query('cboe').year
    base = Path(baseDir().path, 'derivatives/end_of_day/', str(yr))

    for fpath in tqdm(list(base.glob('**/*.parquet'))):
        try:
            df = pd.read_parquet(fpath)
            # A contract + date pair should appear at most once
            df.drop_duplicates(subset=['contractSymbol', 'date'], inplace=True)
            write_to_parquet(df, fpath)
        except Exception as e:
            help_print_arg(e)
Ejemplo n.º 18
0
 def __init__(self, sym, current_day=True, ytd=False, testing=False):
     """Collect, clean, and store Alpaca data for one symbol."""
     self.assign_variables(self, current_day, ytd, testing)
     self.construct_fpath(self, sym)
     # Build the request pieces, then fetch (defaults to today's data)
     req_headers, req_url, req_params = self.construct_params(self, sym)
     self.get_data(self, req_headers, req_url, req_params)
     # Concat with any existing file, otherwise just clean
     self.clean_concat_data(self)

     if not isinstance(self.df, pd.DataFrame):
         help_print_arg(f"Data Collection for symbol {sym} failed")
     else:
         # Persist to the local parquet file
         self.write_to_parquet(self)
Ejemplo n.º 19
0
    def initiate_exec(cls, self, cs_adr):
        """Initiate execution of cs_adr loop through.

        Splits cs_adr by its 'bins' column and dispatches each slice to
        the celery task when available, falling back to running the iex
        stats loop inline.
        """
        bins = cs_adr['bins'].unique().tolist()
        args = [cs_adr[cs_adr['bins'] == n] for n in bins]

        # Bug fix: the import was retried on every iteration, and
        # execute_func was referenced after the loop even when the
        # import had failed (NameError). Import once, then guard.
        execute_func = None
        try:
            from app.tasks import execute_func
        except ModuleNotFoundError:
            help_print_arg('Execute yahoo options not found')

        for arg in args:
            if execute_func is not None:
                execute_func.delay('execute_iex_stats', **{'df': arg.to_json()})
            else:
                execute_iex_stats(arg.to_json())

        if execute_func is not None:
            # 15 minutes in the future, combine all company stats info
            # All previous symbols are assumed to have data at that point
            execute_func.apply_async(args=['combine_stats'], countdown=900)
Ejemplo n.º 20
0
def scraped_ee_dates(verbose=False, hist=False, current_year=True):
    """Start for loop of dates to get future/past analyst estimates.

    Builds the list of business days that still lack a local scraped
    analyst-earnings file, then calls ScrapedEE for each missing date
    with a random pause between requests.

    Parameters
    ----------
    verbose : bool
        Print the list of successfully processed dates at the end.
    hist : bool
        Include past dates (bday_diff < 15) instead of only the window
        from today forward.
    current_year : bool
        With ``hist``, restrict past dates to the current year.
    """
    dt = getDate.query('iex_eod')
    bdays, pos_days = None, None

    # More than 15 days left in the year: this year's business days are
    # enough; otherwise pull a wider set and keep this year onward.
    if (365 - dt.timetuple().tm_yday) > 15:
        bdays = getDate.get_bus_days(this_year=True)
    else:
        bdays = getDate.get_bus_days(this_year=False)
        bdays = bdays[bdays['date'].dt.year >= dt.year].copy()

    # Business-day distance from today for every candidate date
    bdays['current_date'] = pd.to_datetime(getDate.query('iex_close'))
    bdays['bday_diff'] = (getDate.get_bus_day_diff(
                          bdays, 'current_date', 'date'))

    if hist and not current_year:
        # All past/near dates regardless of year
        pos_days = bdays[bdays['bday_diff'] < 15].copy()
    elif hist and current_year:
        cond1 = (bdays['bday_diff'] < 15)
        cond2 = (bdays['date'].dt.year == dt.year)
        pos_days = bdays[cond1 & cond2].copy()
    else:
        # Default: dates within the next 15 business days
        pos_days = bdays[bdays['bday_diff'].between(0, 15)].copy()

    bpath = Path(baseDir().path, 'economic_data', 'analyst_earnings')
    fpath_dir = bpath.joinpath(f"_{str(dt.year)}")

    # Expected local file per date - used to detect what's missing
    pos_days['fpath'] = (pos_days.apply(lambda row:
                         f"{fpath_dir}/_{str(row['date'].date())}.parquet",
                                        axis=1))

    pos_days['fpath_exists'] = (pos_days['fpath'].astype(str)
                                .map(os.path.exists))
    dt_need = pos_days[~pos_days['fpath_exists']]

    dt_list = []

    # NOTE(review): the loop variable reuses the name `dt` from above;
    # fpath_dir was already computed, so this is safe but easy to misread.
    for dt in dt_need['date']:
        try:
            ScrapedEE(dt=dt.date())
            # Random pause to avoid hammering the source
            sleep(randint(5, 15))
            dt_list.append(dt.date())
        except Exception as e:
            help_print_arg(f"scraped_ee_dates {type(e)} {str(e)}")

    if verbose:
        help_print_arg(str(dt_list))
Ejemplo n.º 21
0
def collect_rest_of_yoptions():
    """After a period of time, collect rest of data.

    Follow-up to the first sequence of requests: empty unfinished files
    are deleted, non-empty ones are re-dispatched to the celery task.
    """
    path = Path(baseDir().path, 'derivatives/end_of_day/unfinished')
    paths = list(path.glob('*.parquet'))

    # Improvement: the original attempted the import (and printed the
    # warning) once per non-empty file; do it once up front instead.
    execute_func = None
    try:
        from app.tasks import execute_func
    except ModuleNotFoundError:
        help_print_arg('Execute yahoo options not found')

    for fpath in paths:
        df = pd.read_parquet(fpath)
        if df.empty:
            # An empty frame means the bin finished - clean it up
            os.remove(fpath)
        elif execute_func is not None:
            execute_func.delay('execute_yoptions', **{'df': df.to_json()})
Ejemplo n.º 22
0
    def __init__(self, sym, testing=False, last_month=False, previous=False):
        """Route *sym* to the right historical data collection path."""
        self.testing = testing
        self.last_month, self.previous = last_month, previous
        # Check if no data path exists - default get ytd
        self.check_existing(self, sym)
        self.get_iex_params(self, sym)

        if last_month or previous:
            self.get_last_range(self, sym)
        elif self.need_data:
            if self.need_ytd:  # If ytd data needed
                self.get_ytd(self)
            else:  # If exact dates needed
                self.get_exact_dates(self)
        else:
            msg = 'HistPricesV2: None of the __init__ conditions satisfied'
            help_print_arg(msg)
            # Improvement: carry the message in the exception instead of
            # raising a bare NameError with no context.
            raise NameError(msg)
Ejemplo n.º 23
0
    def _request_data_and_store(cls, self):
        """Request data and convert to dataframe. Write locally."""
        get = requests.get(self.url,
                           headers=self.headers,
                           params=self.payload)

        if get.status_code < 400:
            # Rows live under data -> rows in the json payload
            df = pd.DataFrame(get.json()['data']['rows'])
            df['date'] = self.dt
            self.df = df
            # Cleaning step also handles the local write
            CleanScrapedEE(df, self.fpath)
            # write_to_parquet(df, self.fpath)
        else:
            help_print_arg(f"Scraped EE failed with msg {str(get.content)}")

        # Keep the raw response around for inspection
        self.get = get
Ejemplo n.º 24
0
def paths_combine_dataframes(dirs, cb_path='', cb_all_path='', verbose=False):
    """Read dataframes and combine into combined, combined_all fpaths.

    Reads every parquet file under each directory in *dirs*, writes the
    full concatenation to *cb_all_path* and only the most recent date's
    rows to *cb_path*.
    """
    # Idiom fix: the original loop variable `dir` shadowed the builtin
    df_list = [pd.read_parquet(f)
               for d in dirs
               for f in d.glob('**/*.parquet')]

    df_all = pd.concat(df_list)

    # Combined frame keeps only the latest date's rows
    df_cb = df_all[df_all['date'] == df_all['date'].max()]

    if verbose:
        msg = f"paths_combine_dataframes {str(df_cb['date'].max())}"
        help_print_arg(msg)

    write_to_parquet(df_cb, cb_path)
    write_to_parquet(df_all, cb_all_path)
Ejemplo n.º 25
0
def execute_yahoo_func(df, which='yinfo', verbose=False, **kwargs):
    """Execute for loop. Run from tasks execute_function.

    Parameters
    ----------
    df : str
        Json-encoded dataframe (passed from a celery task).
    which : str
        Key into the function dict; may be overridden via kwargs.
    verbose : bool
        Print per-row error details.
    """
    # Df is in json format because it's being passed from a celery task
    df = pd.read_json(df)

    # If which function to execute is passed
    if 'which' in kwargs.keys():
        which = kwargs['which']

    # Define function dict and unfinished fpath dir to store unfinished symbols
    func_dict = {'yinfo': ysymbols_info}
    unfin_dict = ({
        'yinfo': 'tickers/info/unfinished',
        'yoptions': 'derivatives/end_of_day/unfinished'
    })

    # Add all index/row errors to dict for future use
    error_dict = {}

    for index, row in df.iterrows():
        try:
            if which == 'yinfo':
                func_dict[which](row['symbol'])
        except SOCKS5AuthError as sae:
            # Print error
            help_print_arg(
                f"Execute Yahoo Func: Socks 5 AuthError: {str(sae)}")
            try:
                # Brief pause, then retry the same symbol once
                time.sleep(.5)
                if which == 'yinfo':
                    func_dict[which](row['symbol'])
            except Exception as e:  # End loop
                break
        except TypeError as te:
            error_dict[index] = row
            if verbose:
                help_print_arg(f"Execute yahoo func: TypeError: {str(te)}")
                help_print_arg(f"{str(index)}: {str(row)}")
        except Exception as e:
            error_dict[index] = row
            if verbose:
                help_print_arg(f"Execute yahoo func: Gen Excp: {str(e)}")

    # NOTE(review): `index` and `row` deliberately leak out of the loop
    # here - the error rows plus everything from the last processed row
    # onward are written to the unfinished file. The UnboundLocalError
    # guard covers the empty-dataframe case where the loop never ran.
    try:
        # Create dataframe from error dict
        df_errors = pd.DataFrame.from_dict(error_dict).T
        df_unfin = pd.concat([df_errors, df.iloc[index:]]).copy()
        # Define path to write file
        path = Path(baseDir().path, unfin_dict[which],
                    f"df_bin{row['bins']}.parquet")
        df_unfin.to_parquet(path)
    except UnboundLocalError:
        pass
Ejemplo n.º 26
0
def get_nasdaq_symbol_changes():
    """Get symbol change history from nasdaq.

    Requests the symbol-change table from the nasdaq API and writes it
    to a dated parquet file. Raises when no data could be retrieved.
    """
    sym_change_url = 'https://api.nasdaq.com/api/quote/list-type-extended/symbolchangehistory'

    # Browser-like headers: the nasdaq API rejects bare requests
    nasdaq_headers = ({
        'Host': 'api.nasdaq.com',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:94.0) Gecko/20100101 Firefox/94.0',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Origin': 'https://www.nasdaq.com',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Referer': 'https://www.nasdaq.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'Sec-GPC': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache'
    })

    get = requests.get(sym_change_url, headers=nasdaq_headers)

    df_sym_change = None
    if get.status_code == 200:
        df_sym_change = (pd.DataFrame(
            get.json()['data']['symbolChangeHistoryTable']['rows']))
    else:
        msg1 = 'get_nasdaq_symbol_changes failed with url'
        msg2 = f"and status code {str(get.status_code)}"
        help_print_arg(f"{msg1} {sym_change_url} {msg2}")

    dt = getDate.query('iex_close')
    path = (Path(baseDir().path, 'ref_data/symbol_ref/symbol_changes',
                 f'_{dt}.parquet'))

    if isinstance(df_sym_change, pd.DataFrame):
        write_to_parquet(df_sym_change, path)
    else:
        # Improvement: the original raised a bare `Exception` with no
        # context; keep the type (callers catch Exception) but explain.
        raise Exception('get_nasdaq_symbol_changes: no data retrieved')
Ejemplo n.º 27
0
    def _file_change_loop(cls, self, fpath_list):
        """Start the for loop for file type change."""
        errors = []
        error_paths = []
        gc.set_threshold(100, 5, 5)

        for fpath in tqdm(fpath_list):
            # File size in megabytes
            mb = os.path.getsize(str(fpath)) / 1000000
            # Skip big files and the apca/intraday trees
            skip = ('apca' in str(fpath)) or ('intraday' in str(fpath))
            if mb < 250 and not skip:
                try:
                    self._read_write_file(self, fpath)
                except Exception as e:
                    msg = f"{type(e)} : {str(fpath)} : {str(e)}"
                    help_print_arg(msg)
                    error_paths.append(fpath)
                    errors.append(msg)

        self.exc_list += errors
        self.fpath_exc_list += error_paths
Ejemplo n.º 28
0
    def get_all_int_syms(cls, self):
        """Loop through exchanges and get all international symbols."""
        df_list = []

        for exch in self.exch_df['exchange'].tolist():
            url = f"/ref-data/exchange/{exch}/symbols"
            ud = None
            try:
                ud = urlData(url)
                df = ud.df.copy()
                df_list.append(df)
                sleep(.5)
            except Exception as e:  # If error, print error and exchange to console
                msg_1 = f"IntSyms Error getting data for exchange {exch}"
                msg_2 = f" with error type: {type(e)} and error: {str(e)}"
                help_print_arg(f"{msg_1}{msg_2}")
                # Bug fix: `ud` was referenced unconditionally here, so a
                # failure inside urlData() itself raised NameError before
                # any message printed. Only report response details when
                # the request object exists.
                if ud is not None:
                    msg_3 = f"Url: {url} get.status_code: {ud.get.status_code} message: {ud.get.text}"
                    help_print_arg(msg_3)

        df_all = pd.concat(df_list).reset_index(drop=True)
        self.all_int_syms = df_all
Ejemplo n.º 29
0
def combine_all_intraday_data(minute='minute_1'):
    """Combine all intraday data, write to file."""
    dt = getDate.query('iex_eod')
    path = Path(baseDir().path, 'intraday', minute, str(dt.year))

    df_list = []
    for fpath in path.glob('**/*.parquet'):
        try:
            df_list.append(pd.read_parquet(fpath))
        except Exception as e:
            help_print_arg(f"fpath: {str(fpath)} reason: {str(e)}")

    df_all = pd.concat(df_list)

    # Destination: ../../combined_all/<minute>/_<date>.parquet
    fsuf = f"combined_all/{minute}/_{dt}.parquet"
    path_to_write = path.parent.parent.joinpath(fsuf)

    write_to_parquet(df_all, path_to_write)
def get_yf_loop_missing_hist(key='less_than_20', cs=False, sym_list=None, verb=False, refresh_missing_dates=True):
    """Get less_than_20 syms and call GetYfMissingDates."""
    if sym_list:
        # Caller supplied the symbols directly - nothing to derive
        if verb:
            help_print_arg('get_yf_loop_missing_hist: sym_list assumed')
    elif key == 'get_ignore_ytd':
        # Symbols missing at most one day of data this year
        df_all = read_clean_combined_all()
        dt = getDate.query('iex_eod')
        df_year = df_all[df_all['date'].dt.year == dt.year].copy(deep=True)
        counts = df_year.value_counts(subset='symbol', ascending=False)
        sym_list = counts[(counts < (counts.max() - 1)) & (counts > 0)].index.tolist()
        if verb:
            help_print_arg('get_yf_loop_missing_hist: key==get_ignore_ytd : syms_one_miss')
    else:
        # Refresh the missing-dates files, then read the most recent one.
        # cs=True uses the all-symbols file; otherwise the keyed file.
        if refresh_missing_dates:
            if cs is True:
                MissingHistDates(cs=True)
            else:
                MissingHistDates()
        sub_dir = 'all' if cs is True else key
        bpath = Path(baseDir().path, f"StockEOD/missing_dates/{sub_dir}")
        df_dates = pd.read_parquet(get_most_recent_fpath(bpath))
        sym_list = df_dates['symbol'].unique().tolist()
        if verb:
            if cs is True:
                help_print_arg('get_yf_loop_missing_hist: cs=True')
            else:
                help_print_arg('get_yf_loop_missing_hist: sym_list from missing_dates/key')

    for sym in tqdm(sym_list):
        try:
            GetYfMissingDates(sym=sym)
        except Exception as e:
            help_print_arg(f"get_yf_loop_missing_hist error: {str(e)}")