Example #1
0
    def get_dates_or_ytd(cls, self, fpath, dt):
        """Decide between fetching specific dates and a full YTD pull.

        Reads the parquet file at ``fpath`` and records the outcome on
        ``self``:

        * empty frame: the file is removed and ``need_ytd`` is set.
        * newest ``date`` equals ``dt``: ``need_data`` is cleared and the
          frame is stored on ``self.df``.
        * otherwise: business days up to ``dt`` that are absent from the
          frame are formatted as ``%Y%m%d`` strings. More than ten of them
          sets ``need_ytd``; ten or fewer are stored on ``self.dts_need``.
          The frame is stored on ``self.df`` in both cases.
        """
        stored = pd.read_parquet(fpath)

        # Nothing usable on disk: drop the file and ask for the full year
        if stored.empty:
            os.remove(fpath)
            self.need_ytd = True
            return

        # Local data already reaches dt, so nothing else is needed
        if stored['date'].max() == dt:
            self.need_data = False
            self.df = stored
            return

        # Business days (this_year=True), trimmed to those on or before dt
        bus_days = getDate.get_bus_days(testing=False, this_year=True)
        up_to_dt = bus_days['date'].dt.date <= dt
        bus_days = bus_days[up_to_dt].copy(deep=True)

        # Keep only the business days the stored frame does not cover
        covered = bus_days['date'].isin(stored['date'])
        absent = bus_days['date'][~covered]
        dts_need = [ts.date().strftime('%Y%m%d') for ts in absent]

        if self.testing:
            self.class_print(dts_need)

        # Beyond ten missing dates a full YTD request is flagged instead
        if len(dts_need) > 10:
            self.need_ytd = True
        else:
            self.dts_need = dts_need

        self.df = stored
def scraped_ee_dates(verbose=False, hist=False, current_year=True):
    """Scrape analyst earnings estimates for business days with no local file.

    Selects candidate business days by their ``bday_diff`` (computed by
    ``getDate.get_bus_day_diff`` from the ``iex_close`` date and each
    day), checks which of them lack a ``_<YYYY-MM-DD>.parquet`` file under
    ``economic_data/analyst_earnings/_<year>`` and instantiates
    ``ScrapedEE`` for every missing day, sleeping 5-15 seconds after each
    call that does not raise.

    Args:
        verbose: When true, print the list of dates that were scraped.
        hist: When true, keep every day whose ``bday_diff`` is below 15
            instead of only days whose ``bday_diff`` is between 0 and 15
            (inclusive).
        current_year: Together with ``hist``, additionally restrict the
            days to the year of the ``iex_eod`` date.

    Returns:
        None. A failure for an individual date is reported through
        ``help_print_arg`` and does not stop the loop.
    """
    eod_dt = getDate.query('iex_eod')

    # With 15 or fewer days left in the year the this_year=False calendar
    # is used, filtered to the current year onward.
    if (365 - eod_dt.timetuple().tm_yday) > 15:
        bdays = getDate.get_bus_days(this_year=True)
    else:
        bdays = getDate.get_bus_days(this_year=False)
        bdays = bdays[bdays['date'].dt.year >= eod_dt.year].copy()

    bdays['current_date'] = pd.to_datetime(getDate.query('iex_close'))
    bdays['bday_diff'] = (getDate.get_bus_day_diff(
                          bdays, 'current_date', 'date'))

    if hist and not current_year:
        keep = bdays['bday_diff'] < 15
    elif hist and current_year:
        keep = ((bdays['bday_diff'] < 15)
                & (bdays['date'].dt.year == eod_dt.year))
    else:
        keep = bdays['bday_diff'].between(0, 15)
    pos_days = bdays[keep].copy()

    bpath = Path(baseDir().path, 'economic_data', 'analyst_earnings')
    # NOTE(review): every candidate is looked up in the iex_eod year's
    # directory, including dates from another year - confirm this matches
    # where ScrapedEE writes its files.
    fpath_dir = bpath.joinpath(f"_{str(eod_dt.year)}")

    # Build the paths from the date column directly. A row-wise
    # DataFrame.apply returns a DataFrame (not a Series) when the frame is
    # empty, which breaks the column assignment.
    fpaths = [f"{fpath_dir}/_{ts.date()}.parquet" for ts in pos_days['date']]
    pos_days['fpath'] = fpaths
    # Explicit bool dtype keeps the mask usable when there are no rows
    pos_days['fpath_exists'] = pd.Series(
        [os.path.exists(fp) for fp in fpaths],
        index=pos_days.index, dtype=bool)
    dt_need = pos_days[~pos_days['fpath_exists']]

    dt_list = []

    # Separate loop name so the iex_eod date above is not shadowed
    for ts in dt_need['date']:
        try:
            ScrapedEE(dt=ts.date())
            # Randomised pause between consecutive scrapes
            sleep(randint(5, 15))
            dt_list.append(ts.date())
        except Exception as e:
            # Best effort: report the failure and continue with the next day
            help_print_arg(f"scraped_ee_dates {type(e)} {str(e)}")

    if verbose:
        help_print_arg(str(dt_list))
Example #3
0
def get_missing_dates(df):
    """Return the business days up to iex_eod that ``df`` does not contain.

    ``df`` needs a ``date`` column. The result is the slice of the
    business-day frame (``getDate.get_bus_days(this_year=True)``) whose
    ``date`` is on or before the ``iex_eod`` date and not among
    ``df['date']``, plus a ``dt_format`` column holding each date as
    ``%Y%m%d``.
    """
    bdays = getDate.get_bus_days(this_year=True)
    cutoff = getDate.query('iex_eod')

    # Ignore business days later than the cutoff
    on_or_before = bdays['date'].dt.date <= cutoff
    bdays = bdays[on_or_before].copy()

    # Anything not among df's distinct dates counts as missing
    seen_dates = df['date'].unique()
    is_missing = ~bdays['date'].isin(seen_dates)
    dts_missing = bdays[is_missing].copy()
    dts_missing['dt_format'] = dts_missing['date'].dt.strftime('%Y%m%d')

    return dts_missing
Example #4
0
def get_last_30_intradays():
    """Queue intraday fetches for recent business days missing from m1 data.

    Takes the business days that fall within the 30 calendar days up to
    the ``iex_eod`` date, drops those already present in the
    ``iex_intraday_m1`` frame and dispatches one ``iex_intraday`` task per
    remaining day through ``app.tasks.execute_func``. A
    ``ModuleNotFoundError`` raised while importing or dispatching is
    swallowed.
    """
    bus_days = getDate.get_bus_days()

    end_dt = getDate.query('iex_eod')
    start_dt = end_dt - timedelta(days=30)

    # Business days inside the [start_dt, end_dt] window
    in_window = ((bus_days['date'].dt.date >= start_dt)
                 & (bus_days['date'].dt.date <= end_dt))
    window = bus_days[in_window]

    # Days in the window with no entry in the m1 intraday frame
    m1_dates = serverAPI('iex_intraday_m1').df['date'].unique()
    days_tget = window[~window['date'].isin(m1_dates)].copy()

    try:
        from app.tasks import execute_func
        for day in days_tget['date']:
            execute_func.delay('iex_intraday', dt=day)
    except ModuleNotFoundError:
        # Task module unavailable: skip dispatching
        pass
Example #5
0
def get_missing_sec_master_idx(sma_df=False):
    """Fetch the SEC master index for business days missing from ``sma_df``.

    Args:
        sma_df: Master index frame with a ``date`` column. Any value that
            is not a DataFrame (the default ``False``) makes the function
            load the frame from ``serverAPI('sec_master_all')`` and parse
            its ``date`` column as epoch milliseconds.

    Returns:
        None. ``secMasterIdx`` is instantiated once per missing day; a
        failure for a single day is reported through ``help_print_arg``
        and the loop continues.
    """
    if not isinstance(sma_df, pd.DataFrame):
        sma_df = serverAPI('sec_master_all').df
        sma_df['date'] = pd.to_datetime(sma_df['date'], unit='ms')

    bus_days = getDate.get_bus_days(this_year=True)
    eod_dt = getDate.query('iex_eod')
    bus_days = bus_days[bus_days['date'].dt.date <= eod_dt].copy()

    # Hand isin() the unique values as an array. Calling .tolist() on a
    # datetime64[ns] array produces plain integers, which isin() does not
    # match against datetimes - every business day would look missing.
    known_dates = sma_df['date'].unique()
    dts_missing = bus_days[~bus_days['date'].isin(known_dates)].copy()
    dts_missing['dt_format'] = dts_missing['date'].dt.strftime('%Y%m%d')

    for hist_dt in tqdm(dts_missing['dt_format']):
        try:
            # Instantiated for its side effects; the object is not needed
            secMasterIdx(hist_date=hist_dt)
            sleep(.5)
        except Exception as e:
            # Best effort: report and move on to the next date
            msg = f"get_missing_sec_master_idx: {str(e)}"
            help_print_arg(msg)
Example #6
0
def get_most_recent_fpath(fpath_dir, f_pre='', f_suf='', dt='',
                          this_year=True, second=False):
    """Return the most recent dated parquet file in ``fpath_dir``.

    Candidate names have the form ``<f_pre><stamp><f_suf>.parquet``. Three
    stamp styles are tried in order, each over every business day from the
    latest down to the earliest before the next style is attempted:

    1. ``_YYYY-MM-DD``
    2. ``_YYYY``
    3. ``_YYYYMMDD``

    Args:
        fpath_dir: Directory to search.
        f_pre: Text placed before the date stamp in the file name.
        f_suf: Text placed after the date stamp in the file name.
        dt: Latest date to consider. A falsy value defaults to the
            ``iex_close`` date.
        this_year: Passed to ``getDate.get_bus_days`` to pick the
            business-day calendar.
        second: True on the internal retry; prevents further recursion.

    Returns:
        The first existing candidate path. If none exists, the search is
        retried once with ``this_year=False``. If that also fails, the
        last ``*.parquet`` file in ``fpath_dir`` (sorted by path) is
        returned, or None when the directory holds no parquet file.
    """
    if not dt:  # No date passed: default to the iex_close date
        dt = getDate.query('iex_close')

    bus_days = getDate.get_bus_days(this_year=this_year)
    recent_first = (bus_days[bus_days['date'].dt.date <= dt]
                    .sort_values(by=['date'], ascending=False))
    stamps = list(recent_first['date'])

    # Probe order matters: all daily ISO names, then yearly names, then
    # compact daily names.
    for stamp_fmt in ('_%Y-%m-%d', '_%Y', '_%Y%m%d'):
        for ts in stamps:
            tpath = Path(fpath_dir,
                         f"{f_pre}{ts.strftime(stamp_fmt)}{f_suf}.parquet")
            if tpath.exists():
                return tpath

    if not second:
        # Retry over the wider calendar, keeping the caller's prefix,
        # suffix and date so the fallback still targets the same file.
        retry_path = get_most_recent_fpath(
            fpath_dir, f_pre=f_pre, f_suf=f_suf, dt=dt,
            this_year=False, second=True)
        if retry_path:
            help_print_arg(f"get_most_recent_fpath: first failed. Returning {str(retry_path)}")
            return retry_path

    msg_1 = "Directory empty or path doesn't follow format '_dt.parquet'. Returning first path"
    msg_2 = f": {fpath_dir}"
    help_print_arg(f"{msg_1} {msg_2}")

    # Sort so the last-resort pick does not depend on directory listing order
    paths = sorted(Path(fpath_dir).glob('*.parquet'))
    if paths:
        return paths[-1]
    return None