Example #1
def create_new_ticker_markets(tick_db=pd.DataFrame([])):
    """Function to find tickers whcih have not bee seen before and add to the
    database.
    
    args:
    ----
    tick_ftse - pandas dataframe - all the scraped tickers

    returns:
    ----
    pandas dataframe - extract from database afetr update
    """
    if not tick_db.shape[0]:
        tick_db = sqlaq_to_df(ticker.fetch())
    #Check if the ticker market exists; if not, add it to the ticker_market table
    tick_market_db = sqlaq_to_df(ticker_market.fetch())
    #find ticker markets which don't exist
    new_tick_market = pd.merge(tick_db.rename(columns={"id": "ticker_id"}),
                               tick_market_db[["id", "ticker_id"]],
                               on=["ticker_id"],
                               how="left")
    new_tick_market = new_tick_market[new_tick_market.id.isnull()]
    log.info(f"{new_tick_market.shape[0]} items to add to ticker_market")
    #add to db
    ticker_market.add_df(new_tick_market)
    #fetch updated table
    tick_market_db = sqlaq_to_df(ticker_market.fetch())
    return tick_market_db
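
Both this example and the next rely on the same pandas anti-join idiom: left-merge the scraped rows onto the database extract, then keep only the rows whose id came back null. A minimal, self-contained sketch of the pattern (the toy frames below are illustrative, not the project's schema):

import pandas as pd

#Toy stand-ins for the scraped rows and the database extract
scraped = pd.DataFrame({"ticker": ["AAA", "BBB", "CCC"]})
db = pd.DataFrame({"id": [1, 2], "ticker": ["AAA", "BBB"]})

#Left-merge pulls in the db id where the ticker already exists;
#rows with a null id are the ones the database has not seen yet
merged = pd.merge(scraped, db[["id", "ticker"]], on=["ticker"], how="left")
new_rows = merged[merged.id.isnull()]
print(new_rows.ticker.tolist())  #['CCC']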
Example #2
def create_new_tickers(tick_scrape):
    """Function to find tickers whcih have not bee seen before and add to the
    database.
    
    args:
    ----
    tick_ftse - pandas dataframe - all the scraped tickers

    returns:
    ----
    pandas dataframe - extract from database afetr update
    """
    #Check if ticker exists, if not add it to the ticker table
    tick_db = sqlaq_to_df(ticker.fetch())
    #add the db id to the scraped tickers
    new_tick = pd.merge(tick_scrape,
                        tick_db[["id", "ticker"]],
                        on=["ticker"],
                        how="left")
    #find tickers which don't exist
    new_tick = new_tick[new_tick.id.isnull()]
    log.info(f"{new_tick.shape[0]} items to add to ticker")
    #add to db
    ticker.add_df(new_tick)
    #fetch updated table
    tick_db = sqlaq_to_df(ticker.fetch())
    return tick_db
def process_weekly_prices(ticker_id, split_from_date=None, split_to_date=None):
    """Function to fetch prices for a ticker between selected dates, then
    split into update and add records, then perform those split/add functions 
    on the db.
    
    args:
    ----
    ticker_id - int - the ticker id in the db
    split_from_date - datetime - the date to start the split
    split_to_date - datetime - the date to end the split
    log - logger
    """
    #Get new price data if neccesary
    update_df, append_df = split_week_prices(
        ticker_ids=[ticker_id],
        from_date=split_from_date,
        to_date=split_to_date,
    )

    #Update existing records
    weekly_price.update_df(update_df)
    log.info(
        f"\nUPDATED {update_df.shape[0]} RECORDS IN weekly_price: \n\tFROM {update_df.date.min()} \n\tTO {update_df.date.max()}"
    )

    #Add new prices to the sql database
    weekly_price.add_df(append_df)
    log.info(
        f"\nADDED {append_df.shape[0]} NEW RECORDS TO weekly_price: \n\tFROM {append_df.date.min()} \n\tTO {append_df.date.max()}"
    )
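
split_week_prices is project code, but the split it performs can be sketched with plain pandas: incoming rows whose key already exists in the table become updates, the rest become appends. A hypothetical illustration, assuming the key is (ticker_id, date):

import pandas as pd

#Hypothetical incoming prices and existing table keyed on (ticker_id, date)
new_df = pd.DataFrame({"ticker_id": [1, 1],
                       "date": ["2023-01-02", "2023-01-09"],
                       "close": [10.0, 11.0]})
existing = pd.DataFrame({"ticker_id": [1],
                         "date": ["2023-01-02"],
                         "close": [9.5]})

#Mark which incoming rows already have a record in the table
keys = ["ticker_id", "date"]
mask = new_df.set_index(keys).index.isin(existing.set_index(keys).index)
update_df = new_df[mask]   #overwrite existing records
append_df = new_df[~mask]  #brand-new records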
def get_tickers():
    log.info("\nSCRAPPING TICKERS")
    #This section will scrap the ticker values for the FTSE 100 and FTSE 250 and store them in dataframes 'tick_ftse100' and 'tick_ftse250'.
    #Finally concatenate into 1 dataframe 'tick_ftse'.

    #Perform async scrape
    log.info("\nFTSE 100")
    tick_ftse100 = ScrapeTickers("ftse100").scrape()

    #Collect the rows of data
    log.info("\nFTSE 250")
    tick_ftse250 = ScrapeTickers("ftse250").scrape()

    #Combine into 1 dataframe
    tick_ftse = pd.concat([tick_ftse100, tick_ftse250])
    tick_ftse = tick_ftse.sort_values(['ticker'])
    #Replace a '.' between symbol characters with '-' (Yahoo format)
    tick_ftse['ticker'] = [
        re.sub(r'\.(?=[0-9A-Z])', '-', tick)
        for tick in tick_ftse['ticker']
    ]
    tick_ftse['ticker'] = [
        re.sub(r'[^0-9A-Z\-]', '', tick) for tick in tick_ftse['ticker']
    ]
    tick_ftse['last_seen_date'] = dt.date.today()

    return tick_ftse
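
The two substitutions above normalise the scraped tickers into Yahoo's format: a '.' between symbol characters becomes '-', and anything outside A-Z, 0-9 and '-' is dropped. A quick standalone check of that behaviour:

import re

for tick in ["BT.A", "RDSB.", "AB C1"]:
    cleaned = re.sub(r'\.(?=[0-9A-Z])', '-', tick)  #dot between symbols -> dash
    cleaned = re.sub(r'[^0-9A-Z\-]', '', cleaned)   #strip everything else
    print(tick, "->", cleaned)
#BT.A -> BT-A, RDSB. -> RDSB, AB C1 -> ABC1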
Example #5
def process_soup(self, soup):
    if soup == "":
        log.info('no results returned')
        #return to say that there has been an error
        return []
    #Grab the header row
    header = refine_soup(soup, [
        {"name": "table", "attrs": {"data-test": "historical-prices"}},
        {"name": "thead"},
        {"name": "tr"},
    ])[0]
    cols = [clean_col_name(th.text) for th in header]
    #Grab the data rows
    rows = refine_soup(soup, [
        {"name": "table", "attrs": {"data-test": "historical-prices"}},
        {"name": "tbody"},
        {"name": "tr"},
    ])
    #If there are no rows there's no point going back further
    if len(rows) == 0:
        log.info('No more records to collect')
        return []
    #Put the rows into a list of dicts keyed by column name
    data = []
    for r in rows:
        td = refine_soup(r, [{"name": "td"}])
        if len(td) == len(cols):
            data.append({c: x.text for c, x in zip(cols, td)})
    return data
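
refine_soup is a project helper, but the traversal it wraps is ordinary BeautifulSoup navigation: find the table by its data-test attribute, read the header cells, then walk the body rows. A rough equivalent using bs4 directly (a simple strip/lower stands in for the project's clean_col_name):

from bs4 import BeautifulSoup

html = """<table data-test="historical-prices">
<thead><tr><th>Date</th><th>Close</th></tr></thead>
<tbody><tr><td>Jan 01, 2023</td><td>10.5</td></tr></tbody>
</table>"""

soup = BeautifulSoup(html, "html.parser")
table = soup.find("table", attrs={"data-test": "historical-prices"})
cols = [th.text.strip().lower() for th in table.thead.find_all("th")]
data = []
for tr in table.tbody.find_all("tr"):
    td = tr.find_all("td")
    if len(td) == len(cols):
        data.append({c: x.text for c, x in zip(cols, td)})
print(data)  #[{'date': 'Jan 01, 2023', 'close': '10.5'}]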
def process_daily_prices(ticker,
                         ticker_id,
                         st_date=None,
                         en_date=None,
                         split_from_date=None,
                         split_to_date=None):
    """Function to scrape prices for a ticker between selected dates, then
    split into update and add records, then perform those split/add functions 
    on the db.
    
    args:
    ----
    ticker - str - the ticker to use in scrape
    ticker_id - int - the ticker id in the db
    st_date - datetime - the date to start the scrape
    en_date - datetime - the date to end the scrape
    split_from_date - datetime - the date to start the split
    split_to_date - datetime - the date to end the split
    log - logger
    """
    #Get new price data if neccesary
    if not st_date or st_date < en_date:
        check, new_prices_df = get_day_prices(
            ticker,
            st_date,
            en_date,
        )
        if check:
            new_prices_df['ticker_id'] = ticker_id
            update_df, append_df = split_day_prices(new_prices_df,
                                                    ticker_ids=[ticker_id],
                                                    from_date=split_from_date,
                                                    to_date=split_to_date)
            #Update existing prices in the sql database
            daily_price.update_df(update_df)
            log.info(
                f"\nUPDATED {update_df.shape[0]} RECORDS IN daily_price: \n\tFROM {update_df.date.min()} \n\tTO {update_df.date.max()}"
            )
            #Add new prices to the sql database
            daily_price.add_df(append_df)
            log.info(
                f"\nADDED {append_df.shape[0]} NEW RECORDS TO daily_price: \n\tFROM {append_df.date.min()} \n\tTO {append_df.date.max()}"
            )
        else:
            log.info('No new records found')
    else:
        log.info('No new records to collect')
Example #7
def process_soup(self, soup):
    if soup == "":
        log.info('no results returned')
        #return to say that there has been an error
        return []
    #Get the data rows
    rows = refine_soup(soup, [
        {"name": "table"},
        {"name": "tbody"},
        {"name": "tr"},
    ])
    #Grab the dates
    dates = [
        refine_soup(r, [{"name": "td"}, {"name": "span"}]) for r in rows
    ]
    dates = [d[0].text for d in dates if len(d) > 0]
    #Grab the labels
    labels = [
        refine_soup(r, [{"name": "td"}, {"name": "a"}]) for r in rows
    ]
    labels = [lab[0].text for lab in labels if len(lab) > 0]
    return list(zip(dates, labels))
def filter_stocks(from_date=None, to_date=None):
    """Function to search for shares to buy
    
    args:
    ----
    from_date - datetime:None - a bounding minimum date (if necessary)
    to_date - datetime:None - a bounding maximum date (if necessary)
    
    returns:
    ----
    pandas dataframe
    """
    #Fetch prices
    prices_df = sqlaq_to_df(
        daily_price.fetch(from_date=from_date, to_date=to_date))
    ticker_df = sqlaq_to_df(ticker.fetch()) \
        .rename(columns={"id":"ticker_id"})

    #Filter to keep only items which are current
    max_date = prices_df.date.max()
    ticks = prices_df[prices_df.date == max_date] \
        .ticker_id \
        .drop_duplicates()
    ticks = pd.merge(ticks.to_frame(),
                     ticker_df[["ticker_id", "ticker"]],
                     on=["ticker_id"])

    #Setup variables
    buy = []
    sell = []

    prices_df = prices_df.sort_values(['ticker_id','date']) \
        .reset_index(drop=True)

    #Loop ticks and get results
    for _, r in tqdm(ticks.iterrows(),
                     total=ticks.shape[0],
                     desc="Loop stock to find buy signals"):
        tick_prices = prices_df[prices_df.ticker_id == r.ticker_id]
        dataset = DataSet()
        dataset.add_dataset(tick_prices.close, "close")
        #Calculate the short macd
        _, _, _, _, macd_short = dataset.close.calc_macd(ema_lng=26,
                                                         ema_sht=12,
                                                         sig_period=9)
        dataset.add_dataset(macd_short, "macd_short")
        #Normalise it
        macd_short = dataset.macd_short.norm_data(dataset.close.data)
        dataset.add_dataset(macd_short, "macd_short")
        #Calculate the long macd
        _, _, _, _, macd_long = dataset.close.calc_macd(ema_lng=26 * 5,
                                                        ema_sht=12 * 5,
                                                        sig_period=9 * 5)
        dataset.add_dataset(macd_long, "macd_long")
        #Normalise it
        macd_long = dataset.macd_long.norm_data(dataset.close.data)
        dataset.add_dataset(macd_long, "macd_long")
        #Find the previous major macd high
        #Find the short gradient since this high to the current position
        #Find the previous major macd low
        #Find the short gradient since this low to the current position
        #Calc gradients of macds
        grad_macd_short = dataset.macd_short.calc_grad()
        dataset.add_dataset(grad_macd_short, "grad_macd_short")
        grad_macd_long = dataset.macd_long.calc_grad()
        dataset.add_dataset(grad_macd_long, "grad_macd_long")
        #Identify if it is a buy signal
        check1 = (dataset.grad_macd_short.data.iloc[-1] > 0
                  and dataset.grad_macd_short.data.iloc[-2] < 0
                  and dataset.grad_macd_long.data.iloc[-1] > 0)
        if check1:
            buy.append({
                "ticker": r.ticker,
                "ticker_id": r.ticker_id,
                "short_grad_pre": dataset.grad_macd_short.data.iloc[-2],
                "short_grad_post": dataset.grad_macd_short.data.iloc[-1],
                "short_grad_change":
                    abs(dataset.grad_macd_short.data.iloc[-2]) +
                    abs(dataset.grad_macd_short.data.iloc[-1]),
                "long_grad": dataset.grad_macd_long.data.iloc[-1],
                "macd_long": dataset.macd_long.data.iloc[-1],
            })

    #Put into a dataframe
    buy_df = pd.DataFrame(buy)
    if buy_df.shape[0]:
        buy_df = buy_df.sort_values(["long_grad"], ascending=[False])

    log.info(f"{buy_df.shape[0]} opportunities found")

    return buy_df
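
DataSet and calc_macd are project abstractions; for reference, the standard MACD they mirror comes straight from pandas exponential moving averages. A minimal sketch under that assumption (the three-value return here is illustrative; the project version returns more series):

import pandas as pd

def macd(close: pd.Series, ema_lng=26, ema_sht=12, sig_period=9):
    """Standard MACD: fast EMA minus slow EMA, plus a signal line."""
    ema_slow = close.ewm(span=ema_lng, adjust=False).mean()
    ema_fast = close.ewm(span=ema_sht, adjust=False).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=sig_period, adjust=False).mean()
    histogram = macd_line - signal_line
    return macd_line, signal_line, histogram

close = pd.Series([10, 10.5, 10.2, 10.8, 11.1, 10.9, 11.4])
macd_line, signal_line, hist = macd(close)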


# import numpy as np
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# for c in ['macd','real_macd_min','prev_min_macd','prev_min_macd_grad','real_macd_max','prev_max_macd','prev_max_macd_grad']:
#     prices_d_df[c] = np.nan

# for tick in tqdm(prices_d_df.ticker.unique(),total=prices_d_df.ticker.unique().shape[0]):
#     tick_df = prices_d_df.loc[prices_d_df.ticker == tick,:]
#     tick_df = calc_ema_macd(tick_df)
#     tick_df['real_macd_min'] = flag_mins(tick_df['macd'],_period=1,_cur=False)
#     tick_df['real_macd_max'] = flag_maxs(tick_df['macd'],_period=1,_cur=False)
#     ### MINS ###
#     #Find the last 2 mins
#     tick_df["prev_min_macd"],tick_df["prev_min_macd_date"],tick_df["prev_min_macd_index"] = prev_max_min(tick_df[["date",'macd',"real_macd_min"]].copy(),'macd',"real_macd_min",1)
#     tick_df["prev_min_macd_change"] = mk_prev_move_float(tick_df['prev_min_macd'])
#     tick_df["prev_min_macd_index_change"] = mk_prev_move_float(tick_df['prev_min_macd_index'])
#     #Calc the gradient
#     tick_df['prev_min_macd_grad'] = tick_df["prev_min_macd_change"] / tick_df["prev_min_macd_index_change"]
#     ### MAXS ###
#     #Find the last 2 maxs
#     tick_df["prev_max_macd"],tick_df["prev_max_macd_date"],tick_df["prev_max_macd_index"] = prev_max_min(tick_df[["date",'macd',"real_macd_max"]].copy(),'macd',"real_macd_max",1)
#     tick_df["prev_max_macd_change"] = mk_prev_move_float(tick_df['prev_max_macd'])
#     tick_df["prev_max_macd_index_change"] = mk_prev_move_float(tick_df['prev_max_macd_index'])
#     #Calc the gradient
#     tick_df['prev_max_macd_grad'] = tick_df["prev_max_macd_change"] / tick_df["prev_max_macd_index_change"]
#     prices_d_df.loc[prices_d_df.ticker == tick,:] = tick_df

# #Filter to signal items
# buy_mask = (prices_d_df.date == prices_d_df.date.max()) & (prices_d_df.prev_min_macd_grad > 0) & (prices_d_df.macd > prices_d_df.macd.shift(1)) & (prices_d_df.macd.shift(1) < prices_d_df.macd.shift(2))
# buy_df = prices_d_df[buy_mask]
# buy_df['signal'] = 'BUY'

# sell_mask = (prices_d_df.date == prices_d_df.date.max()) & (prices_d_df.prev_min_macd_grad < 0) & (prices_d_df.macd < prices_d_df.macd.shift(1)) & (prices_d_df.macd.shift(1) > prices_d_df.macd.shift(2))
# sell_df = prices_d_df[sell_mask]
# sell_df['signal'] = 'SELL'

# print(f"COUNT BUY -> {buy_df.shape[0]}")
# print(f"COUNT SELL -> {sell_df.shape[0]}")
# display(buy_df)
# display(sell_df)

# ft_eng_w_df = ft_eng_w_df[['ticker','date','close','macd','prev_min_macd_grad']]
# ft_eng_w_df['open'] = ft_eng_w_df.close
# ft_eng_w_df['high'] = ft_eng_w_df.close
# ft_eng_w_df['low'] = ft_eng_w_df.close

# ft_eng_w_df = ft_eng_w_df.sort_values(['ticker','date']).reset_index(drop=True)

# tick = 'BAB'
# tmp_df = ft_eng_w_df[ft_eng_w_df.ticker == tick]
# # tmp_df = calc_ema_macd(tmp_df)

# fig = make_subplots(rows=2,cols=1,specs=[[{'secondary_y':False}],[{'secondary_y':True}]])
# #Chart 1
# fig.add_trace(
#     go.Ohlc(
#         x=tmp_df.date,
#         open=tmp_df.open,
#         high=tmp_df.high,
#         low=tmp_df.low,
#         close=tmp_df.close,
#         name='OHLC'
#     ),
#     row=1,col=1
# )
# # fig.add_trace(
# #     go.Scatter(
# #         x=tmp_df.date,
# #         y=tmp_df.ema12,
# #         name='ema12'
# #     ),
# #     row=1,col=1
# # )
# # fig.add_trace(
# #     go.Scatter(
# #         x=tmp_df.date,
# #         y=tmp_df.ema26,
# #         name='ema26'
# #     ),
# #     row=1,col=1
# # )

# #Chart 2
# fig.add_trace(
#     go.Bar(
#         x=tmp_df[tmp_df.macd > 0].date,y=tmp_df[tmp_df.macd > 0].macd,
#         marker_color='green'
#     ),
#     row=2,col=1
# )
# fig.add_trace(
#     go.Bar(
#         x=tmp_df[tmp_df.macd < 0].date,y=tmp_df[tmp_df.macd < 0].macd,
#         marker_color='red'
#     ),
#     row=2,col=1
# )
# # fig.add_trace(
# #     go.Scatter(
# #         x=tmp_df.date,
# #         y=tmp_df.macd_line,
# #         name='macd line'
# #     ),
# #     row=2,col=1,secondary_y=True
# # )
# # fig.add_trace(
# #     go.Scatter(
# #         x=tmp_df.date,
# #         y=tmp_df.signal_line,
# #         name='signal line'
# #     ),
# #     row=2,col=1,secondary_y=True
# # )

# #Establish range selector and buttons
# rng_sel_di = dict(
#     buttons=list([
#         dict(count=1,
#              label="1m",
#              step="month",
#              stepmode="backward"),
#         dict(count=6,
#              label="6m",
#              step="month",
#              stepmode="backward"),
#         dict(count=1,
#              label="YTD",
#              step="year",
#              stepmode="todate"),
#         dict(count=1,
#              label="1y",
#              step="year",
#              stepmode="backward"),
#         dict(count=5,
#              label="5y",
#              step="year",
#              stepmode="backward"),
#         dict(count=3,
#              label="3y",
#              step="year",
#              stepmode="backward"),
#         dict(step="all")
#     ])
# )
# for axis in ['xaxis'
#              ,'xaxis2'
#             ]:
#     fig.layout[axis].rangeselector=rng_sel_di
#     fig.layout[axis].rangeslider.visible=False
# # fig.layout.yaxis.domain = [0.7,1.0]
# # fig.layout.yaxis2.domain = [0.0,0.3]
# fig.update_yaxes(automargin=True)
# fig.update_layout(
#     title=f'Charts for {tick}'
# )

# fig.show()
# display(ft_eng_w_df[ft_eng_w_df.ticker == tick][['ticker','date','close','ema26','macd','prev_min_macd_grad']].tail(15))
Example #9
buy = []
sell = []

prices_df = prices_df.sort_values(['ticker_id','date']) \
    .reset_index(drop=True)

#Loop ticks and get results
for _, r in tqdm(ticks.iterrows(),
                 total=ticks.shape[0],
                 desc="Loop stock to find buy signals"):
    tick_prices = prices_df[prices_df.ticker_id == r.ticker_id]
    dataset = DataSet()
    dataset.add_dataset(tick_prices.change, "change")
    #Calc consecutive losses
    cons_loses = dataset.change.calc_consec_loss()
    dataset.add_dataset(cons_loses, "cons_loses")
    #Identify if it is a buy signal
    check1 = (dataset.cons_loses.data.iloc[-1] == 0 \
        and dataset.cons_loses.data.iloc[-2] >= 3)
    if check1:
        buy.append({
            "ticker": r.ticker,
            "ticker_id": r.ticker_id,
            "cons_loses": dataset.cons_loses.data.iloc[-2],
        })

#Put into a dataframe
buy_df = pd.DataFrame(buy)
if buy_df.shape[0]:
    buy_df = buy_df.sort_values(["cons_loses"], ascending=[False])

log.info(f"{buy_df.shape[0]} opportunities found")
Example #10
def daily_to_weekly_price_conversion(dp_df):
    """Function to convert the daily prices into weekly prices
    
    args:
    ------
    dp_df - pandas dataframe - the daily prices

    returns:
    ------
    pandas dataframe
    """
    log.info('Converting daily prices to weekly prices')
    #Mark the week identifier
    dp_df['isocalendar'] = [x.isocalendar()[:2] for x in dp_df['date']]
    #Get highs and lows
    high_df = dp_df.loc[dp_df['high'] > 0, ['high','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .max()
    low_df = dp_df.loc[dp_df['low'] > 0, ['low','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .min()
    #Get total volume for the week
    vol_df = dp_df.loc[dp_df['volume'] > 0, ['volume','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .sum()
    #Get max and min week days
    max_wk_day = dp_df.loc[dp_df['close'] > 0, ['date','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .max()
    min_wk_day = dp_df.loc[dp_df['open'] > 0, ['date','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .min()
    #Get open price (first traded day of each ticker's week)
    open_df = pd.merge(dp_df[['date', 'ticker_id', 'open']], min_wk_day,
                       on=['date', 'ticker_id'])
    #Get close price (last traded day of each ticker's week)
    close_df = pd.merge(dp_df[['date', 'ticker_id', 'close']], max_wk_day,
                        on=['date', 'ticker_id'])
    #Form the final df
    wp_df = dp_df[['ticker_id', 'isocalendar']]
    wp_df = pd.merge(wp_df,
                     min_wk_day,
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #date
    wp_df = pd.merge(wp_df,
                     high_df,
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #high
    wp_df = pd.merge(wp_df,
                     low_df,
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #low
    wp_df = pd.merge(wp_df,
                     vol_df,
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #volume
    wp_df = pd.merge(wp_df,
                     open_df[['ticker_id', 'isocalendar', 'open']],
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #open
    wp_df = pd.merge(wp_df,
                     close_df[['ticker_id', 'isocalendar', 'close']],
                     on=['ticker_id', 'isocalendar'],
                     how="left")  #close
    wp_df['change'] = wp_df['close'] - wp_df['open']
    wp_df = wp_df.drop_duplicates() \
        .reset_index(drop=True)
    #Get the monday of each week
    wp_df['date'] = [calc_wk_st_date(x) for x in wp_df.date]
    wp_df = wp_df.drop(columns=['isocalendar'])
    #Fill missing values
    wp_df = wp_df.fillna(0)
    return True, wp_df
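
For a single ticker, the same daily-to-weekly collapse can be written with pandas resample: open is the first traded price of the week, close the last, high/low the extremes, volume the sum. A compact sketch of that equivalent (it assumes a DatetimeIndex; the project version groups on isocalendar instead so it can handle many tickers in one frame):

import pandas as pd

dp = pd.DataFrame(
    {"open": [10, 11, 12], "high": [11, 12, 13], "low": [9, 10, 11],
     "close": [10.5, 11.5, 12.5], "volume": [100, 200, 300]},
    index=pd.to_datetime(["2023-01-02", "2023-01-04", "2023-01-09"]),
)
#'W-MON' with left label/closed buckets each row into its Monday-start week
wp = dp.resample("W-MON", label="left", closed="left").agg(
    {"open": "first", "high": "max", "low": "min",
     "close": "last", "volume": "sum"})
wp["change"] = wp["close"] - wp["open"]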
Example #11
def full_scrape():
    """Function to perform a full scrape of all available prices"""

    ########################
    ### SCRAPING TICKERS ###
    ########################
    #scrape the tickers
    tick_ftse = get_tickers()
    #create new records in the ticker table
    tick_db = create_new_tickers(tick_ftse)
    #update ticker records with last seen date
    tick_ftse = pd.merge(tick_ftse, tick_db[["ticker", "id"]], on="ticker")
    ticker.update_df(tick_ftse)
    #create new records in the ticker_market table
    _ = create_new_ticker_markets(tick_ftse)
    #Create a list of ticker ids
    ticker_ids = tick_ftse.id.to_list()

    ####################
    ### DAILY PRICES ###
    ####################
    log.info("\nSCRAPPING DAILY PRICES")

    #Make a call for all the latest dates
    latest_dates_df = sqlaq_to_df(
        daily_price.fetch_latest(session, ticker_ids=ticker_ids))
    latest_dates_df["max_date"] = latest_dates_df.max_date.astype("datetime64")
    #Calc the en_date for today
    en_date = calc_en_date()
    if str(CONFIG['web_scrape']['mode']).lower() == 'update':
        latest_dates_df["st_date"] = [
            calc_st_date(v) for v in latest_dates_df.max_date
        ]
    else:
        latest_dates_df["st_date"] = dt.datetime(1970, 1, 1)
        #Delete existing data
        daily_price.remove()

    #Loop through the tickers in tick_ftse and for each one get the latest date of scrape.
    #Convert this date into a timestamp.
    #Scrape all new data and add to the database.
    dp_errors = []
    run_time = ProcessTime()
    for _, r in tqdm(latest_dates_df.iterrows(),
                     total=latest_dates_df.shape[0],
                     desc="Scrape daily prices"):
        log.info(f'\n{len(run_time.lap_li)} RUNNING FOR -> {r.id}, {r.ticker}')
        log.info(f'Latest date - {r.max_date}')
        try:
            #Get new price data if necessary and add/update the database
            process_daily_prices(r.ticker,
                                 r.id,
                                 st_date=r.st_date,
                                 en_date=en_date,
                                 split_from_date=r.max_date,
                                 split_to_date=None)
        except Exception as e:
            log.error(e)
            dp_errors.append({'ticker': r.ticker, "error": e})
        #Lap
        log.info(run_time.lap())
        log.info(run_time.show_latest_lap_time(show_time=True))
    log.info(f"DAILY SCRAPE RUN TIME - {run_time.end()}")

    #####################
    ### WEEKLY PRICES ###
    #####################
    log.info("\nSCRAPPING WEEKLY PRICES")

    #Make a call for all the latest dates
    latest_dates_df = sqlaq_to_df(
        weekly_price.fetch_latest(session, ticker_ids=ticker_ids))
    latest_dates_df["max_date"] = latest_dates_df.max_date.astype("datetime64")

    #Loop through the tickers in tick_ftse and for each one get the latest date of scrape.
    #Convert this date into a timestamp.
    #Scrape all new data and add to the database.
    wp_errors = []
    run_time = ProcessTime()
    for _, r in tqdm(latest_dates_df.iterrows(),
                     total=latest_dates_df.shape[0],
                     desc="Process weekly prices"):
        log.info(f'\n{len(run_time.lap_li)} RUNNING FOR -> {r.id}, {r.ticker}')
        try:
            #Get new price data if necessary
            if r.max_date < en_date:
                process_weekly_prices(
                    r.id,
                    split_from_date=r.max_date,
                )
            else:
                log.info('No new records to collect')
                continue
        except Exception as e:
            log.error(e)
            wp_errors.append({'ticker': r.ticker, "error": e})
        #Lap
        log.info(run_time.lap())
        log.info(run_time.show_latest_lap_time(show_time=True))
    log.info('\n\n')
    log.info(f"WEEKLY SCRAPE RUN TIME - {run_time.end()}")

    ####################
    ### PRINT ERRORS ###
    ####################

    log.info(f'\nDAILY ERROR COUNT -> {len(dp_errors)}')
    if len(dp_errors) > 0:
        log.info('DAILY ERRORS ->')
        for e in dp_errors:
            log.error(e)

    log.info(f'\nWEEKLY ERROR COUNT -> {len(wp_errors)}')
    if len(wp_errors) > 0:
        log.info('WEEKLY ERRORS ->')
        for e in wp_errors:
            log.error(e)
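
calc_st_date and calc_en_date are project helpers; in update mode the intent is simply "scrape from the day after the last stored date up to today". A hypothetical sketch of that window logic (not the project's actual implementation):

import datetime as dt

def calc_st_date(max_date):
    #Hypothetical: resume the day after the last stored record
    return max_date + dt.timedelta(days=1)

def calc_en_date():
    #Hypothetical: scrape up to and including today
    return dt.datetime.combine(dt.date.today(), dt.time.min)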
Example #12
def get_day_prices(ticker: str, st_date=None, en_date=None):
    """Function for getting daily stock prices via web scraping

    args:
    ------
    ticker - str - the stock identifier; needs to match Yahoo.co.uk
    st_date - datetime - the date to start the scrape
    en_date - datetime - the date to end the scrape

    returns:
    ------
    bool, pandas dataframe - success flag and all required prices
    """
    log.info(
        f'Getting DAILY prices for -> {ticker} from {str(st_date)} to {str(en_date)}'
    )
    #Perform async scrapes
    tick_df = ScrapePrices(ticker, st_date, en_date).scrape()
    #Check for rows - if none then return
    if not tick_df.shape[0]:
        log.warning("Early exit due to no new records being found")
        return False, None
    #Reformat strings to floats
    tick_df['open'] = [str_to_float_format(v) for v in tick_df.open]
    tick_df['high'] = [str_to_float_format(v) for v in tick_df.high]
    tick_df['low'] = [str_to_float_format(v) for v in tick_df.low]
    tick_df['close'] = [str_to_float_format(v) for v in tick_df.close]
    tick_df['adj_close'] = [str_to_float_format(v) for v in tick_df.adj_close]
    tick_df['volume'] = [str_to_float_format(v) for v in tick_df.volume]
    tick_df['change'] = tick_df.close - tick_df.open
    #Reformat date
    tick_df['date'] = [
        conv_dt(v, date_or_time="short_date") for v in tick_df.date
    ]
    #Add the ticker series
    tick_df['ticker'] = ticker
    #Mark the week identifier
    tick_df['isocalendar'] = [x.isocalendar()[:2] for x in tick_df['date']]
    min_wk_day = tick_df.loc[tick_df['open'] > 0, ['date','isocalendar']] \
        .groupby('isocalendar') \
        .min() \
        .reset_index() \
        .rename(columns={'date':'week_start_date'})
    tick_df = pd.merge(tick_df, min_wk_day, on=['isocalendar'])
    #CLEANING - Remove any rows with no prices
    tick_df = tick_df[tick_df.open > 0]
    #CLEANING - Where the close drops more than 90% vs the prior row (assumed
    #bad scrape), copy the prices from the next row
    tick_df['cl_change'] = (tick_df.close -
                            tick_df.close.shift(1)) / tick_df.close.shift(1)
    mask = tick_df['cl_change'] < -0.9
    tick_df.loc[mask, 'open'] = tick_df.open.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'close'] = tick_df.close.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'high'] = tick_df.high.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'low'] = tick_df.low.shift(-1).copy().loc[mask]
    #Fill missing values
    tick_df = tick_df.fillna(0)
    tick_df = tick_df[[
        'ticker', 'date', 'week_start_date', 'open', 'close', 'high', 'low',
        'change', 'volume'
    ]]
    return True, tick_df
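
str_to_float_format and conv_dt are project helpers; their job can be sketched as stripping thousands separators before the float conversion and parsing Yahoo's short date strings. Hypothetical stand-ins:

import datetime as dt

def str_to_float_format(v) -> float:
    #Hypothetical: "1,234.56" -> 1234.56; dashes/empties -> 0.0
    v = str(v).replace(",", "").strip()
    return float(v) if v not in ("", "-") else 0.0

def conv_dt(v: str, date_or_time="short_date"):
    #Hypothetical: Yahoo short dates look like "Jan 01, 2023"
    if date_or_time == "short_date":
        return dt.datetime.strptime(v, "%b %d, %Y").date()
    raise ValueError(date_or_time)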