def create_new_ticker_markets(tick_db=None):
    """Find ticker markets which have not been seen before and add them to the database.

    args:
    ----
    tick_db - pandas dataframe:None - tickers to check; when None (or empty)
        the full ticker table is fetched from the database

    returns:
    ----
    pandas dataframe - extract from the ticker_market table after the update
    """
    # BUG FIX: the default was a mutable pd.DataFrame([]) instance created at
    # definition time and shared across calls; use None as the sentinel.
    if tick_db is None or not tick_db.shape[0]:
        tick_db = sqlaq_to_df(ticker.fetch())
    #Check if ticker market exists, if not add it to the ticker_market table
    tick_market_db = sqlaq_to_df(ticker_market.fetch())
    #Find ticker markets which don't exist - a left join leaves a null id
    #where there is no matching ticker_market row
    new_tick_market = pd.merge(tick_db.rename(columns={"id": "ticker_id"}),
                               tick_market_db[["id", "ticker_id"]],
                               on=["ticker_id"],
                               how="left")
    new_tick_market = new_tick_market[new_tick_market.id.isnull()]
    log.info(f"{new_tick_market.shape[0]} items to add to ticker_market")
    #add to db
    ticker_market.add_df(new_tick_market)
    #fetch updated table
    tick_market_db = sqlaq_to_df(ticker_market.fetch())
    return tick_market_db
def create_new_tickers(tick_scrape):
    """Add any scraped tickers that are not yet in the database.

    args:
    ----
    tick_scrape - pandas dataframe - all the scraped tickers

    returns:
    ----
    pandas dataframe - extract from the ticker table after the update
    """
    #Pull the current ticker table so we can see what already exists
    tick_db = sqlaq_to_df(ticker.fetch())
    #Left-join the db ids onto the scraped tickers; rows with no id are new
    joined = pd.merge(tick_scrape,
                      tick_db[["id", "ticker"]],
                      on=["ticker"],
                      how="left")
    missing = joined[joined.id.isnull()]
    log.info(f"{missing.shape[0]} items to add to ticker")
    #Insert the new tickers
    ticker.add_df(missing)
    #Return a fresh extract including the additions
    tick_db = sqlaq_to_df(ticker.fetch())
    return tick_db
def process_weekly_prices(ticker_id, split_from_date=None, split_to_date=None): """Function to fetch prices for a ticker between selected dates, then split into update and add records, then perform those split/add functions on the db. args: ---- ticker_id - int - the ticker id in the db split_from_date - datetime - the date to start the split split_to_date - datetime - the date to end the split log - logger """ #Get new price data if neccesary update_df, append_df = split_week_prices( ticker_ids=[ticker_id], from_date=split_from_date, to_date=split_to_date, ) #Update existing records weekly_price.update_df(update_df) log.info( f"\nUPDATED {update_df.shape[0]} RECORDS IN weekly_price: \n\tFROM {update_df.date.min()} \n\tTO {update_df.date.max()}" ) #Add new prices to the sql database weekly_price.add_df(append_df) log.info( f"\nADDED {append_df.shape[0]} NEW RECORDS TO weekly_price: \n\tFROM {append_df.date.min()} \n\tTO {append_df.date.max()}" )
def get_tickers():
    """Scrape the FTSE 100 and FTSE 250 ticker lists and return them as one dataframe.

    returns:
    ----
    pandas dataframe - the combined tickers, sorted by symbol, with the
        symbols cleaned and a last_seen_date column set to today
    """
    log.info("\nSCRAPPING TICKERS")
    #Scrape the ticker values for the FTSE 100 and FTSE 250 into dataframes
    log.info("\nFTSE 100")
    tick_ftse100 = ScrapeTickers("ftse100").scrape()
    log.info("\nFTSE 250")
    tick_ftse250 = ScrapeTickers("ftse250").scrape()
    #Combine into 1 dataframe
    tick_ftse = pd.concat([tick_ftse100, tick_ftse250])
    # BUG FIX: sort_values returns a new frame - the result was previously
    # discarded, leaving the frame unsorted
    tick_ftse = tick_ftse.sort_values(['ticker'])
    #Normalise the symbols: '.' separators become '-' ...
    tick_ftse['ticker'] = [
        re.sub(r'(?=[0-9A-Z])*\.(?=[0-9A-Z]+)', '-', tick)
        for tick in tick_ftse['ticker']
    ]
    #... and anything outside [0-9A-Z-] is stripped
    tick_ftse['ticker'] = [
        re.sub(r'[^0-9A-Z\-]', '', tick) for tick in tick_ftse['ticker']
    ]
    #Record when the ticker was last seen in a scrape
    tick_ftse['last_seen_date'] = dt.date.today()
    return tick_ftse
def process_soup(self, soup):
    """Parse a scraped price-history page into a list of row dicts.

    Returns [] when the soup is empty or the table has no data rows;
    otherwise one dict per table row keyed by the cleaned column names.
    """
    #Empty soup means the scrape returned nothing
    if soup == "":
        log.info('no results returned')
        #return to say that there has been an error
        return []
    #Selector for the historical-prices table, shared by header and body
    table_spec = {"name": "table", "attrs": {'data-test': 'historical-prices'}}
    #Header row -> cleaned column names
    head_tr = refine_soup(soup, [table_spec, {"name": "thead"}, {"name": "tr"}])[0]
    col_names = [clean_col_name(cell.text) for cell in head_tr]
    #Body rows
    body_rows = refine_soup(soup, [table_spec, {"name": "tbody"}, {"name": "tr"}])
    #If there are no dates there's no point going back further
    if not body_rows:
        log.info('No more records to collect')
        return []
    #Keep only rows whose cell count matches the header
    parsed = []
    for row in body_rows:
        cells = refine_soup(row, [{"name": 'td'}])
        if len(cells) == len(col_names):
            parsed.append(dict(zip(col_names, (cell.text for cell in cells))))
    return parsed
def process_daily_prices(ticker,
                         ticker_id,
                         st_date=None,
                         en_date=None,
                         split_from_date=None,
                         split_to_date=None):
    """Scrape prices for a ticker between selected dates, split them into
    update and add records, then apply those changes to the db.

    args:
    ----
    ticker - str - the ticker to use in scrape
    ticker_id - int - the ticker id in the db
    st_date - datetime:None - the date to start the scrape
    en_date - datetime:None - the date to end the scrape
    split_from_date - datetime:None - the date to start the split
    split_to_date - datetime:None - the date to end the split
    """
    #Guard: skip when a start date is given and the scrape window is empty
    if st_date and not st_date < en_date:
        log.info('No new records to collect')
        return
    #Scrape the new price data
    check, new_prices_df = get_day_prices(
        ticker,
        st_date,
        en_date,
    )
    if not check:
        log.info('No new records found')
        return
    new_prices_df['ticker_id'] = ticker_id
    #Split into existing records (update) and new records (append)
    update_df, append_df = split_day_prices(new_prices_df,
                                            ticker_ids=[ticker_id],
                                            from_date=split_from_date,
                                            to_date=split_to_date)
    #Update existing prices in the sql database
    daily_price.update_df(update_df)
    log.info(
        f"\nUPDATED {update_df.shape[0]} RECORDS IN daily_price: \n\tFROM {update_df.date.min()} \n\tTO {update_df.date.max()}"
    )
    #Add new prices to the sql database
    daily_price.add_df(append_df)
    log.info(
        f"\nADDED {append_df.shape[0]} NEW RECORDS TO daily_price: \n\tFROM {append_df.date.min()} \n\tTO {append_df.date.max()}"
    )
def process_soup(self, soup):
    """Parse a scraped page into (date, label) pairs.

    Returns [] when the soup is empty; otherwise zips the date text of each
    row's first td>span with the label text of each row's first td>a.
    """
    #Empty soup means the scrape returned nothing
    if soup == "":
        log.info('no results returned')
        #return to say that there has been an error
        return []
    #Get the data rows
    trs = refine_soup(soup, [
        {"name": "table"},
        {"name": "tbody"},
        {"name": "tr"},
    ])

    def first_text(row, inner_tag):
        #Text of the first td>inner_tag element, or None when absent
        found = refine_soup(row, [{"name": "td"}, {"name": inner_tag}])
        return found[0].text if len(found) > 0 else None

    #Grab the dates and labels, dropping rows where the element is missing
    date_texts = [t for t in (first_text(r, "span") for r in trs) if t is not None]
    label_texts = [t for t in (first_text(r, "a") for r in trs) if t is not None]
    return list(zip(date_texts, label_texts))
def filter_stocks(from_date=None, to_date=None):
    """Function to search for shares to buy

    args:
    ----
    from_date - datetime:None - a bounding minimum date (if neccesary)
    to_date - datetime:None - a bounding maximum date (if neccesary)

    returns:
    ----
    pandas dataframe - buy opportunities sorted by long macd gradient
        (empty when no signals are found)
    """
    #Fetch prices
    prices_df = sqlaq_to_df(
        daily_price.fetch(from_date=from_date, to_date=to_date))
    ticker_df = sqlaq_to_df(ticker.fetch()) \
        .rename(columns={"id": "ticker_id"})
    #Filter to keep only tickers which have a price on the latest date
    max_date = prices_df.date.max()
    ticks = prices_df[prices_df.date == max_date] \
        .ticker_id \
        .drop_duplicates()
    ticks = pd.merge(ticks.to_frame(),
                     ticker_df[["ticker_id", "ticker"]],
                     on=["ticker_id"])
    #Setup variables (an unused `sell` list has been removed)
    buy = []
    prices_df = prices_df.sort_values(['ticker_id', 'date']) \
        .reset_index(drop=True)
    #Loop ticks and get results
    for _, r in tqdm(ticks.iterrows(),
                     total=ticks.shape[0],
                     desc="Loop stock to find buy signals"):
        tick_prices = prices_df[prices_df.ticker_id == r.ticker_id]
        dataset = DataSet()
        dataset.add_dataset(tick_prices.close, "close")
        #Calculate the short macd, then replace it with its normalised form
        _, _, _, _, macd_short = dataset.close.calc_macd(ema_lng=26,
                                                         ema_sht=12,
                                                         sig_period=9)
        dataset.add_dataset(macd_short, "macd_short")
        macd_short = dataset.macd_short.norm_data(dataset.close.data)
        dataset.add_dataset(macd_short, "macd_short")
        #Calculate the long macd, then replace it with its normalised form
        _, _, _, _, macd_long = dataset.close.calc_macd(ema_lng=26 * 5,
                                                        ema_sht=12 * 5,
                                                        sig_period=9 * 5)
        dataset.add_dataset(macd_long, "macd_long")
        # BUG FIX: the normalised long macd was previously assigned to
        # macd_short and the raw macd_long re-added, so the long macd was
        # never normalised (unlike the short macd above)
        macd_long = dataset.macd_long.norm_data(dataset.close.data)
        dataset.add_dataset(macd_long, "macd_long")
        #Calc gradients of macds
        grad_macd_short = dataset.macd_short.calc_grad()
        dataset.add_dataset(grad_macd_short, "grad_macd_short")
        grad_macd_long = dataset.macd_long.calc_grad()
        dataset.add_dataset(grad_macd_long, "grad_macd_long")
        #Buy signal: the short macd gradient has just flipped positive while
        #the long macd gradient is positive
        check1 = (dataset.grad_macd_short.data.iloc[-1] > 0
                  and dataset.grad_macd_short.data.iloc[-2] < 0
                  and dataset.grad_macd_long.data.iloc[-1] > 0)
        if check1:
            buy.append({
                "ticker": r.ticker,
                "ticker_id": r.ticker_id,
                "short_grad_pre": dataset.grad_macd_short.data.iloc[-2],
                "short_grad_post": dataset.grad_macd_short.data.iloc[-1],
                "short_grad_change":
                abs(dataset.grad_macd_short.data.iloc[-2]) +
                abs(dataset.grad_macd_short.data.iloc[-1]),
                "long_grad": dataset.grad_macd_long.data.iloc[-1],
                "macd_long": dataset.macd_long.data.iloc[-1]
            })
    #Put into a dataframe (may be empty when no signals were found)
    buy_df = pd.DataFrame(buy)
    if buy_df.shape[0]:
        buy_df = buy_df.sort_values(["long_grad"], ascending=[False])
    log.info(f"{buy_df.shape[0]} opportunities found")
    return buy_df
tick_df["prev_max_macd"],tick_df["prev_max_macd_date"],tick_df["prev_max_macd_index"] = prev_max_min(tick_df[["date",'macd',"real_macd_max"]].copy(),'macd',"real_macd_max",1) # tick_df["prev_max_macd_change"] = mk_prev_move_float(tick_df['prev_max_macd']) # tick_df["prev_max_macd_index_change"] = mk_prev_move_float(tick_df['prev_max_macd_index']) # #Calc the gradient # tick_df['prev_max_macd_grad'] = tick_df["prev_max_macd_change"] / tick_df["prev_max_macd_index_change"] # prices_d_df.loc[prices_d_df.ticker == tick,:] = tick_df # #Filter to signal items # buy_mask = (prices_d_df.date == prices_d_df.date.max()) & (prices_d_df.prev_min_macd_grad > 0) & (prices_d_df.macd > prices_d_df.macd.shift(1)) & (prices_d_df.macd.shift(1) < prices_d_df.macd.shift(2)) # buy_df = prices_d_df[buy_mask] # buy_df['signal'] = 'BUY' # sell_mask = (prices_d_df.date == prices_d_df.date.max()) & (prices_d_df.prev_min_macd_grad < 0) & (prices_d_df.macd < prices_d_df.macd.shift(1)) & (prices_d_df.macd.shift(1) > prices_d_df.macd.shift(2)) # sell_df = prices_d_df[sell_mask] # sell_df['signal'] = 'SELL' # print(f"COUNT BUY -> {buy_df.shape[0]}") # print(f"COUNT SELL -> {sell_df.shape[0]}") # display(buy_df) # display(sell_df) # ft_eng_w_df = ft_eng_w_df[['ticker','date','close','macd','prev_min_macd_grad']] # ft_eng_w_df['open'] = ft_eng_w_df.close # ft_eng_w_df['high'] = ft_eng_w_df.close # ft_eng_w_df['low'] = ft_eng_w_df.close # ft_eng_w_df = ft_eng_w_df.sort_values(['ticker','date']).reset_index(drop=True) # tick = 'BAB' # tmp_df = ft_eng_w_df[ft_eng_w_df.ticker == tick] # # tmp_df = calc_ema_macd(tmp_df) # fig = make_subplots(rows=2,cols=1,specs=[[{'secondary_y':False}],[{'secondary_y':True}]]) # #Chart 1 # fig.add_trace( # go.Ohlc( # x=tmp_df.date, # open=tmp_df.open, # high=tmp_df.high, # low=tmp_df.low, # close=tmp_df.close, # name='OHLC' # ), # row=1,col=1 # ) # # fig.add_trace( # # go.Scatter( # # x=tmp_df.date, # # y=tmp_df.ema12, # # name='ema12' # # ), # # row=1,col=1 # # ) # # 
fig.add_trace( # # go.Scatter( # # x=tmp_df.date, # # y=tmp_df.ema26, # # name='ema26' # # ), # # row=1,col=1 # # ) # #Chart 2 # fig.add_trace( # go.Bar( # x=tmp_df[tmp_df.macd > 0].date,y=tmp_df[tmp_df.macd > 0].macd, # marker_color='green' # ), # row=2,col=1 # ) # fig.add_trace( # go.Bar( # x=tmp_df[tmp_df.macd < 0].date,y=tmp_df[tmp_df.macd < 0].macd, # marker_color='red' # ), # row=2,col=1 # ) # # fig.add_trace( # # go.Scatter( # # x=tmp_df.date, # # y=tmp_df.macd_line, # # name='macd line' # # ), # # row=2,col=1,secondary_y=True # # ) # # fig.add_trace( # # go.Scatter( # # x=tmp_df.date, # # y=tmp_df.signal_line, # # name='signal line' # # ), # # row=2,col=1,secondary_y=True # # ) # #Establish range selector and buttons # rng_sel_di = dict( # buttons=list([ # dict(count=1, # label="1m", # step="month", # stepmode="backward"), # dict(count=6, # label="6m", # step="month", # stepmode="backward"), # dict(count=1, # label="YTD", # step="year", # stepmode="todate"), # dict(count=1, # label="1y", # step="year", # stepmode="backward"), # dict(count=5, # label="5y", # step="year", # stepmode="backward"), # dict(count=3, # label="3y", # step="year", # stepmode="backward"), # dict(step="all") # ]) # ) # for axis in ['xaxis' # ,'xaxis2' # ]: # fig.layout[axis].rangeselector=rng_sel_di # fig.layout[axis].rangeslider.visible=False # # fig.layout.yaxis.domain = [0.7,1.0] # # fig.layout.yaxis2.domain = [0.0,0.3] # fig.update_yaxes(automargin=True) # fig.update_layout( # title=f'Charts for {tick}' # ) # fig.show() # display(ft_eng_w_df[ft_eng_w_df.ticker == tick][['ticker','date','close','ema26','macd','prev_min_macd_grad']].tail(15))
# NOTE(review): this span looks like the tail of a second screening routine -
# its `def` line and the `buy = []` initialiser are not visible here, and
# `prices_df` / `ticks` must be defined earlier; confirm against the full file.
sell = []
prices_df = prices_df.sort_values(['ticker_id','date']) \
    .reset_index(drop=True)
#Loop ticks and get results
for _, r in tqdm(ticks.iterrows(),
                 total=ticks.shape[0],
                 desc="Loop stock to find buy signals"):
    #Prices for this ticker only (frame sorted by ticker/date above)
    tick_prices = prices_df[prices_df.ticker_id == r.ticker_id]
    dataset = DataSet()
    dataset.add_dataset(tick_prices.change, "change")
    #Calc consecutive losses
    cons_loses = dataset.change.calc_consec_loss()
    dataset.add_dataset(cons_loses, "cons_loses")
    #Identify if it is a buy signal: the losing streak has just ended
    #(latest count is 0) after at least 3 consecutive losses
    check1 = (dataset.cons_loses.data.iloc[-1] == 0 \
        and dataset.cons_loses.data.iloc[-2] >= 3)
    if check1:
        buy.append({
            "ticker": r.ticker,
            "ticker_id": r.ticker_id,
            "cons_loses": dataset.cons_loses.data.iloc[-2],
        })
#Put into a dataframe, longest streak first
buy_df = pd.DataFrame(buy) \
    .sort_values(["cons_loses"], ascending=[False])
log.info(f"{buy_df.shape[0]} opportunities found")
def daily_to_weekly_price_conversion(dp_df):
    """Function to convert the daily prices into weekly prices

    args:
    ------
    dp_df - pandas dataframe - the daily prices

    returns:
    ------
    tuple - (True, pandas dataframe of weekly prices)
    """
    log.info('Converting daily prices to weekly prices')
    #Mark the week identifier (iso year, iso week number)
    dp_df['isocalendar'] = [x.isocalendar()[:2] for x in dp_df['date']]
    #Get highs and lows
    high_df = dp_df.loc[dp_df['high'] > 0, ['high','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .max()
    low_df = dp_df.loc[dp_df['low'] > 0, ['low','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .min()
    #Get total volume for the week
    vol_df = dp_df.loc[dp_df['volume'] > 0, ['volume','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .sum()
    #Get max and min week days (last/first traded day of each week)
    max_wk_day = dp_df.loc[dp_df['close'] > 0, ['date','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .max()
    min_wk_day = dp_df.loc[dp_df['open'] > 0, ['date','ticker_id','isocalendar']] \
        .groupby(['ticker_id','isocalendar'], as_index=False) \
        .min()
    #Get open price on the first traded day and close price on the last
    # BUG FIX: these joins previously used on='date' only, so two tickers
    # sharing the same week-start/end date would cross-match and pick up
    # each other's open/close prices; join on ticker_id as well.
    open_df = pd.merge(dp_df[['ticker_id', 'date', 'open']],
                       min_wk_day,
                       on=['ticker_id', 'date'])
    close_df = pd.merge(dp_df[['ticker_id', 'date', 'close']],
                        max_wk_day,
                        on=['ticker_id', 'date'])
    #Form the final df - one row per (ticker_id, week) after the dedupe below
    wp_df = dp_df[['ticker_id', 'isocalendar']]
    wp_df = pd.merge(wp_df, min_wk_day, on=['ticker_id', 'isocalendar'],
                     how="left")  #date
    wp_df = pd.merge(wp_df, high_df, on=['ticker_id', 'isocalendar'],
                     how="left")  #high
    wp_df = pd.merge(wp_df, low_df, on=['ticker_id', 'isocalendar'],
                     how="left")  #low
    wp_df = pd.merge(wp_df, vol_df, on=['ticker_id', 'isocalendar'],
                     how="left")  #volume
    wp_df = pd.merge(wp_df, open_df[['ticker_id', 'isocalendar', 'open']],
                     on=['ticker_id', 'isocalendar'], how="left")  #open
    wp_df = pd.merge(wp_df, close_df[['ticker_id', 'isocalendar', 'close']],
                     on=['ticker_id', 'isocalendar'], how="left")  #close
    wp_df['change'] = wp_df['close'] - wp_df['open']
    wp_df = wp_df.drop_duplicates() \
        .reset_index(drop=True)
    #Get the monday of each week
    wp_df['date'] = [calc_wk_st_date(x) for x in wp_df.date]
    wp_df = wp_df.drop(columns=['isocalendar'])
    #Fill missing values
    wp_df = wp_df.fillna(0)
    return True, wp_df
def full_scrape():
    """Function to perform a full scrape of all available prices"""
    #########################
    ### SCRAPPING TICKERS ###
    #########################
    #scrape the tickers
    tick_ftse = get_tickers()
    #create new records in the ticker table
    tick_db = create_new_tickers(tick_ftse, )
    #update ticker records with last seen date
    tick_ftse = pd.merge(tick_ftse, tick_db[["ticker", "id"]], on="ticker")
    ticker.update_df(tick_ftse)
    #create new records in the ticker_market table
    _ = create_new_ticker_markets(tick_ftse, )
    #Create a list of ticker ids
    ticker_ids = tick_ftse.id.to_list()
    ####################
    ### DAILY PRICES ###
    ####################
    log.info("\nSCRAPPING DAILY PRICES")
    #Make a call for all the latest dates
    latest_dates_df = sqlaq_to_df(
        daily_price.fetch_latest(session, ticker_ids=ticker_ids))
    latest_dates_df["max_date"] = latest_dates_df.max_date.astype("datetime64")
    #Calc the en_date for today
    en_date = calc_en_date()
    if str(CONFIG['web_scrape']['mode']).lower() == 'update':
        #update mode: only scrape from each ticker's latest stored date
        latest_dates_df["st_date"] = [
            calc_st_date(v) for v in latest_dates_df.max_date
        ]
    else:
        #full mode: scrape everything from the epoch
        latest_dates_df["st_date"] = dt.datetime(1970, 1, 1)
        #Delete existing data
        # NOTE(review): placed inside the full-mode branch - deleting all
        # daily prices in update mode would lose data; confirm this matches
        # the original intent.
        daily_price.remove()
    #Loop through the tickers in tick_ftse and for each one get the latest date of scrape.
    #Convert this date into a timestamp.
    #Scrape all new data and add to the database.
    dp_errors = []
    run_time = ProcessTime()
    for _, r in tqdm(latest_dates_df.iterrows(),
                     total=latest_dates_df.shape[0],
                     desc="Scrape daily prices"):
        log.info(f'\n{len(run_time.lap_li)} RUNNING FOR -> {r.id}, {r.ticker}')
        log.info(f'Latst date - {r.max_date}')
        try:
            #Get new price data if neccesary and add/update the database
            process_daily_prices(r.ticker,
                                 r.id,
                                 st_date=r.st_date,
                                 en_date=en_date,
                                 split_from_date=r.max_date,
                                 split_to_date=None)
        except Exception as e:
            #Record the failure and keep processing the remaining tickers
            log.error(e)
            dp_errors.append({'ticker': r.ticker, "error": e})
        #Lap
        log.info(run_time.lap())
        log.info(run_time.show_latest_lap_time(show_time=True))
    log.info(f"DAILY SCRAPE RUN TIME - {run_time.end()}")
    #####################
    ### WEEKLY PRICES ###
    #####################
    log.info("\nSCRAPPING WEEKLY PRICES")
    #Make a call for all the latest dates
    latest_dates_df = sqlaq_to_df(
        weekly_price.fetch_latest(session, ticker_ids=ticker_ids))
    latest_dates_df["max_date"] = latest_dates_df.max_date.astype("datetime64")
    #Loop through the tickers in tick_ftse and for each one get the latest date of scrape.
    #Convert this date into a timestamp.
    #Scrape all new data and add to the database.
    wp_errors = []
    run_time = ProcessTime()
    for _, r in tqdm(latest_dates_df.iterrows(),
                     total=latest_dates_df.shape[0],
                     desc="Process weekly prices"):
        log.info(f'\n{len(run_time.lap_li)} RUNNING FOR -> {r.id}, {r.ticker}')
        try:
            #Get new price data if neccesary
            if r.max_date < en_date:
                process_weekly_prices(
                    r.id,
                    split_from_date=r.max_date,
                )
            else:
                log.info('No new records to collect')
                continue
        except Exception as e:
            #Record the failure and keep processing the remaining tickers
            log.error(e)
            wp_errors.append({'ticker': r.ticker, "error": e})
        #Lap
        log.info(run_time.lap())
        log.info(run_time.show_latest_lap_time(show_time=True))
    log.info('\n\n')
    log.info(f"WEEKLY SCRAPE RUN TIME - {run_time.end()}")
    ####################
    ### PRINT ERRORS ###
    ####################
    log.info(f'\nDAILY ERROR COUNT -> {len(dp_errors)}')
    if len(dp_errors) > 0:
        log.info('DALIY ERRORS ->')
        for e in dp_errors:
            log.error(e)
    log.info(f'\nWEEKLY ERROR COUNT -> {len(wp_errors)}')
    if len(wp_errors) > 0:
        log.info('WEEKLY ERRORS ->')
        for e in wp_errors:
            log.error(e)
def get_day_prices(ticker: str, st_date: "dt.datetime | None",
                   en_date: "dt.datetime | None"):
    """Function for getting daily stock prices from webscrapping

    args:
    ------
    ticker - str - the identifier for the stock being looked at; needs to
        match Yahoo.co.uk
    st_date - datetime or None - the first date to scrape prices for
    en_date - datetime or None - the last date to scrape prices for

    returns:
    ------
    tuple - (bool success flag, pandas dataframe of cleaned prices or None)
    """
    log.info(
        f'Getting DAILY prices for -> {ticker} from {str(st_date)} to {str(en_date)}'
    )
    #Perform async scrapes
    tick_df = ScrapePrices(ticker, st_date, en_date).scrape()
    #Check for rows - if none then return
    if not tick_df.shape[0]:
        log.warning("Early exit due to no new records being found")
        return False, None
    #Reformat strings to floats
    tick_df['open'] = [str_to_float_format(v) for v in tick_df.open]
    tick_df['high'] = [str_to_float_format(v) for v in tick_df.high]
    tick_df['low'] = [str_to_float_format(v) for v in tick_df.low]
    tick_df['close'] = [str_to_float_format(v) for v in tick_df.close]
    tick_df['adj_close'] = [str_to_float_format(v) for v in tick_df.adj_close]
    tick_df['volume'] = [str_to_float_format(v) for v in tick_df.volume]
    tick_df['change'] = tick_df.close - tick_df.open
    #Reformat date
    tick_df['date'] = [
        conv_dt(v, date_or_time="short_date") for v in tick_df.date
    ]
    #Add the ticker series
    tick_df['ticker'] = ticker
    #Mark the week identifier (iso year, iso week) and attach each row's
    #week start date (the first traded day with a positive open)
    tick_df['isocalendar'] = [x.isocalendar()[:2] for x in tick_df['date']]
    min_wk_day = tick_df.loc[tick_df['open'] > 0, ['date','isocalendar']] \
        .groupby('isocalendar') \
        .min() \
        .reset_index() \
        .rename(columns={'date':'week_start_date'})
    tick_df = pd.merge(tick_df, min_wk_day, on=['isocalendar'])
    #CLEANING - Remove any rows with no prices
    tick_df = tick_df[tick_df.open > 0]
    #CLEANING - Copy row above where the change has been more than 90%
    # NOTE(review): shift(-1) actually copies the row BELOW (the next row),
    # while the comment says "row above" (shift(1)) - confirm which is
    # intended before changing anything.
    tick_df['cl_change'] = (tick_df.close -
                            tick_df.close.shift(1)) / tick_df.close.shift(1)
    mask = tick_df['cl_change'] < -0.9
    tick_df.loc[mask, 'open'] = tick_df.open.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'close'] = tick_df.close.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'high'] = tick_df.high.shift(-1).copy().loc[mask]
    tick_df.loc[mask, 'low'] = tick_df.low.shift(-1).copy().loc[mask]
    #Fill missing values
    tick_df = tick_df.fillna(0)
    #Keep only the output columns
    tick_df = tick_df[[
        'ticker', 'date', 'week_start_date', 'open', 'close', 'high', 'low',
        'change', 'volume'
    ]]
    return True, tick_df