def get_symbol_db(min_market_cap=100):
    """Return the distinct symbols whose market capitalization exceeds a threshold.

    :param min_market_cap: minimum market capitalization (same units as
        biz_fin."marketCapitalization"); coerced to float before being
        interpolated into the query.
    :return: DataFrame with a single ``symbol`` column (one row per symbol).
    """
    # The query is built by string interpolation, so coerce to a real number
    # first: a non-numeric value raises ValueError/TypeError here instead of
    # being spliced into the SQL text (injection guard).
    min_market_cap = float(min_market_cap)
    sql = f"""
        SELECT symbol
        FROM eproject_fx.public.biz_fin
        WHERE CAST(biz_fin."marketCapitalization" as float)>{min_market_cap}
        GROUP BY symbol;
    """
    df = pg_db.query_df(sql)
    return df
def get_stock_data_db(symbol, num_months_ago=1):
    """Fetch stored candles for one symbol over the trailing N months, oldest first.

    :param symbol: ticker symbol; single quotes are doubled so the value cannot
        terminate the SQL string literal (injection guard).
    :param num_months_ago: lookback window in months; coerced to int before
        being used in the interval literal.
    :return: DataFrame of ``stocks_finn_hub`` rows ordered by date ascending.
    """
    # Both values are interpolated into the SQL text, so sanitize them first.
    safe_symbol = str(symbol).replace("'", "''")
    num_months_ago = int(num_months_ago)
    sql = f"""
        SELECT *
        FROM stocks_finn_hub
        WHERE symbol='{safe_symbol}'
          and date>'now'::timestamp - '{num_months_ago} month'::interval
        order by date;
    """
    df = pg_db.query_df(sql)
    return df
def get_breakout_symbols_db(window_percentage=2, window_length=15, min_market_cap=100):
    """
    Find symbols whose latest close breaks above a recent tight ("consolidating") range.

    :param window_percentage: max spread (percent) allowed between min and max
        close over the lookback window for it to count as consolidation
    :param window_length: number of prior candles (excluding the latest one)
        that make up the consolidation window
    :param min_market_cap: currently UNUSED — neither query filters on market
        cap; TODO(review): confirm whether sql2's biz_fin join should apply it
    :return: DataFrame of rows 'breaking' out of 'consolidation' on the latest
        candle (latest close above the window max), with biz_fin columns merged in.
        NOTE(review): despite the original doc saying "list of symbols", a
        DataFrame is returned.
    """
    # Per-symbol min/max close over candles ranked 2..window_length+1 by date
    # descending, i.e. the window_length candles *before* the most recent one.
    # nullif() avoids division by zero when max(c) is 0.
    sql1 = f"""
        SELECT r.symbol, max(r.c), min(r.c), min(r.c) / nullif( max(r.c), 0) as min_max_ratio
        FROM (SELECT stocks_finn_hub.*, rank() OVER ( PARTITION BY symbol ORDER BY date DESC) FROM stocks_finn_hub) r
        WHERE RANK <= {window_length + 1} AND RANK>1
        GROUP BY r.symbol;
    """
    df1 = pg_db.query_df(sql1)
    # Keep only symbols whose window is tight: min/max close ratio within
    # window_percentage percent (e.g. 2 -> ratio >= 0.98).
    df1 = df1[df1['min_max_ratio'] >= (1 - window_percentage / 100)]
    # Latest candle (RANK = 1) per symbol, joined to its biz_fin fundamentals.
    sql2 = f"""
        SELECT r.c, biz_fin.*
        FROM (SELECT stocks_finn_hub.*, rank() OVER ( PARTITION BY symbol ORDER BY date DESC) FROM stocks_finn_hub) r
        INNER JOIN biz_fin USING (symbol)
        WHERE RANK = 1
    """
    df2 = pg_db.query_df(sql2)
    df3 = pd.merge(df1, df2, on='symbol')
    # Breakout condition: latest close strictly above the window's max close
    # ('max' is the unnamed max(r.c) column from sql1).
    result_df = df3[(df3['c'] > df3['max'])]
    return result_df
def update_data_db_symbol(symbol):
    """Append any missing recent candles for `symbol` to stocks_finn_hub.

    Fetches from the day after the last stored timestamp (or from
    one_year_ago_u when the symbol has no rows yet) up to today_u, but only
    when the gap is large enough to contain new trading data.
    """
    one_day = 24 * 60 * 60  # seconds
    # Mondays need a 3-day gap (the weekend produces no candles), else 1 day.
    # NOTE(review): assumes pendulum's Monday == 1 — confirm library version.
    required_gap = (3 if today.day_of_week == 1 else 1) * one_day
    # Latest stored unix timestamp for this symbol, if any.
    last_df = pg_db.query_df(f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'")
    fetch_from = one_year_ago_u
    if len(last_df) and last_df.loc[0, 'max_date'] is not None:
        fetch_from = last_df.loc[0, 'max_date'] + one_day
    if today_u - fetch_from <= required_gap:
        return
    candles = get_stock_data(symbol, fetch_from, today_u)
    if candles is not None and len(candles):
        pg_db.df_to_db(candles, name='stocks_finn_hub', if_exists='append', index=False)
def apply_the_filters(start_row=0, use_forecast=False, pc_higher_sp=10, min_to_52w_low=1.3, min_52w_high=0.75):
    """Screen symbols against three trend/strength conditions and persist results.

    For every symbol (from get_symbols() or, when use_forecast, a forecast CSV):
      condition_1: price above SMA50 > SMA150 > SMA200 (uptrend stack)
      condition_2: price sufficiently above its 52-week low AND near its 52-week high
      condition_3: all price-relative-to-S&P500 metrics >= pc_higher_sp
    Writes per-symbol SMA columns and condition flags into the frame, snapshots
    progress CSVs, mails progress/errors, and finally replaces the biz_fin table.

    :param start_row: positional row offset into the symbol frame to resume from
    :param use_forecast: use the fbprophet growth_stocks.csv instead of get_symbols()
    :param pc_higher_sp: min value for each priceRelativeToS&P500* metric
    :param min_to_52w_low: min ratio of current price to the 52-week low
    :param min_52w_high: min ratio of current price to the 52-week high
    """
    # df_symbol holds the per-symbol candle frame; kept global (side effect —
    # presumably for interactive inspection after a run; confirm with author).
    global df_symbol
    # %% 0 get all symbols
    df_symbols = get_symbols()
    df_forecast = pd.read_csv(
        '/home2/eproject/veehuen/python/algo102/fbprophet/growth_stocks.csv')
    # %%
    df = df_forecast if use_forecast else df_symbols
    df_metric_list = []  # one single-row metric frame per processed symbol
    df['condition_1'] = False
    df['condition_2'] = False
    df['condition_3'] = False
    df['sma_50'] = None
    df['sma_150'] = None
    df['sma_200'] = None
    # df['52WeekHigh'] = None
    # df['52WeekLow'] = None
    # df['priceRelativeToS&P50013Week'] = None
    # df['priceRelativeToS&P50026Week'] = None
    # df['priceRelativeToS&P5004Week'] = None
    # %% 1 get current price per symbol
    filtered_symbol = []
    for i, row in df[start_row:].iterrows():
        symbol = row['symbol']
        try:
            conditions = [False] * 3
            # if i % 1500 == 0 and i > 0:
            #     break
            print(i, symbol, datetime.now())
            # NOTE(review): symbol is interpolated unescaped into the SQL text.
            sql = f"SELECT * FROM stocks_finn_hub WHERE symbol='{symbol}' order by DATE DESC LIMIT 200"
            df_symbol = pg_db.query_df(sql)
            # skip if data less than 200 samples (SMA200 needs a full window)
            if len(df_symbol) < 200:
                continue
            # Rows arrive newest-first; reindex by calendar date and sort
            # ascending so rolling windows and iloc[-1] refer to the latest day.
            df_symbol.index = df_symbol['date'].dt.date
            df_symbol.sort_index(inplace=True)
            df_symbol[f'sma_50'] = df_symbol['c'].rolling(50).mean()
            df_symbol['sma_150'] = df_symbol['c'].rolling(150).mean()
            df_symbol['sma_200'] = df_symbol['c'].rolling(200).mean()
            current = df_symbol.iloc[-1]['c']
            sma_50 = df_symbol.iloc[-1]['sma_50']
            sma_150 = df_symbol.iloc[-1]['sma_150']
            sma_200 = df_symbol.iloc[-1]['sma_200']
            df.loc[i, 'sma_50'] = sma_50
            df.loc[i, 'sma_150'] = sma_150
            df.loc[i, 'sma_200'] = sma_200
        except Exception as e:
            # Best-effort screening: log and move to the next symbol.
            print(symbol, e)
            continue
        # Condition 1: ascending SMA stack below the current price.
        try:
            if current > sma_50 > sma_150 > sma_200:
                conditions[0] = True
                df.loc[i, 'condition_1'] = True
        except TypeError:
            # NaN/None SMA comparison — not enough data; skip symbol.
            continue
        # Fetch fundamentals and archive the raw payload as gzipped JSON.
        try:
            bs = get_basic_financials(symbol, 'price')
            data_folder = '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data'
            dict_to_json_zipped(bs, f'{data_folder}/bs_{symbol}.json.gzip')
        except Exception as e:
            send_eri_mail('*****@*****.**', e.__repr__(), 'finhubb error: bs')
            continue
        # Throttle the external API: pause briefly every third row index.
        if i % 3 == 0:
            sleep_time = randint(3, 8)
            print(i, f'sleeping {sleep_time} seconds')
            time.sleep(sleep_time)
        high_52_week = bs.get('metric').get('52WeekHigh')
        low_52_week = bs.get('metric').get('52WeekLow')
        # Condition 2: well off the 52-week low AND close to the 52-week high.
        try:
            if current / low_52_week > min_to_52w_low and current / high_52_week > min_52w_high:
                conditions[1] = True
                df.loc[i, 'condition_2'] = True
        except TypeError:
            # Missing (None) 52-week metric; skip symbol.
            continue
        price_relative_to_SP500 = [
            bs.get('metric').get('priceRelativeToS&P50013Week'),
            bs.get('metric').get('priceRelativeToS&P50026Week'),
            bs.get('metric').get('priceRelativeToS&P5004Week'),
            # bs.get('metric').get('priceRelativeToS&P50052Week'),
        ]
        # save metric to df
        row_metric = pd.DataFrame.from_dict(bs.get('metric'), orient='index').T
        row_metric.index = [i]
        df_metric_list.append(row_metric)
        # Condition 3: outperforming the S&P 500 on every horizon checked.
        # (The genexp's `i` is scoped to the genexp and does not clobber the row index.)
        try:
            if all(i >= pc_higher_sp for i in price_relative_to_SP500):
                conditions[2] = True
                df.loc[i, 'condition_3'] = True
        except TypeError:
            continue
        if all(conditions):
            filtered_symbol.append(symbol)
            print(filtered_symbol)
            print(df.loc[i])
        # Periodic checkpoint CSV and progress mail.
        if i % 1000 == 0 and i > 0:
            df.to_csv(
                f'/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered_{i}.csv',
                index=False)
        if i % 100 == 0 and i > 0:
            send_eri_mail('*****@*****.**', f'processed {i} symbols', 'finhubb progress: bs')
    # Combine all per-symbol metric rows with the flags frame, persist to CSV,
    # then replace the biz_fin table wholesale.
    # NOTE(review): pd.concat raises if df_metric_list is empty (no symbol processed).
    df_metric = pd.concat(df_metric_list)
    final_df_filtered = pd.concat([df, df_metric], axis=1)
    final_df_filtered.to_csv(
        '/home2/eproject/vee-h-phan.com/algo102/data_providers/finnhub/data/growth_stocks_filtered.csv',
        index=False)
    pg_db.df_to_db(final_df_filtered, name='biz_fin', if_exists='replace', index=False)
def update_data_db():
    """Incrementally refresh stocks_finn_hub candles for every known symbol.

    For each symbol, reads the latest stored timestamp and fetches candles
    from the day after it (or from one_year_ago_u when none exist) up to
    tomorrow_u, appending rows to the DB. Sleeps periodically to respect the
    API rate limit, mails progress, and finally stamps the mr_meta job row.
    """
    # get symbols
    stocks_list = get_symbols()
    stocks_df = pd.DataFrame(stocks_list)
    j = 0               # count of symbols actually fetched (drives rate-limit sleeps)
    last_slept_at = -1  # guards against sleeping twice for the same j
    for i, symbol in enumerate(stocks_df['symbol']):
        # Latest stored unix timestamp for this symbol, if any.
        sql_ = f"SELECT Max(t) as max_date FROM stocks_finn_hub WHERE symbol='{symbol}'"
        df_last = pg_db.query_df(sql_)
        # Only fetch when the gap since the last stored candle is big enough:
        # 3 days on Monday (weekend has no candles), otherwise 1 day.
        # NOTE(review): assumes pendulum's Monday == 1 — confirm library version.
        start = one_year_ago_u
        if today.day_of_week == 1:
            min_delta_days = 3 * 24 * 60 * 60
        else:
            min_delta_days = 1 * 24 * 60 * 60
        if len(df_last):
            if df_last.loc[0, 'max_date'] is not None:
                last_day_in_db = df_last.loc[0, 'max_date']
                start = last_day_in_db + 1 * 24 * 60 * 60
        if today_u - start > min_delta_days:
            candles_df = get_stock_data(symbol, start, tomorrow_u)
            j += 1
            print(f"j={j}")
            # Pause every 5 fetches to stay under the API rate limit.
            if j % 5 == 0 and j > 0 and j != last_slept_at:
                # BUGFIX: message said "5 seconds" while sleeping 10.
                print('sleeping for 10 seconds...')
                time.sleep(10)
                last_slept_at = j
            if candles_df is not None and len(candles_df):
                try:
                    pg_db.df_to_db(candles_df, name='stocks_finn_hub', if_exists='append', index=False)
                except Exception as e:
                    print(e)
                    # BUGFIX: was f'csv/{symbol.csv}', which raises
                    # AttributeError on a str; dump failed batch to csv/<symbol>.csv.
                    candles_df.to_csv(f'csv/{symbol}.csv')
            print(f'finished {i} {symbol}')
            # NOTE(review): this continue skips the progress mail below for
            # fetched symbols — preserved as-is; confirm if intentional.
            continue
        else:
            print(f'skipping {symbol}')
        if i % 1000 == 0 and i > 0:
            msg = f"<p>completed {i} stocks....</p>"
            send_eri_mail('*****@*****.**', message_=msg,
                          subject='finhubb data progress', message_type='html')
    # update meta: record the completion time for this job.
    sql_ = f"""UPDATE eproject_fx.public.mr_meta SET last_updated = '{pendulum.now(tz='UTC').strftime('%Y-%m-%d %H:%M UTC')}' WHERE job_name='update stock data'"""
    pg_db.query(sql_)