def get_same_industry_tickers(ticker, samplesize):
    """Download data for a random sample of tickers from the same industry.

    The industry of ``ticker`` is looked up in ``./tickers.csv`` (columns
    ``ticker`` and ``industry``); ``samplesize`` other tickers of that
    industry are drawn at random and their data is downloaded.

    Args:
        ticker: Ticker symbol. It is converted with ``str()`` and the
            characters ``[``, ``]`` and ``'`` are removed, so the string
            form of a one-element list is accepted as well.
        samplesize: Number of same-industry tickers to sample.

    Returns:
        A DataFrame with the downloaded rows of all sampled tickers stacked
        under a fresh 0..n-1 index (empty when ``samplesize`` is 0).

    Raises:
        IndexError: If ``ticker`` is not listed in ``./tickers.csv``.
        ValueError: If ``samplesize`` exceeds the number of other tickers
            in the industry (raised by ``random.sample``).
    """
    # Strip list punctuation in a single pass.
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # All tickers of the same industry, excluding every occurrence of the
    # ticker itself (list.remove would only drop the first one).
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    same_industry = df_alltickers[df_alltickers.industry == industry]
    industrytickers = [
        peer for peer in same_industry['ticker'].tolist() if peer != ticker
    ]

    sameindustrytickers = random.sample(industrytickers, samplesize)

    # One download per ticker: the original author notes that requesting
    # several tickers at once triggers an error.
    frames = []
    for sit in sameindustrytickers:
        yr = yahoo_reader.finance_data(tickers=[sit])
        df, _ = yr.get_fix_yahoo_data()
        frames.append(df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so keep the empty-frame result.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
def get_large_decreases_in_industry(ticker, percentage):
    """Collect the rows surrounding large intraday drops of industry peers.

    For every other ticker in the industry of ``ticker`` (looked up in
    ``./tickers.csv``) the data is downloaded and ``close / open`` is
    computed per row. A row is kept when any row from 49 positions before
    it up to 100 positions after it has ``close / open < percentage``.

    Args:
        ticker: Ticker symbol. It is converted with ``str()`` and the
            characters ``[``, ``]`` and ``'`` are removed.
        percentage: Threshold for the ``close / open`` ratio (a ratio, not
            a percent value); ratios below it count as a large decrease.

    Returns:
        A DataFrame of the kept rows of all peers with a fresh index and
        without the helper columns. Empty when no peer could be processed.

    Raises:
        IndexError: If ``ticker`` is not listed in ``./tickers.csv``.
    """
    # Strip list punctuation in a single pass.
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # All tickers of the same industry, excluding the ticker itself.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    same_industry = df_alltickers[df_alltickers.industry == industry]
    industrytickers = [
        peer for peer in same_industry['ticker'].tolist() if peer != ticker
    ]

    # Countdown shown in the progress message.
    number = len(industrytickers)

    # One download per ticker: the original author notes that requesting
    # several tickers at once triggers an error.
    frames = []
    for sit in industrytickers:
        print(f"Downloading data for {number} same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            df['perc_change'] = df.close / df.open
            df['arounddecrease'] = 0
            # shift(offset) moves the value of row i - offset onto row i, so
            # offsets -100..49 flag every row that lies up to 100 rows before
            # or up to 49 rows after a large decrease.
            for offset in range(-100, 50):
                df['arounddecrease'] = np.where(
                    df.perc_change.shift(offset) < percentage, 1,
                    df.arounddecrease)
        except Exception:
            # Best effort: skip tickers that fail to download or process,
            # but let KeyboardInterrupt / SystemExit propagate.
            print("No data")
            continue
        frames.append(df)
        print("Ticker data added")

    # Without any data there is no 'arounddecrease' column to filter on.
    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_largedecreases = pd.concat(frames, ignore_index=True)
    df_largedecreases = df_largedecreases[df_largedecreases.arounddecrease == 1]
    return df_largedecreases.drop(columns=['arounddecrease', 'perc_change'])
def get_same_industry_similar_volatility_tickers(ticker, window, samplesize):
    """Return data of the industry peers whose volatility is closest to ``ticker``.

    Volatility is the rolling standard deviation of ``close`` over
    ``window`` rows, taken at the last row of each ticker's data. Peers are
    ranked by the absolute difference between their volatility and that of
    ``ticker``; the ``samplesize`` closest ones are returned.

    Args:
        ticker: Ticker symbol. It is passed unchanged to the downloader and
            afterwards converted with ``str()`` (with ``[``, ``]`` and ``'``
            removed) for the lookup in ``./tickers.csv``.
        window: Rolling window length for the standard deviation.
        samplesize: Number of closest peers to keep.

    Returns:
        A DataFrame with all downloaded rows (plus a ``volatility`` column)
        of the selected peers, closest peer first, under a fresh index.
        Empty when no peer could be processed or ``samplesize`` is 0.

    Raises:
        IndexError: If ``ticker`` is not listed in ``./tickers.csv`` or its
            downloaded data is empty.
    """
    # Reference volatility of the requested ticker on its last row. A scalar
    # is used on purpose: subtracting two one-row Series aligns them on
    # their index labels and yields NaN whenever the labels differ.
    yr = yahoo_reader.finance_data(tickers=[ticker])
    dfA, _ = yr.get_fix_yahoo_data()
    volatilityA = dfA.close.rolling(window).std().iloc[-1]

    # Strip list punctuation in a single pass.
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # All tickers of the same industry, excluding the ticker itself.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    same_industry = df_alltickers[df_alltickers.industry == industry]
    industrytickers = [
        peer for peer in same_industry['ticker'].tolist() if peer != ticker
    ]

    # Countdown shown in the progress message.
    number = len(industrytickers)

    frames = []  # full data per peer
    tails = []   # last row per peer, carrying 'vol_diff'

    # One download per ticker: the original author notes that requesting
    # several tickers at once triggers an error.
    for sit in industrytickers:
        print(f"Downloading data for {number} same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            df['volatility'] = df.close.rolling(window).std()

            # Copy the last row so adding a column does not write into a
            # slice of df.
            df_tail = df.tail(1).copy()
            df_tail['vol_diff'] = abs(volatilityA - df['volatility'].iloc[-1])
        except Exception:
            # Best effort: skip tickers that fail to download or process,
            # but let KeyboardInterrupt / SystemExit propagate.
            print("No data")
            continue
        frames.append(df)
        tails.append(df_tail)
        print("Ticker data added")

    # Nothing downloaded: there is no 'vol_diff' column to sort on.
    if not tails:
        return pd.DataFrame()

    # Rank peers by closeness in volatility (NaN differences sort last).
    df_tails = pd.concat(tails, ignore_index=True)
    df_tails = df_tails.sort_values('vol_diff', ascending=True)
    df_tails = df_tails.head(samplesize)
    sample = list(df_tails.ticker.unique())
    if not sample:
        return pd.DataFrame()

    # Final frame: all rows of each selected peer, in ranking order.
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_industrytickers = pd.concat(frames, ignore_index=True)
    selected = [
        df_industrytickers[df_industrytickers.ticker == peer]
        for peer in sample
    ]
    return pd.concat(selected, ignore_index=True)
def get_sample(ticker, vol_samplesize, vol_window, decrease_percentage):
    """Build a sample from similar-volatility peers and large-decrease rows.

    For every other ticker in the industry of ``ticker`` (looked up in
    ``./tickers.csv``) the data is downloaded and two things are computed:

    * ``volatility``: rolling standard deviation of ``close`` over
      ``vol_window`` rows; the last value is compared with that of
      ``ticker``.
    * ``arounddecrease``: 1 for rows lying up to 100 rows before or up to
      49 rows after a row with ``close / open < decrease_percentage``.

    The result contains every row of the ``vol_samplesize`` peers closest in
    volatility, plus every flagged row of any peer. Each row appears once.

    Args:
        ticker: Ticker symbol. It is passed unchanged to the downloader and
            afterwards converted with ``str()`` (with ``[``, ``]`` and ``'``
            removed) for the lookup in ``./tickers.csv``.
        vol_samplesize: Number of closest-volatility peers to keep in full.
        vol_window: Rolling window length for the standard deviation.
        decrease_percentage: Threshold for the ``close / open`` ratio (a
            ratio, not a percent value).

    Returns:
        A DataFrame in download order with a fresh index, including the
        ``volatility``, ``perc_change`` and ``arounddecrease`` columns.
        Empty when no peer could be processed or ``vol_samplesize`` is 0.

    Raises:
        IndexError: If ``ticker`` is not listed in ``./tickers.csv`` or its
            downloaded data is empty.
    """
    # Reference volatility of the requested ticker on its last row. A scalar
    # is used on purpose: subtracting two one-row Series aligns them on
    # their index labels and yields NaN whenever the labels differ.
    yr = yahoo_reader.finance_data(tickers=[ticker])
    dfA, _ = yr.get_fix_yahoo_data()
    volatilityA = dfA.close.rolling(vol_window).std().iloc[-1]

    # Strip list punctuation in a single pass.
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # All tickers of the same industry, excluding the ticker itself.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    same_industry = df_alltickers[df_alltickers.industry == industry]
    industrytickers = [
        peer for peer in same_industry['ticker'].tolist() if peer != ticker
    ]

    # Countdown shown in the progress message.
    number = len(industrytickers)

    frames = []  # full data per peer
    tails = []   # last row per peer, carrying 'vol_diff'

    # One download per ticker: the original author notes that requesting
    # several tickers at once triggers an error.
    for sit in industrytickers:
        print(f"Downloading data for {number} same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            df['volatility'] = df.close.rolling(vol_window).std()

            # Copy the last row so adding a column does not write into a
            # slice of df.
            df_tail = df.tail(1).copy()
            df_tail['vol_diff'] = abs(volatilityA - df['volatility'].iloc[-1])

            df['perc_change'] = df.close / df.open
            df['arounddecrease'] = 0
            # shift(offset) moves the value of row i - offset onto row i, so
            # offsets -100..49 flag every row that lies up to 100 rows before
            # or up to 49 rows after a large decrease.
            for offset in range(-100, 50):
                df['arounddecrease'] = np.where(
                    df.perc_change.shift(offset) < decrease_percentage, 1,
                    df.arounddecrease)
        except Exception:
            # Best effort: skip tickers that fail to download or process,
            # but let KeyboardInterrupt / SystemExit propagate.
            print("No data")
            continue
        # Record both pieces only after everything succeeded so the ranking
        # never names a ticker whose data is missing.
        tails.append(df_tail)
        frames.append(df)
        print("Ticker data added")

    # Nothing downloaded: there is no 'vol_diff' column to sort on.
    if not tails:
        return pd.DataFrame()

    # Rank peers by closeness in volatility (NaN differences sort last).
    df_tails = pd.concat(tails, ignore_index=True)
    df_tails = df_tails.sort_values('vol_diff', ascending=True)
    df_tails = df_tails.head(vol_samplesize)
    sample = list(df_tails.ticker.unique())
    print(sample)
    if not sample:
        return pd.DataFrame()

    # Select with one mask. Filtering per sampled ticker with
    # "(ticker == t) | (arounddecrease == 1)" would add every flagged row
    # once per sampled ticker and duplicate it in the result.
    df_industrytickers = pd.concat(frames, ignore_index=True)
    keep = (df_industrytickers.ticker.isin(sample)
            | (df_industrytickers.arounddecrease == 1))
    return df_industrytickers[keep].reset_index(drop=True)
import numpy as np
import yahoo_reader
import preprocessing as pp
import lstm_utils as utils
import lstm_model
import gc
import matplotlib.pyplot as plt

# Script section: pick the configured user's tickers that are not yet in
# `tickers_done` and download data for the first of them.
user = utils.load_user_from_yml(yml_file='./configs/user_settings.yml')
user_tickers = utils.get_tickers_for_a_user(user=user)
# Presumably the tickers that already have output under ./results/ --
# TODO confirm against lstm_utils.get_tickers_done.
tickers_done = utils.get_tickers_done('./results/')
# Keep the user's tickers that are not in tickers_done, in their original order.
tickers_to_do = [
    ticker for ticker in user_tickers if ticker not in tickers_done
]

# Only the first remaining ticker is requested ([:1] is an empty list when
# nothing is left to do).
yr = yahoo_reader.finance_data(tickers=tickers_to_do[:1])
# Disabled alternative: load the data from a local CSV instead.
# NOTE(review): `pd` is not imported in this section; re-enabling this line
# (or the `pd.DataFrame` line below) needs `import pandas as pd`.
#df = pd.read_csv('forflight.csv', sep=',')
df, tickers = yr.get_fix_yahoo_data()

# Disabled experiments kept by the original author; the last one is cut off
# at the end of the visible source.
#ichimoku cloud
#tickers = df.ticker.unique().tolist()
#df = df_main[df_main.ticker == 'AMAG'].reset_index(drop=True)
#%%
#df = df[df.ticker == 'ASRV'].reset_index(drop=True)
#new_df_main = pd.DataFrame([])
#split = 100
#rest = len(df)%split
#
#df = df[rest:]
#print(len(df))
#for i in range(int(len(df)/split)-2):
import pandas as pd
import numpy as np
import yahoo_reader as yr
import processing as ps
from sklearn.model_selection import RandomizedSearchCV, train_test_split, ShuffleSplit
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import randint as sp_randint
import plots

#%%
# Get Stock Data
# NOTE(review): this rebinds `yr` from the yahoo_reader module to a
# finance_data instance, so the module is no longer reachable as `yr`
# after this line.
yr = yr.finance_data()
df = yr.getData()

#%%
# Process for predictions: each step takes the frame returned by the
# previous one (the prepData step is disabled).
#df = ps.prepData(df)
df = ps.genFeatures(df)
df = ps.featureProcessing(df)
df = ps.genTargets(df)

#%%
# Define X and y
# X is the explicit list of feature columns below; y is the 'regressor_y'
# column. These columns are presumably produced by the processing steps
# above -- verify against the `processing` module.
X = df[['open_sc','high_sc','low_sc','close_sc','volume_sc', 'year_sc',
        'ce_month_x','ce_month_y','ce_dow_x','ce_dow_y','ce_doy_x','ce_doy_y',
        'high_low_sc','open_min1_sc','close_min1_sc','volume_min1_sc',
        'high_low_min1_sc','open_min5_sc','close_min5_sc','volume_min5_sc',
        'high_low_min5_sc','open_cagr_sc','close_cagr_sc','ticker_en']]
y = df['regressor_y']