def get_data(symbol, test=None, predictions=None):
    """Download price history for ``symbol`` and label rows by next-row return.

    The history is fetched through ``yfinance`` for ``num_years`` years
    (``num_years`` is a module-level name defined outside this function),
    passed through ``get_ta`` (defined elsewhere; presumably adds indicator
    columns -- confirm), and given lower-cased column names.

    Columns added here:
        next_return:  percentage change of ``close`` from this row to the
                      next one.
        return_class: 1 when ``next_return`` exceeds one sixth of the mean
                      positive ``next_return``, otherwise 0.

    Rows containing any NaN are dropped before returning, which includes the
    final row (its ``next_return`` is undefined).

    Args:
        symbol: Ticker symbol handed to ``yfinance.Ticker``.
        test: Accepted for call compatibility; not used.
        predictions: Accepted for call compatibility; not used.

    Returns:
        The labelled DataFrame with a fresh 0..n-1 index. Class statistics
        are printed as a side effect.
    """
    history = yfinance.Ticker(symbol).history(
        period=str(num_years) + 'y', auto_adjust=False).reset_index()
    history = get_ta(history, volume=True, pattern=False)
    history.columns = map(str.lower, history.columns)
    history['date'] = pd.to_datetime(history['date'])

    # Return realised over the following row, aligned with the current row.
    history['next_return'] = history['close'].pct_change(1).shift(-1)

    # Mean size of an up move; a sixth of it is the class-1 threshold.
    pos_cutoff = history[history['next_return'] > 0]['next_return'].mean()

    history['return_class'] = 0
    history.loc[history['next_return'] > pos_cutoff / 6, 'return_class'] = 1

    history = history.dropna().reset_index(drop=True)

    # Diagnostics: group once and reuse the grouping for every statistic.
    print('cutoffs %.2f%%' % (pos_cutoff * 100))
    by_class = history.groupby(by=['return_class'])
    class_counts = by_class['open'].count()
    print('distribution', class_counts)
    print(class_counts / len(history))
    print(by_class['next_return'].mean())
    return history
def get_data(self, with_decision_tree=False):
    """Build ``self.history_df`` from the price history of ``self.tickers``.

    For every ticker the history for ``self.period`` is downloaded, column
    names are lower-cased, and the columns ``return`` (close-to-close
    percentage change), ``intraday`` (close / open - 1), ``range``
    (high / low - 1) and ``ticker`` are added. The ``dividends`` and
    ``stock splits`` columns are removed and the frame is passed through
    ``get_ta`` (defined elsewhere) with ``pattern=True``. Rows with any NaN
    are discarded.

    The per-ticker frames are concatenated, sorted by ``date`` (the index is
    not reset), stored on ``self.history_df`` and printed.

    Args:
        with_decision_tree: Accepted but not used by this method.
    """
    frames = []
    for symbol in self.tickers:
        prices = yfinance.Ticker(symbol).history(
            period=self.period, auto_adjust=False).reset_index()
        prices.columns = [str.lower(col) for col in prices.columns]

        prices["return"] = prices["close"].pct_change()
        prices["intraday"] = prices["close"] / prices["open"] - 1
        prices = prices.drop(columns=["dividends", "stock splits"])

        enriched = get_ta(prices, pattern=True)
        # get_ta may add columns with upper-case names; normalise again.
        enriched.columns = [str.lower(col) for col in enriched.columns]
        enriched["ticker"] = symbol
        enriched["range"] = (enriched["high"] / enriched["low"]) - 1
        frames.append(enriched.dropna(how="any"))

    self.history_df = pd.concat(frames)
    self.history_df = self.history_df.sort_values(by=["date"])
    print(self.history_df)
def get_data(self):
    """Download and clean history for ``self.tickers`` into ``self.history_df``.

    Per ticker: fetch the history for ``self.period``, pass it through
    ``get_ta`` (defined elsewhere; called positionally with ``True``,
    ``self.pattern`` and ``self.hold_length``), lower-case the column names,
    add ``return`` (close-to-close percentage change), ``range``
    (high / low - 1) and ``ticker``, drop the ``dividends`` and
    ``stock splits`` columns, and discard rows containing NaN.

    Combined frame: columns with fewer than 100 non-NA values are removed,
    rows are sorted by ``date`` and re-indexed, and finally every row that
    contains a NaN or an infinite value is dropped. The index is not reset
    after that last filter.
    """
    all_historic_data = []
    for ticker in self.tickers:
        ticker_data = yfinance.Ticker(ticker).history(
            period=self.period, auto_adjust=False)
        ticker_data = get_ta(ticker_data, True, self.pattern, self.hold_length)
        ticker_data = ticker_data.reset_index()
        ticker_data.columns = map(str.lower, ticker_data.columns)
        ticker_data["return"] = ticker_data["close"].pct_change()
        ticker_data["range"] = (ticker_data["high"] / ticker_data["low"]) - 1
        ticker_data = ticker_data.drop(columns=['dividends', 'stock splits'])
        ticker_data["ticker"] = ticker
        ticker_data.dropna(how="any", inplace=True)
        ticker_data = ticker_data.reset_index(drop=True)
        all_historic_data.append(ticker_data)

    history_df = pd.concat(all_historic_data)
    history_df = history_df.dropna(thresh=100, axis=1)
    history_df = history_df.sort_values(by=['date'])
    history_df = history_df.reset_index(drop=True)

    # DataFrame.replace returns a new frame, so its result must be kept.
    # Turning +/-inf into NaN first lets a single dropna remove both kinds
    # of unusable rows.
    history_df = history_df.replace([np.inf, -np.inf], np.nan)
    self.history_df = history_df.dropna()
def get_data(symbol, period):
    """Return the history of ``symbol`` with current and next-row returns.

    Args:
        symbol: Ticker symbol handed to ``yfinance.Ticker``.
        period: History length, forwarded unchanged to ``history(period=...)``.

    Returns:
        DataFrame with lower-cased column names, whatever ``get_ta`` (defined
        elsewhere) adds, plus ``return`` (close-to-close percentage change)
        and ``next_return`` (the following row's ``return``). Rows containing
        any NaN are removed and the index is renumbered from 0.
    """
    raw = yfinance.Ticker(symbol).history(period=period, auto_adjust=False)
    frame = get_ta(raw.reset_index(), volume=True, pattern=False)
    frame.columns = [str.lower(name) for name in frame.columns]

    frame['return'] = frame['close'].pct_change(1)
    # Shift up by one row so each row carries the return that follows it.
    frame['next_return'] = frame['return'].shift(-1)

    cleaned = frame.dropna()
    return cleaned.reset_index(drop=True)
def get_data(self, symbol='QQQ', period='7y'):
    """Download history and split it into train/test frames at the cutoff.

    The history is passed through ``get_ta`` (defined elsewhere), column
    names are lower-cased, a ``return`` column (close-to-close percentage
    change) is added and rows containing NaN are dropped.

    The result is split on ``self.cutoff_date``:
        self.clean_train: rows with ``date`` strictly before the cutoff.
        self.clean_test:  rows with ``date`` on or after the cutoff.

    Args:
        symbol: Ticker symbol to download. Defaults to ``'QQQ'``.
        period: History length forwarded to ``history(period=...)``.
            Defaults to ``'7y'``.
    """
    stock_df = yfinance.Ticker(symbol).history(period=period).reset_index()
    stock_df = get_ta(stock_df, volume=True, pattern=False)
    stock_df.columns = map(str.lower, stock_df.columns)
    stock_df["return"] = stock_df["close"].pct_change()
    stock_df = stock_df.dropna()

    # The test split is inclusive so that a row dated exactly on the cutoff
    # belongs to one of the two splits instead of being silently discarded.
    self.clean_train = stock_df[stock_df['date'] < self.cutoff_date]
    self.clean_test = stock_df[stock_df['date'] >= self.cutoff_date]
def get_data(symbol):
    """Return seven years of history for ``symbol`` with percentage returns.

    Args:
        symbol: Ticker symbol handed to ``yfinance.Ticker``; also written
            into the ``symbol`` column of the result.

    Returns:
        DataFrame with lower-cased column names, whatever ``get_ta`` (defined
        elsewhere) adds, and these extra columns:
            return:      close-to-close change, expressed in percent (x100).
            next_return: the following row's ``return``.
            symbol:      the ticker symbol.
        Rows containing NaN are dropped *before* ``next_return`` is computed,
        so the last row is kept and its ``next_return`` is NaN.
    """
    downloaded = yfinance.Ticker(symbol).history(period='7y')
    frame = get_ta(downloaded.reset_index(), volume=True, pattern=False)
    frame.columns = [str.lower(name) for name in frame.columns]
    frame['return'] = frame['close'].pct_change() * 100

    complete = frame.dropna()
    return complete.assign(
        next_return=complete['return'].shift(-1),
        symbol=symbol,
    )
def get_data(symbol):
    """Download history for ``symbol`` and bucket rows by two-row-ahead return.

    The history covers ``num_years`` years (``num_years`` is a module-level
    name defined outside this function), is passed through ``get_ta``
    (defined elsewhere) and gets lower-cased column names.

    Columns added here:
        next_return:  percentage change of ``close`` from this row to the row
                      two positions later.
        return_class: 0 by default; 1, 2 or 3 when ``next_return`` exceeds
                      half of, exactly, or twice the mean positive
                      ``next_return`` respectively.

    Rows containing any NaN are dropped before returning. The intermediate
    frame, both cutoffs and per-class statistics are printed as side effects.

    Args:
        symbol: Ticker symbol handed to ``yfinance.Ticker``.

    Returns:
        The labelled DataFrame with a fresh 0..n-1 index.
    """
    raw = yfinance.Ticker(symbol).history(
        period=str(num_years) + 'y', auto_adjust=False)
    frame = get_ta(raw.reset_index(), volume=True, pattern=False)
    frame.columns = [str.lower(name) for name in frame.columns]
    frame['date'] = pd.to_datetime(frame['date'])

    frame['next_return'] = frame['close'].pct_change(2).shift(-2)
    print(frame)

    forward = frame['next_return']
    pos_cutoff = forward[forward > 0].mean()
    neg_cutoff = forward[forward < 0].mean()
    print('pos_cutoff', pos_cutoff, 'neg_cutoff', neg_cutoff)

    # Thresholds are applied in ascending order, so a row ends up with the
    # label of the highest threshold it clears.
    frame['return_class'] = 0
    thresholds = ((1, pos_cutoff / 2), (2, pos_cutoff), (3, pos_cutoff * 2))
    for label, threshold in thresholds:
        frame.loc[frame['next_return'] > threshold, 'return_class'] = label

    result = frame.dropna().reset_index(drop=True)

    by_class = result.groupby(by=['return_class'])
    print('distribution', by_class['open'].count() / len(result))
    print(by_class['next_return'].mean())
    return result
def get_data(self, symbol='QQQ'):
    """Download full history and cut train/test windows around the cutoff.

    The maximum available history is passed through ``get_ta`` (defined
    elsewhere), column names are lower-cased, a ``return`` column
    (close-to-close percentage change) is added and rows containing NaN are
    dropped.

    Windows, relative to ``pd.to_datetime(self.cutoff_date)``:
        self.clean_train: rows with ``date`` strictly between
            ``cutoff - 365 * self.train_length`` days and the cutoff.
        self.clean_test: rows with ``date`` from the cutoff (inclusive) up to
            ``cutoff + 365`` days (exclusive).

    Args:
        symbol: Ticker symbol to download. Defaults to ``'QQQ'``.
    """
    stock_df = yfinance.Ticker(symbol).history(period='max').reset_index()
    stock_df = get_ta(stock_df, volume=True, pattern=False)
    stock_df.columns = map(str.lower, stock_df.columns)
    stock_df["return"] = stock_df["close"].pct_change()
    stock_df = stock_df.dropna()

    cutoff_datetime = pd.to_datetime(self.cutoff_date)
    # A "year" here is a flat 365 days; leap days are not accounted for.
    start_train = cutoff_datetime - timedelta(days=365 * self.train_length)
    end_test = cutoff_datetime + timedelta(days=365)

    dates = stock_df['date']
    self.clean_train = stock_df[(dates > start_train) & (dates < cutoff_datetime)]
    # Inclusive lower bound: a row dated exactly on the cutoff would
    # otherwise fall into neither the train nor the test window.
    self.clean_test = stock_df[(dates >= cutoff_datetime) & (dates < end_test)]
def get_data(tickers, period='5y', pattern=False):
    """Return one cleaned, date-sorted DataFrame covering all ``tickers``.

    Args:
        tickers: Iterable of ticker symbols.
        period: History length forwarded to ``history(period=...)``.
        pattern: Forwarded as the third positional argument of ``get_ta``
            (defined elsewhere).

    Returns:
        DataFrame with lower-cased column names and these extra columns:
            return:          close-to-close percentage change.
            next_day_return: ``close`` five rows ahead divided by the current
                             ``close``, minus 1. Despite the name, this is a
                             5-row horizon.
            range:           high / low - 1.
            ticker:          the ticker symbol.
        ``dividends`` and ``stock splits`` are removed. Columns with fewer
        than 100 non-NA values are dropped, rows containing NaN or infinite
        values are dropped, and the result is sorted by ``date`` with a fresh
        0..n-1 index.
    """

    def _prepare(ticker):
        # Download, enrich and clean the history of a single ticker.
        frame = yfinance.Ticker(ticker).history(period=period, auto_adjust=False)
        frame = get_ta(frame, True, pattern).reset_index()
        frame.columns = [str.lower(name) for name in frame.columns]

        close = frame["close"]
        frame["return"] = close.pct_change()
        frame["next_day_return"] = close.shift(-5) / close - 1
        frame["range"] = (frame["high"] / frame["low"]) - 1
        frame = frame.drop(columns=['dividends', 'stock splits'])
        frame["ticker"] = ticker
        return frame.dropna(how="any").reset_index(drop=True)

    per_ticker = [_prepare(ticker) for ticker in tickers]

    return (
        pd.concat(per_ticker)
        .dropna(thresh=100, axis=1)
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
        .sort_values(by=['date'])
        .reset_index(drop=True)
    )