Exemple #1
0
def get_data(symbol, test=None, predictions=None):
    """Download daily history for ``symbol`` and label rows by next-row return.

    The history length comes from the module-level ``num_years`` (defined
    outside this function).  ``get_ta`` is a project helper that is not shown
    here; presumably it appends technical-indicator columns -- confirm against
    its definition.

    Args:
        symbol: Ticker symbol passed to ``yfinance.Ticker``.
        test: Unused; kept so existing callers keep working.
        predictions: Unused; kept so existing callers keep working.

    Returns:
        A DataFrame with lower-cased column names, a ``next_return`` column
        (forward one-row percentage change of ``close``) and a binary
        ``return_class`` column.  Rows containing any NaN are dropped and the
        index is reset.
    """
    history = yfinance.Ticker(symbol).history(
        period=str(num_years) + 'y', auto_adjust=False).reset_index()
    history = get_ta(history, volume=True, pattern=False)
    history.columns = [str.lower(name) for name in history.columns]
    history['date'] = pd.to_datetime(history['date'])

    # Row i holds close[i + 1] / close[i] - 1, i.e. the return that follows it.
    history['next_return'] = history['close'].pct_change(1).shift(-1)

    # Mean of the strictly positive forward returns; a sixth of it is the
    # threshold separating class 1 from class 0.
    pos_cutoff = history.loc[history['next_return'] > 0, 'next_return'].mean()

    history['return_class'] = 0
    history.loc[history['next_return'] > pos_cutoff / 6, 'return_class'] = 1

    # Labels are assigned before this drop, so rows whose forward return is
    # NaN (e.g. the last row) are removed here rather than kept as class 0.
    history = history.dropna().reset_index(drop=True)

    print('cutoffs %.2f%%' % (pos_cutoff * 100))
    class_counts = history.groupby(by=['return_class'])['open'].count()
    print('distribution', class_counts)
    print(class_counts / len(history))
    print(history.groupby(by=['return_class'])['next_return'].mean())
    return history
Exemple #2
0
    def get_data(self, with_decision_tree=False):
        """Fetch history for every ticker in ``self.tickers`` into ``self.history_df``.

        For each ticker the frame gets ``return`` (row-over-row change of
        ``close``), ``intraday`` (close / open - 1), ``range`` (high / low - 1)
        and ``ticker`` columns; ``dividends`` and ``stock splits`` are removed
        and rows containing any NaN are dropped.  ``get_ta`` is a project
        helper not shown here; presumably it adds indicator/pattern columns --
        confirm against its definition.

        The per-ticker frames are concatenated, sorted by ``date``, stored on
        ``self.history_df`` and printed.

        Args:
            with_decision_tree: Not used by this method.
        """
        frames = []
        for symbol in self.tickers:
            frame = yfinance.Ticker(symbol).history(period=self.period, auto_adjust=False)
            frame = frame.reset_index()
            frame.columns = [str.lower(label) for label in frame.columns]

            close = frame["close"]
            frame["return"] = close.pct_change()
            frame['intraday'] = close / frame["open"] - 1
            frame = frame.drop(columns=['dividends', 'stock splits'])

            frame = get_ta(frame, pattern=True)
            # Lower-case again: the helper may have added columns of its own.
            frame.columns = [str.lower(label) for label in frame.columns]

            frame["ticker"] = symbol
            frame["range"] = (frame["high"] / frame["low"]) - 1

            frame.dropna(how="any", inplace=True)
            frames.append(frame)

        self.history_df = pd.concat(frames).sort_values(by=['date'])
        print(self.history_df)
    def get_data(self):
        """Fetch history for every ticker in ``self.tickers`` into ``self.history_df``.

        Each ticker's frame is passed through ``get_ta`` (a project helper not
        shown here; called with ``True``, ``self.pattern`` and
        ``self.hold_length``), gets ``return``, ``range`` and ``ticker``
        columns, loses ``dividends`` / ``stock splits`` and has rows with any
        NaN dropped.

        The combined frame then drops columns with fewer than 100 non-NaN
        values, is sorted by ``date`` and re-indexed, and finally loses every
        row that contains NaN or +/-inf.  The index is not reset after that
        final drop, so its labels may contain gaps.
        """
        all_historic_data = []

        for ticker in self.tickers:
            ticker_data = yfinance.Ticker(ticker).history(
                period=self.period, auto_adjust=False)

            ticker_data = get_ta(ticker_data, True, self.pattern, self.hold_length)
            ticker_data = ticker_data.reset_index()
            ticker_data.columns = [str.lower(name) for name in ticker_data.columns]

            ticker_data["return"] = ticker_data["close"].pct_change()
            ticker_data["range"] = (ticker_data["high"] / ticker_data["low"]) - 1
            ticker_data = ticker_data.drop(columns=['dividends', 'stock splits'])

            ticker_data["ticker"] = ticker

            ticker_data = ticker_data.dropna(how="any").reset_index(drop=True)
            all_historic_data.append(ticker_data)

        history_df = pd.concat(all_historic_data)
        # Keep only columns that have at least 100 non-NaN values.
        history_df = history_df.dropna(thresh=100, axis=1)
        history_df = history_df.sort_values(by=['date']).reset_index(drop=True)

        # DataFrame.replace returns a new frame, so the result must be kept.
        # Turning +/-inf into NaN first lets one dropna remove both kinds of
        # bad row.
        history_df = history_df.replace([np.inf, -np.inf], np.nan)
        self.history_df = history_df.dropna()
Exemple #4
0
def get_data(symbol, period):
    """Download history for ``symbol`` and add current and next-row returns.

    ``get_ta`` is a project helper not shown here; presumably it appends
    technical-indicator columns -- confirm against its definition.

    Args:
        symbol: Ticker symbol passed to ``yfinance.Ticker``.
        period: Period string forwarded to ``Ticker.history``.

    Returns:
        A DataFrame with lower-cased column names plus ``return`` (one-row
        percentage change of ``close``) and ``next_return`` (the following
        row's ``return``).  Rows containing any NaN are dropped and the index
        is reset.
    """
    ticker = yfinance.Ticker(symbol)
    frame = ticker.history(period=period, auto_adjust=False)
    frame = get_ta(frame.reset_index(), volume=True, pattern=False)
    frame.columns = [str.lower(label) for label in frame.columns]

    daily_return = frame['close'].pct_change(1)
    frame['return'] = daily_return
    frame['next_return'] = daily_return.shift(-1)

    return frame.dropna().reset_index(drop=True)
Exemple #5
0
 def get_data(self):
     """Load seven years of QQQ history and split it at ``self.cutoff_date``.

     ``get_ta`` is a project helper not shown here; presumably it appends
     technical-indicator columns -- confirm against its definition.

     Sets:
         self.clean_train: rows whose ``date`` is strictly before the cutoff.
         self.clean_test: rows whose ``date`` is on or after the cutoff.

     NOTE(review): ``self.cutoff_date`` is compared directly with the
     ``date`` column; its type is not visible here -- confirm the two are
     comparable.
     """
     stock_df = yfinance.Ticker('QQQ').history(period='7y').reset_index()
     stock_df = get_ta(stock_df, volume=True, pattern=False)
     stock_df.columns = [str.lower(name) for name in stock_df.columns]
     stock_df["return"] = stock_df["close"].pct_change()
     stock_df = stock_df.dropna()

     # Half-open split: a row dated exactly on the cutoff goes to the test
     # set instead of being dropped from both partitions.
     self.clean_train = stock_df[stock_df['date'] < self.cutoff_date]
     self.clean_test = stock_df[stock_df['date'] >= self.cutoff_date]
Exemple #6
0
def get_data(symbol):
    """Download seven years of history for ``symbol`` with percent returns.

    ``get_ta`` is a project helper not shown here; presumably it appends
    technical-indicator columns -- confirm against its definition.

    Args:
        symbol: Ticker symbol passed to ``yfinance.Ticker``; also written to
            the ``symbol`` column of the result.

    Returns:
        A DataFrame with lower-cased column names plus ``return`` (row-over-row
        change of ``close`` multiplied by 100), ``next_return`` (the following
        row's ``return``) and ``symbol``.  Rows with NaN are dropped *before*
        ``next_return`` is added, so the last row keeps a NaN ``next_return``.
    """
    frame = yfinance.Ticker(symbol).history(period='7y')
    frame = get_ta(frame.reset_index(), volume=True, pattern=False)
    frame.columns = [str.lower(label) for label in frame.columns]

    frame['return'] = frame['close'].pct_change() * 100
    frame = frame.dropna()

    return frame.assign(
        next_return=frame['return'].shift(-1),
        symbol=symbol,
    )
Exemple #7
0
def get_data(symbol):
    """Download history for ``symbol`` and bucket rows by two-row forward return.

    The history length comes from the module-level ``num_years`` (defined
    outside this function).  ``get_ta`` is a project helper not shown here;
    presumably it appends technical-indicator columns -- confirm against its
    definition.

    ``return_class`` starts at 0 and is raised to 1, 2 or 3 when
    ``next_return`` exceeds half, one, or two times the mean positive forward
    return respectively.

    Args:
        symbol: Ticker symbol passed to ``yfinance.Ticker``.

    Returns:
        A DataFrame with lower-cased column names, ``next_return`` and
        ``return_class``.  Rows containing any NaN are dropped and the index
        is reset.
    """
    raw = yfinance.Ticker(symbol).history(period=str(num_years)+'y', auto_adjust=False)
    history = get_ta(raw.reset_index(), volume=True, pattern=False)

    history.columns = [str.lower(label) for label in history.columns]
    history['date'] = pd.to_datetime(history['date'])

    # Row i holds close[i + 2] / close[i] - 1.
    history['next_return'] = history['close'].pct_change(2).shift(-2)
    print(history)

    forward = history['next_return']
    pos_cutoff = forward[forward > 0].mean()
    neg_cutoff = forward[forward < 0].mean()
    print('pos_cutoff', pos_cutoff, 'neg_cutoff', neg_cutoff)

    history['return_class'] = 0
    # Thresholds are applied in ascending order so a higher class overwrites
    # a lower one for rows that clear several thresholds.
    thresholds = (pos_cutoff / 2, pos_cutoff, pos_cutoff * 2)
    for label, threshold in enumerate(thresholds, start=1):
        history.loc[history['next_return'] > threshold, 'return_class'] = label

    history = history.dropna().reset_index(drop=True)

    by_class = history.groupby(by=['return_class'])
    print('distribution', by_class['open'].count() / len(history))
    print(by_class['next_return'].mean())
    return history
    def get_data(self):
        """Load the full QQQ history and cut train/test windows around the cutoff.

        ``get_ta`` is a project helper not shown here; presumably it appends
        technical-indicator columns -- confirm against its definition.

        Both windows are half-open ``[start, end)`` so that no row is lost at
        a boundary and none appears in both sets.

        Sets:
            self.clean_train: rows dated in the ``365 * self.train_length``
                days before ``self.cutoff_date``.
            self.clean_test: rows dated in the 365 days starting at
                ``self.cutoff_date``.

        NOTE(review): the cutoff is parsed with ``pd.to_datetime`` and compared
        with the ``date`` column; whether that column is timezone-aware is not
        visible here -- confirm the comparison is valid.
        """
        stock_df = yfinance.Ticker('QQQ').history(period='max').reset_index()
        stock_df = get_ta(stock_df, volume=True, pattern=False)
        stock_df.columns = [str.lower(name) for name in stock_df.columns]
        stock_df["return"] = stock_df["close"].pct_change()
        stock_df = stock_df.dropna()

        cutoff_datetime = pd.to_datetime(self.cutoff_date)
        start_train = cutoff_datetime - timedelta(days=365 * self.train_length)
        end_test = cutoff_datetime + timedelta(days=365)

        dates = stock_df['date']
        # Train is [start_train, cutoff) and test is [cutoff, end_test), so a
        # row dated exactly on the cutoff lands in the test set.
        self.clean_train = stock_df[(dates >= start_train) & (dates < cutoff_datetime)]
        self.clean_test = stock_df[(dates >= cutoff_datetime) & (dates < end_test)]
Exemple #9
0
def get_data(tickers, period='5y', pattern=False, horizon=5):
    """Download and combine history for several tickers into one frame.

    ``get_ta`` is a project helper not shown here (called with ``True`` and
    ``pattern``); presumably it appends technical-indicator columns -- confirm
    against its definition.

    Args:
        tickers: Iterable of ticker symbols.
        period: Period string forwarded to ``Ticker.history``.
        pattern: Forwarded to ``get_ta`` as its third positional argument.
        horizon: Number of rows to look ahead for the forward return.  The
            default of 5 matches the previously hard-coded value.

    Returns:
        A DataFrame sorted by ``date`` with a fresh index.  Besides the
        downloaded and helper-added columns it carries ``return`` (row-over-row
        change of ``close``), ``next_day_return``, ``range`` (high / low - 1)
        and ``ticker``.  Despite its name, ``next_day_return`` is the return
        over the next ``horizon`` rows; the name is kept for existing callers.
        Columns with fewer than 100 non-NaN values and rows containing NaN or
        +/-inf are removed.
    """
    all_historic_data = []

    for ticker in tickers:
        ticker_data = yfinance.Ticker(ticker).history(period=period,
                                                      auto_adjust=False)

        ticker_data = get_ta(ticker_data, True, pattern)
        ticker_data = ticker_data.reset_index()
        ticker_data.columns = [str.lower(name) for name in ticker_data.columns]

        close = ticker_data["close"]
        ticker_data["return"] = close.pct_change()
        # Forward return: close[i + horizon] / close[i] - 1.
        ticker_data["next_day_return"] = close.shift(-horizon) / close - 1
        ticker_data["range"] = (ticker_data["high"] / ticker_data["low"]) - 1
        ticker_data = ticker_data.drop(columns=['dividends', 'stock splits'])

        ticker_data["ticker"] = ticker

        ticker_data = ticker_data.dropna(how="any").reset_index(drop=True)
        all_historic_data.append(ticker_data)

    history_df = pd.concat(all_historic_data)
    # Keep only columns that have at least 100 non-NaN values.
    history_df = history_df.dropna(thresh=100, axis=1)

    # Turn +/-inf into NaN so the dropna below removes those rows too.
    history_df = history_df.replace([np.inf, -np.inf], np.nan)
    history_df = history_df.dropna()

    history_df = history_df.sort_values(by=['date'])
    history_df = history_df.reset_index(drop=True)

    return history_df