# Start of the look-back window: the same calendar day one year before `end`.
try:
    start = datetime(end.year - 1, end.month, end.day)
except ValueError:
    # `end` falls on Feb 29 and the previous year has no such day;
    # fall back to Feb 28 instead of crashing.
    start = datetime(end.year - 1, end.month, 28)

# In[23]:

# Tickers whose price history is downloaded below
tech_list = ['AAPL','GOOG','MSFT','AMZN']

# In[24]:

from yahoo_fin import stock_info as si

# In[25]:

# Bind each downloaded DataFrame to a module-level variable named after its
# ticker (AAPL, GOOG, ...). The later cells reference those names directly,
# so the globals() injection is kept on purpose.
for stock in tech_list:
    globals()[stock] = si.get_data(stock, start, end)

# In[26]:

# Variables created by the loop above; each holds the data for one stock
AAPL.head()

# In[27]:

MSFT.head()

# In[28]:

#Opening price, closing price, low, high and Split stock changes
GOOG.head()
Beispiel #2
0
def load_data(ticker,
              n_steps=50,
              scale=True,
              shuffle=True,
              lookup_step=1,
              test_size=0.2,
              feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """Build windowed train/test arrays from a price history.

    Params:
        ticker (str/pd.DataFrame): a ticker symbol to download through
            `si.get_data`, or an already loaded DataFrame. A DataFrame
            argument is copied and never modified.
        n_steps (int): window length, i.e. number of consecutive rows per sample
        scale (bool): whether to min-max scale every feature column
        shuffle (bool): forwarded to `train_test_split`
        lookup_step (int): how many rows ahead the 'adjclose' target lies
        test_size (float): forwarded to `train_test_split`
        feature_columns (list): columns used as model inputs (only read here,
            never mutated, so the shared default list is safe)

    Returns:
        dict with keys 'df' (unmodified copy of the input data),
        'column_scaler' (only when `scale` is true), 'last_sequence',
        'X_train', 'X_test', 'y_train' and 'y_test'.

    Raises:
        TypeError: if `ticker` is neither a str nor a DataFrame.
        AssertionError: if a feature column is missing from the data.
    """
    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # work on a private copy: the steps below scale columns, add a
        # 'future' column and drop rows, none of which should leak back
        # into the caller's frame
        df = ticker.copy()
    else:
        raise TypeError(
            "ticker can be either a str or a `pd.DataFrame` instances")
    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original dataframe itself
    result['df'] = df.copy()
    # make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1, one scaler per column
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(
                np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler

        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows have NaN in the future column;
    # grab their features before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    # sliding window over the rows; the deque discards the oldest row itself
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    # join the final window with the rows that had no target yet, then shift
    # by -1 and drop the resulting all-NaN last row, which leaves
    # n_steps + lookup_step - 1 rows (e.g. 59 for n_steps=50, lookup_step=10)
    last_sequence = list(sequences) + list(last_sequence)
    last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
    # add to result
    result['last_sequence'] = last_sequence
    # construct the X's and y's as numpy arrays
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])
    # reshape X to fit the neural network
    # NOTE(review): reshape reinterprets the (samples, n_steps, features)
    # buffer as (samples, features, n_steps) without moving data; it is not a
    # transpose. Kept unchanged because downstream code presumably relies on
    # this exact layout -- confirm.
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
    # split the dataset
    result["X_train"], result["X_test"], result["y_train"], result[
        "y_test"] = train_test_split(X,
                                     y,
                                     test_size=test_size,
                                     shuffle=shuffle)
    # return the result
    return result
Beispiel #3
0
def getTickerData(ticker):
    """Return whatever `si.get_data` yields for `ticker` with default arguments."""
    history = si.get_data(ticker)
    return history
#df = pd.read_csv("companylist (1).csv")
#matrix2 = df[df.columns[0]].as_matrix()
#list3 = matrix2.tolist()
#tickerList=list2+list3
# Tickers to screen; `list2` is defined earlier in the file.
tickerList = list2
#for x in range (len(tickerList)):
#    print(tickerList[x])
# Lift every pandas display limit so printed frames are never truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "unlimited column width"; the former value -1 is deprecated
# since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)

for x in range(len(tickerList)):
    #print(tickerList[x])
    try:
        stockdata = si.get_data(tickerList[x], start_date='01/01/2019')
        #stockdata['RSI']=talib.RSI(stockdata['close'], timeperiod=14)
        #stockdata['MOM']=talib.MOM(stockdata['close'], timeperiod=12)
        #stockdata['ADX']=talib.ADX(stockdata['high'], stockdata['low'], stockdata['close'], timeperiod=14)
        stockdata['MACD'], stockdata['MACDSIGNAL'], stockdata[
            'MACDHIST'] = talib.MACD(stockdata['close'],
                                     fastperiod=12,
                                     slowperiod=26,
                                     signalperiod=9)
        #print(stockdata)

        #stockdata['RSI']=talib.SAR(stockdata['high'], stockdata['low'], acceleration=0.02, maximum=0.2)
        #if (float(stockdata.iloc[len(stockdata)-1]['RSI']) <= 40 and float(stockdata.iloc[len(stockdata)-1]['MOM']) >= 0):
        #if ((float(stockdata.iloc[len(stockdata)-2]['MACDSIGNAL']) < 0 and float(stockdata.iloc[len(stockdata)-1]['MACDSIGNAL']) > 0)  and float(stockdata.iloc[len(stockdata)-1]['MOM']) >= 0 and float(stockdata.iloc[len(stockdata)-1]['ADX']) > 25):
        if ((float(stockdata.iloc[len(stockdata) - 2]['MACDSIGNAL']) < 0
             and float(stockdata.iloc[len(stockdata) - 1]['MACDSIGNAL']) > 0)):
Beispiel #5
0
def load_data(ticker,
              n_steps=50,
              scale=True,
              shuffle=True,
              lookup_step=1,
              split_by_date=True,
              test_size=0.2,
              feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """Build windowed train/test arrays (plus the test-period rows) from a price history.

    Params:
        ticker (str/pd.DataFrame): a ticker symbol to download through
            `si.get_data`, or an already loaded DataFrame. A DataFrame
            argument is copied and never modified.
        n_steps (int): window length, i.e. number of consecutive rows per sample
        scale (bool): whether to min-max scale every feature column
        shuffle (bool): shuffle the samples (within each split when
            `split_by_date` is true, otherwise via `train_test_split`)
        lookup_step (int): how many rows ahead the 'adjclose' target lies
        split_by_date (bool): if true the first (1 - test_size) share of the
            samples is used for training and the rest for testing; if false
            the split is delegated to `train_test_split`
        test_size (float): share of samples assigned to the test set
        feature_columns (list): columns used as model inputs (only read here,
            never mutated, so the shared default list is safe)

    Returns:
        dict with keys 'df' (unmodified copy of the input data),
        'column_scaler' (only when `scale` is true), 'last_sequence',
        'X_train', 'X_test', 'y_train', 'y_test' and 'test_df' (rows of 'df'
        for the last date of every test window, duplicates removed).

    Raises:
        TypeError: if `ticker` is neither a str nor a DataFrame.
        AssertionError: if a feature column is missing from the data.
    """
    # See if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # Work on a private copy: the steps below add 'date'/'future' columns,
        # rescale features and drop rows, none of which should leak back into
        # the caller's frame.
        df = ticker.copy()
    else:
        raise TypeError(
            "ticker can be either a str or a `pd.DataFrame` instances")
    # Container for the results
    result = {}
    # Return original df
    result['df'] = df.copy()
    # Make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    # Add date as a column so every window row carries its own date
    if "date" not in df.columns:
        df["date"] = df.index
    if scale:
        column_scaler = {}
        # Scale the data (prices) from 0 to 1, one scaler per column
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(
                np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler
        # Add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # Add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # The last `lookup_step` rows have NaN in the future column;
    # grab their features before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # Drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    # Sliding window over the rows; the deque discards the oldest row itself
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns + ["date"]].values,
                             df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    # Join the final window (date column stripped) with the rows that had no
    # target yet, e.g. 50 + 10 = 60 rows for n_steps=50 and lookup_step=10.
    # This last_sequence is meant for predicting prices beyond the dataset.
    n_features = len(feature_columns)
    last_sequence = [s[:n_features] for s in sequences] + list(last_sequence)
    last_sequence = np.array(last_sequence).astype(np.float32)
    # add to result
    result['last_sequence'] = last_sequence
    # Construct the X's and y's as numpy arrays
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])
    if split_by_date:
        # Split the dataset into training & testing sets by date (not randomly splitting)
        train_samples = int((1 - test_size) * len(X))
        result["X_train"] = X[:train_samples]
        result["y_train"] = y[:train_samples]
        result["X_test"] = X[train_samples:]
        result["y_test"] = y[train_samples:]
        if shuffle:
            # Shuffle the datasets for training (if shuffle parameter is set).
            # `shuffle_dataset` is defined elsewhere; its return value is
            # ignored, so it presumably shuffles both arrays in place -- confirm.
            shuffle_dataset(result["X_train"], result["y_train"])
            shuffle_dataset(result["X_test"], result["y_test"])
    else:
        # Split the dataset randomly
        result["X_train"], result["X_test"], result["y_train"], result[
            "y_test"] = train_test_split(X,
                                         y,
                                         test_size=test_size,
                                         shuffle=shuffle)
    # Get the list of test set dates: last row of every window, last column
    dates = result["X_test"][:, -1, -1]
    # Retrieve test features from the original dataframe
    result["test_df"] = result["df"].loc[dates]
    # Remove duplicated dates in the testing dataframe
    result["test_df"] = result["test_df"][~result["test_df"].index.duplicated(
        keep='first')]
    # Remove dates from the training/testing sets & convert to float32
    result["X_train"] = result["X_train"][:, :, :n_features].astype(
        np.float32)
    result["X_test"] = result["X_test"][:, :, :n_features].astype(
        np.float32)
    return result
Beispiel #6
0
def PlotTimeSeries(ticker, years_ago=5, verbose_mode=False):#, months_ago=0):
    """Plot a monthly price chart with iterated Support/Resistance lines for a stock ticker.

        Nothing is returned: the function prints the current price and, when a
        line could be built, the latest Sell/Buy line price, then shows a
        matplotlib figure.

        Parameters:
            ticker: string
                    Stock ticker to be charted, must conform to Yahoo Finance format
                    e.g., RY.TO, QAN.AX
            years_ago: integer, default 5
                    Number of years of stock price history to chart
            verbose_mode: bool, default False
                    Prints every Breach point and additionally draws every
                    intermediate line (dashed)
                    Good for additional analysis or testing
    """    
    
    # There are two Yahoo Modules we can use to pull our data (closeHist)
    # We'll pull from one and if we get an error will use the alternate
    try:
        closeHist = pd.DataFrame(yf.download(ticker,
                                             period='max', 
                                             progress=False)['Close']).rename({'Close':'Price'}, axis=1)
        #closeHist = pd.DataFrame(yf.Ticker(ticker).history(period='max')['Close']).rename({'Close':'Price'}, axis=1)
        closeHist.index = closeHist.index.to_pydatetime()
        closeHist.index.name = 'Date'
    except json.JSONDecodeError:
        # Only a JSON decode failure triggers the fallback; any other error propagates
        closeHist = pd.DataFrame(y_fin.get_data(ticker)['close']).rename({'close':'Price'}, axis=1)
        closeHist.index = closeHist.index.to_pydatetime()
        closeHist.index.name = 'Date'
    # Trim our data to years_ago
    closeHist = closeHist[closeHist.index > dt.datetime.now() + relativedelta(years=-years_ago)]
    closeHist.reset_index(inplace=True)
    # Bucket rows by the first day of their month and keep the LAST row of each month.
    # NOTE(review): the column is labelled 'Price(Monthly avg.)' but `.last()` is
    # not an average -- confirm which one was intended.
    closeHist['Month'] = closeHist.Date.apply(lambda x: dt.date(x.year, x.month, 1))
    closeHist = closeHist.groupby('Month').last().rename({'Price':'Price(Monthly avg.)'}, axis=1)
    # Running 0..n-1 position of each month, used as the x coordinate of the lines
    closeHist['x_index'] = pd.Series(range(len(closeHist.index)), closeHist.index)

    # Find Peaks and Troughs (Local Maximums and Minimums)
    # A trough/peak is strictly below/above both of its neighbours
    MinSeries = closeHist['Price(Monthly avg.)'][(closeHist['Price(Monthly avg.)'].shift(1) > closeHist['Price(Monthly avg.)']) &  
                                                 (closeHist['Price(Monthly avg.)'].shift(-1) > closeHist['Price(Monthly avg.)'])]
    MaxSeries = closeHist['Price(Monthly avg.)'][(closeHist['Price(Monthly avg.)'].shift(1) < closeHist['Price(Monthly avg.)']) &  
                                                 (closeHist['Price(Monthly avg.)'].shift(-1) < closeHist['Price(Monthly avg.)'])]
    
    
    # Also treat the earliest month at or before the first peak whose price is
    # below that peak as a trough
    MinSeries = pd.concat([MinSeries, 
                           closeHist['Price(Monthly avg.)'][(closeHist.index <= MaxSeries.index[0])&
                                                            (closeHist['Price(Monthly avg.)'] < MaxSeries.iloc[0])].head(1)]).sort_index()

    
    #BothSeries = pd.concat([MinSeries, MaxSeries]).sort_index()
    #MaxMaxSeries = BothSeries[(BothSeries.shift(1) < BothSeries) & (BothSeries.shift(-1) < BothSeries)]
    #MinMinSeries = BothSeries[(BothSeries.shift(1) > BothSeries) & (BothSeries.shift(-1) > BothSeries)]
    
    

    #3PTL Buy Line
    # Anchor points: the highest peak and the highest peak after it (if any)
    X = list()
    Y = list()
    x_1_date = MaxSeries.idxmax()
    x_1 = closeHist[closeHist.index==x_1_date].x_index.iloc[0]
    X.append(x_1)
    Y.append(MaxSeries.max())
    try:
        x_2_date = MaxSeries[MaxSeries.index > x_1_date].idxmax()
        x_2 = closeHist[closeHist.index==x_2_date].x_index.iloc[0]
        X.append(x_2)
        Y.append(MaxSeries[MaxSeries.index > x_1_date].max())
    except ValueError:
        # presumably raised by idxmax when no later peak exists; X keeps a single point
        pass
    #3PTL Sell Line
    # Anchor points: the lowest trough and the lowest trough after it (if any)
    X2 = list()
    Y2 = list()
    x2_1_date = MinSeries.idxmin()
    x2_1 = closeHist[closeHist.index==x2_1_date].x_index.iloc[0]
    X2.append(x2_1)
    Y2.append(MinSeries.min())
    try:
        x2_2_date = MinSeries[MinSeries.index > x2_1_date].idxmin()
        x2_2 = closeHist[closeHist.index==x2_2_date].x_index.iloc[0]
        X2.append(x2_2)
        Y2.append(MinSeries[MinSeries.index > x2_1_date].min())
    except ValueError:
        # presumably raised by idxmin when no later trough exists; X2 keeps a single point
        pass

    print('Current Price for', ticker, 'is', str(round(closeHist['Price(Monthly avg.)'].iloc[-1], 2)))

    # Every line computed so far; the last element of each list is the current line
    sellLine_list = list()
    buyLine_list = list()

    #Calculate and plot Sell line:
    # `drawLine2P` is defined elsewhere; element [1] of its result is used below as
    # the line's values aligned with closeHist.index -- TODO confirm its contract
    if len(X2) < 2:
    # IF WE CANNOT BUILD A SELL LINE USING MAX, START WITH FIRST TWO TROUGHS
        X2 = list(closeHist.loc[MinSeries.index]['x_index'].iloc[:2])
        Y2 = list(closeHist.loc[MinSeries.index]['Price(Monthly avg.)'].iloc[:2])
        ThreePtS = drawLine2P(x=X2,y=Y2,xlims=[closeHist['x_index'].values.min(),
                                               closeHist['x_index'].values.max()+1])
        sellLine_list.append(ThreePtS[1])
    else: 
        ThreePtS = drawLine2P(x=X2,y=Y2,xlims=[closeHist['x_index'].values.min(),
                                  closeHist['x_index'].values.max()+1])
        sellLine_list.append(ThreePtS[1])

    #Calculate and plot Buy line:
    if len(X) < 2:
        # Only one peak anchor: no initial Buy line (ThreePtB stays unset until
        # the loop below assigns it)
        pass
    else: 
        ThreePtB = drawLine2P(x=X,y=Y,xlims=[closeHist['x_index'].values.min(),
                                  closeHist['x_index'].values.max()+1])
        buyLine_list.append(ThreePtB[1])


    # Start scanning from the latest month used as a Sell-line anchor
    Buy_Breach = max(closeHist[closeHist.x_index.isin(X2)].index)
    if verbose_mode:
        # Breach counter; it only exists (and is only used) in verbose mode
        n = 1 #TESTING
    # Alternate between the two searches until no further breach (or no
    # peak/trough after a breach) is found:
    #   outer body: first month from Buy_Breach on where the price falls below
    #               the current Sell line -> build a new Buy line
    #   inner body: first month from Sell_Breach on where the price rises above
    #               the current Buy line -> build a new Sell line
    while Buy_Breach:
        # FIRST BUY ITERATION
        latestHist = closeHist.loc[Buy_Breach:]
        subSell = latestHist.index[latestHist['Price(Monthly avg.)'] < pd.Series(ThreePtS[1], closeHist.index).loc[Buy_Breach:]]
        if len(subSell) > 0:
            Sell_Breach = subSell[0]        
            preBreach = MaxSeries[MaxSeries.index < Sell_Breach].index
            postBreach = MaxSeries[MaxSeries.index > Sell_Breach].index
            if verbose_mode:
                print("{} Sell Breach at {}, this is Breach #{}".format(ticker, Sell_Breach, n)) #TESTING
                n+=1
            if len(postBreach) > 0:
                # New Buy line: highest peak before the breach -> first peak after it
                pt_1 = closeHist.loc[closeHist.loc[preBreach]['Price(Monthly avg.)'].idxmax()]
                pt_2 = closeHist.loc[postBreach[0]]
                Y2 = [pt_1['Price(Monthly avg.)'], pt_2['Price(Monthly avg.)']]
                X2 = [pt_1['x_index'], pt_2['x_index']]
                ThreePtB = drawLine2P(x=X2,y=Y2,xlims=[closeHist['x_index'].values.min(),
                                                       closeHist['x_index'].values.max()+1])
    #                plt.plot(closeHist.index, ThreePtB[1],
    #                         c='g', linestyle='dashed', 
    #                         alpha=buyAlpha)
                buyLine_list.append(ThreePtB[1])
            else:
                Sell_Breach = None
                break        
        else:
            Sell_Breach = None
            break
        # Every path through this inner loop ends in `break`, so it runs at most once
        while Sell_Breach:
            # FIRST SELL ITERATION
            latestHist = closeHist.loc[Sell_Breach:]
            superBuy = latestHist.index[latestHist['Price(Monthly avg.)'] > pd.Series(ThreePtB[1], closeHist.index).loc[Sell_Breach:]]
            if len(superBuy) > 0:
                Buy_Breach = superBuy[0]
                preBreach = MinSeries[MinSeries.index < Buy_Breach].index
                postBreach = MinSeries[MinSeries.index > Buy_Breach].index
                if verbose_mode:
                    print("{} Buy Breach at {}, this is Breach #{}".format(ticker, Buy_Breach, n)) #TESTING
                    n+=1
                if len(postBreach) > 0:
                    # New Sell line: lowest trough before the breach -> first trough after it
                    pt_1 = closeHist.loc[closeHist.loc[preBreach]['Price(Monthly avg.)'].idxmin()]
                    pt_2 = closeHist.loc[postBreach[0]]
                    Y2 = [pt_1['Price(Monthly avg.)'], pt_2['Price(Monthly avg.)']]
                    X2 = [pt_1['x_index'], pt_2['x_index']]
                    ThreePtS = drawLine2P(x=X2,y=Y2,xlims=[closeHist['x_index'].values.min(),
                                                           closeHist['x_index'].values.max()+1])
    #                    plt.plot(closeHist.index, ThreePtS[1],
    #                             c='r', linestyle='dashed', 
    #                             alpha=sellAlpha)
                    sellLine_list.append(ThreePtS[1])

                    break
                else:
                    # Setting Buy_Breach to None also ends the outer loop
                    Buy_Breach = None
                    break
            else:
                Buy_Breach = None
                break
    #sellLine_alpha = np.linspace(0.1, 1, len(sellLine_list))
    #buyLine_alpha = np.linspace(0.1, 1, len(buyLine_list))
    # One alpha per line, increasing towards 1 so that newer lines are more opaque
    sellLine_alpha = np.flipud(np.linspace(1, 0.1, len(sellLine_list)+1)[:-1])
    buyLine_alpha = np.flipud(np.linspace(1, 0.1, len(buyLine_list)+1)[:-1])



    # Report the last value of the most recent line, floored at zero
    if len(sellLine_list) > 0:
        sellPrice = round(sellLine_list[-1][-1], 2)
        if sellPrice < 0:
            sellPrice = round(0.00, 2)            
        print('Sell Price for', ticker, 'is', sellPrice)
    if len(buyLine_list) > 0:
        buyPrice = round(buyLine_list[-1][-1], 2)
        if buyPrice < 0:
            buyPrice = round(0.00, 2)
        print('Buy Price for', ticker, 'is', buyPrice)

    plt.figure(figsize=[20,9])
    with plt.style.context('fivethirtyeight'):
        plt.plot(closeHist['Price(Monthly avg.)'], zorder=0)
        
        if verbose_mode:
            # Historical lines, dashed and faded according to their age
            for i in np.arange(len(sellLine_list)):
                plt.plot(closeHist.index, sellLine_list[i],
                         c='r', linestyle='dashed', 
                         alpha=sellLine_alpha[i])

            for i in np.arange(len(buyLine_list)):
                plt.plot(closeHist.index, buyLine_list[i],
                         c='g', linestyle='dashed', 
                         alpha=buyLine_alpha[i])

        # Current Sell (red) and Buy (green) lines, solid
        if len(sellLine_list) > 0:
            plt.plot(closeHist.index, sellLine_list[-1],
                     c='r',
                     alpha=1)
        
        if len(buyLine_list) > 0:
            plt.plot(closeHist.index, buyLine_list[-1],
                     c='g', 
                     alpha=1)  

        # Troughs in red, peaks in green, drawn above the lines
        plt.scatter(MinSeries.index, 
                    MinSeries,
                    c='r', s=50, zorder=10)
        plt.scatter(MaxSeries.index, 
                    MaxSeries,
                    c='g', s=50, zorder=10)
    #    plt.scatter(MaxMaxSeries.index, 
    #                MaxMaxSeries,
    #                c='y', s=100, zorder=5)
    #    plt.scatter(MinMinSeries.index, 
    #                MinMinSeries,
    #                c='y', s=100, zorder=5)
    plt.title("Buy and Sell Lines for "+ ticker, {'fontsize':20})
    plt.autoscale()
    # Round the price range outwards to one significant digit for the y axis.
    # Y_lim_min is computed but not used: the axis always starts at 0.
    num = closeHist['Price(Monthly avg.)'].min()
    Y_lim_min = math.floor(num / 10 ** math.floor(math.log10(num))) * 10 ** math.floor(math.log10(num))
    num = closeHist['Price(Monthly avg.)'].max()
    Y_lim_max = math.ceil(num / 10 ** math.floor(math.log10(num))) * 10 ** math.floor(math.log10(num))
    plt.ylim(0, Y_lim_max)#,Y_lim_max)
    plt.show()
def load_data(ticker,
              n_steps=70,
              scale=True,
              shuffle=True,
              lookup_step=1,
              test_size=0.35,
              feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """
    Loads data from Yahoo Finance source, as well as scaling, shuffling, normalizing and splitting.
    Params:
        ticker (str/pd.DataFrame): the ticker you want to load, examples include AAPL, TSLA, etc.
            A DataFrame argument is copied and never modified.
        n_steps (int): the historical sequence length (i.e window size) used to predict, default is 70
        scale (bool): whether to scale prices from 0 to 1, default is True
        shuffle (bool): whether to shuffle the data, default is True
        lookup_step (int): the future lookup step to predict, default is 1 (e.g next day)
        test_size (float): ratio for test data, default is 0.35 (35% testing data)
        feature_columns (list): the list of features to use to feed into the model, default is everything grabbed from yahoo_fin
            (only read here, never mutated, so the shared default list is safe)
    Returns:
        dict with keys 'df' (unmodified copy of the input data),
        'column_scaler' (only when `scale` is true), 'last_sequence',
        'X_train', 'X_test', 'y_train' and 'y_test'.
    Raises:
        TypeError: if `ticker` is neither a str nor a DataFrame.
        AssertionError: if a feature column is missing from the data.
    """
    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # work on a private copy: the steps below scale columns, add a
        # 'future' column and drop rows, none of which should leak back
        # into the caller's frame
        df = ticker.copy()
    else:
        raise TypeError(
            "ticker can be either a str or a `pd.DataFrame` instances")
    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original dataframe itself
    result['df'] = df.copy()
    # make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1, one scaler per column
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(
                np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler
        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows have NaN in the future column;
    # grab their features before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    # sliding window over the rows; the deque discards the oldest row itself
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    # join the final window with the rows that had no target yet,
    # e.g. 50 + 10 = 60 rows for n_steps=50 and lookup_step=10;
    # this last_sequence is meant for predicting prices beyond the dataset
    last_sequence = np.array(list(sequences) + list(last_sequence))
    # add to result
    result['last_sequence'] = last_sequence
    # construct the X's and y's as numpy arrays
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])
    # reshape X to fit the neural network
    # NOTE(review): reshape reinterprets the (samples, n_steps, features)
    # buffer as (samples, features, n_steps) without moving data; it is not a
    # transpose. Kept unchanged because downstream code presumably relies on
    # this exact layout -- confirm.
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
    # split the dataset
    result["X_train"], result["X_test"], result["y_train"], result[
        "y_test"] = train_test_split(X,
                                     y,
                                     test_size=test_size,
                                     shuffle=shuffle)
    # return the result
    return result
import datetime
from bokeh.plotting import figure, output_file, show
from bokeh.embed import components
from bokeh.models import HoverTool, Plot
from bokeh.io import output_notebook, push_notebook, show

# Flask application object; from_object(__name__) hands this module itself
# to Flask as the configuration source.
app_pradhan = Flask(__name__)
app_pradhan.config.from_object(__name__)
# NOTE(review): the secret key is hard-coded in source; consider loading it
# from the environment or an untracked config file instead.
app_pradhan.config['SECRET_KEY'] = '7d441f27d441f27567d441f2b6176a'

# We will look at stock prices from the start of 2010 up to today
start = datetime.date(2010, 1, 1)
end = datetime.date.today()

# Microsoft stock data: keep only the closing price, then turn the date
# index into a regular column so it can be plotted on the x axis
msft = si.get_data('msft', start, end).close
msft = pd.DataFrame(msft)
msft.reset_index(level=0, inplace=True)
#msft['date']=pd.to_datetime(msft['date'])

# Plot closing price of MSFT (only MSFT is drawn in this section)
TOOLS = 'save,pan,box_zoom,reset,wheel_zoom,hover'
plot = figure(plot_height=300,
              sizing_mode='scale_width',
              x_axis_type="datetime",
              tools=TOOLS)

# assumes the reset index column is named 'date' -- TODO confirm
plot.line(msft['date'], msft['close'], legend="MSFT", color="blue")

plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Close price in USD'
Beispiel #9
0
#description: this program uses the dual moving average crossover to determine when to buy and sell stock

#Import the libraries 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# import stock_info module from yahoo_fin
from yahoo_fin import stock_info as si

# Use the 'fivethirtyeight' matplotlib style for every figure below
plt.style.use('fivethirtyeight')

# Amazon: live price plus the price history from 01/01/2010
amzn = si.get_live_price("amzn")
amzn_data = si.get_data('amzn' , start_date = '01/01/2010')
 
# Tesla: live price plus the price history from 01/01/2010
tsla = si.get_live_price("tsla")
tsla_data = si.get_data('tsla' , start_date = '01/01/2010')

# Apple (the variables are spelled APPL although the ticker requested is "aapl")
APPL = si.get_live_price("aapl")
APPL_data = si.get_data('aapl' , start_date = '01/01/2010')

#Visualize the data
# One line per company, plotting the 'adjclose' column of each history
plt.figure(figsize=(12.5, 4.5))
plt.plot(APPL_data['adjclose'], label = 'Apple' )
plt.plot(tsla_data['adjclose'], label = 'Tesla' )
plt.plot(amzn_data['adjclose'], label = 'Amazon' )
plt.title('Adj. Close Price History')
plt.xlabel('Date')
Beispiel #10
0
async def DiffD(ctx, userinput):
    """Send the one-day price change of the ticker `userinput` to `ctx`.

    Two single-day queries are made through `si.get_data`: one for the
    current moment and one for the comparison day, which is two days back
    when the command runs on a Saturday or Sunday and one day back otherwise.
    The first 'close' value of each result is compared and the difference is
    sent as a percentage and as an absolute amount per share.

    If either query raises AssertionError or KeyError, a "ticker does not
    exist" message is sent instead. Any other exception propagates.
    """
    try:
        time = datetime.datetime.now()

        # For if the command is ran on the weekend: look two days back.
        # Every other day (including Monday, which the original handled in a
        # separate but identical branch) looks one day back.
        if time.weekday() in (5, 6):
            lookback = datetime.timedelta(days=2)
        else:
            lookback = datetime.timedelta(days=1)
        previousDay = time - lookback

        # Getting price
        dayPrice = si.get_data(userinput, start_date=time, end_date=time)
        YesterdayPrice = si.get_data(userinput,
                                     start_date=previousDay,
                                     end_date=previousDay)

        # First closing price of each query; .iloc makes the positional
        # access explicit (plain `series[0]` on a non-integer index is deprecated)
        finalDayPrice = pd.DataFrame(dayPrice).close.iloc[0]
        finalYesterdayPrice = pd.DataFrame(YesterdayPrice).close.iloc[0]

        # Finding the change in price
        # NOTE(review): the percentage is taken relative to the current price,
        # not the earlier one -- confirm this is intended.
        priceChange = finalDayPrice - finalYesterdayPrice
        change = (priceChange / finalDayPrice) * 100

        # Printing the change
        await ctx.send(
            f'The change in {userinput.upper()} in one day is roughly {round(change, 2)}% or ${round(priceChange, 4)}  per share https://ca.finance.yahoo.com/quote/{userinput}?p={userinput}'
        )

    except (AssertionError, KeyError):
        await ctx.send(
            f"Sorry that ticker {userinput.upper()} does not exist. This bot is only made for tickers on the nasdaq"
        )
Beispiel #11
0
def get_historical_data(ticker, startdate, enddate):
    """Return the result of `si.get_data` for `ticker` between `startdate` and `enddate` at a "1d" interval."""
    return si.get_data(ticker,
                       interval="1d",
                       start_date=startdate,
                       end_date=enddate)
Beispiel #12
0
async def Diffm(ctx, userinput):
    """Send the one-month price change of a ticker to ``ctx``.

    Looks up the ``close`` value for "now" and for the same moment one
    calendar month earlier via ``si.get_data`` and posts the difference,
    both as a percentage and in dollars per share.

    The previous implementation branched on the weekday of the two dates
    (weekend / "after hours Monday" / default), but all three branches ran
    exactly the same statements, so they are collapsed into one path here.

    Args:
        ctx: Object exposing an awaitable ``send(str)`` used for the reply.
        userinput: Ticker symbol as typed by the user.

    An ``AssertionError`` or ``KeyError`` raised during the lookup is
    reported to the user as an unknown ticker; any other exception
    propagates to the caller.
    """
    try:
        now = datetime.datetime.now()
        pastMonth = now - dateutil.relativedelta.relativedelta(months=1)

        # Getting price
        dayPrice = si.get_data(userinput, start_date=now, end_date=now)
        pastMonthPrice = si.get_data(userinput,
                                     start_date=pastMonth,
                                     end_date=pastMonth)

        # Take the first row of each frame. ``.iloc[0]`` is explicit
        # positional access; plain ``series[0]`` on a date-indexed Series is
        # deprecated and would become a (failing) label lookup.
        finalDayPrice = pd.DataFrame(dayPrice).close.iloc[0]
        finalPastMonthPrice = pd.DataFrame(pastMonthPrice).close.iloc[0]

        # Finding the change in price
        # NOTE(review): the percentage is taken relative to the *current*
        # price, not the price one month ago - confirm this is intended.
        priceChange = finalDayPrice - finalPastMonthPrice
        change = (priceChange / finalDayPrice) * 100

        # Printing the change
        await ctx.send(
            f'The change in {userinput.upper()} for one month is roughly {round(change, 2)}% or ${round(priceChange, 4)}  per share https://ca.finance.yahoo.com/quote/{userinput}?p={userinput}'
        )

    except (AssertionError, KeyError):
        await ctx.send(
            f"Sorry that ticker {userinput.upper()} does not exist. This bot is only made for tickers on the nasdaq"
        )
Beispiel #13
0
def output():
    """Train a stacked LSTM on one ticker's history and plot real vs. predicted prices.

    The ticker, start date, end date and forecast horizon are read from the
    module-level entry widgets (``entry3``, ``entry1``, ``entry2``,
    ``entry4``). The first column of the frame returned by ``si.get_data``
    is scaled to [0, 1], the network is fitted on consecutive
    (value[t] -> value[t + 1]) pairs, and the plot shows the real series
    against the in-sample predictions followed by ``int(horizon)``
    recursively generated future steps.

    Side effects: configures a TensorFlow session, destroys ``root`` and
    opens a matplotlib window.

    Any exception raised by ``si.get_live_price`` or ``si.get_data``
    propagates to the caller.
    """
    config = tf.ConfigProto(device_count={'' 'GPU': 1, 'CPU': 6})
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    scaler = MinMaxScaler(feature_range=(0, 1))

    ticker = entry3.get()
    start = entry1.get()
    end = entry2.get()
    horizon = entry4.get()

    # Called only for its side effect of raising when the live-price lookup
    # fails; the returned price itself is not used.
    si.get_live_price(ticker)

    week = si.get_data(ticker, start_date=start, end_date=end)
    week = week.iloc[:, 0]
    root.destroy()

    # Column vector of shape (n, 1), float32.
    stock_price = week.to_numpy(dtype=np.float32).reshape(-1, 1)

    # Training and "testing" data are the same series, so one fit suffices.
    scaled = scaler.fit_transform(stock_price)

    # Features are every value except the last; labels are the same series
    # shifted one step ahead. Slicing (instead of list.remove, which deletes
    # the first element *equal* to the last one) keeps the pairs aligned even
    # when the final price also occurs earlier in the series.
    features_set = np.array(scaled[:-1])
    labels = np.array(scaled[1:])

    features_set = np.reshape(
        features_set, (features_set.shape[0], features_set.shape[1], 1))

    model = Sequential()
    model.add(
        LSTM(units=50,
             return_sequences=True,
             input_shape=(features_set.shape[1], 1)))
    model.add(Dropout(0.2))

    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))

    # Four identical 100-unit ReLU layers, each followed by dropout.
    for _ in range(4):
        model.add(LSTM(units=100, return_sequences=True, activation="relu"))
        model.add(Dropout(0.2))

    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(features_set, labels, epochs=150, batch_size=64)

    # Recursive forecast: start from the last known (scaled) value and feed
    # each prediction back in as the next input.
    forecast = [np.reshape(scaled[-1], (1, 1, 1))]
    for _ in range(int(horizon)):
        forecast.append(model.predict(np.reshape(forecast[-1], (1, 1, 1))))
    forecast = np.reshape(forecast, (len(forecast), 1))

    # In-sample predictions over the whole series.
    fitted = model.predict(np.reshape(scaled, (scaled.size, 1, 1)))
    fitted = np.reshape(fitted, (scaled.size, 1))
    fitted = scaler.inverse_transform(fitted)
    forecast = scaler.inverse_transform(forecast)

    predicted = np.append(fitted, forecast).tolist()
    actual = [row[0] for row in scaler.inverse_transform(scaled).tolist()]

    plt.figure(figsize=(10, 6))
    plt.plot(actual,
             color='blue',
             label='Actual ' + ticker + ' Stock Price')
    plt.plot(predicted,
             color='red',
             label='Predicted ' + ticker + ' Stock Price')
    plt.xlabel('Date (days)')
    plt.ylabel('Stock Price (USD)')
    plt.legend()
    plt.show()
from collections import deque

import os
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler

# TEST FILE

# Print every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

# Download the "^N225" history for the given window.
# NOTE(review): the slash-separated dates are presumably MM/DD/YYYY - confirm.
df = si.get_data("^N225", start_date="01/01/2019", end_date="01/01/2020")

result = {}
# Keep an untouched copy of the downloaded frame before df is modified below.
result['df'] = df.copy()

# Copy the index into a "date" column, then move the index itself into a
# column as well and replace it with the default integer index.
df["date"] = df.index
df.reset_index(inplace=True)

# Columns that get scaled by the loop that follows.
feature_columns = ['adjclose', 'volume', 'open', 'high', 'low']

# Maps column name -> the scaler fitted on that column.
column_scaler = {}
# scale the data (prices) from 0 to 1
for column in feature_columns:
    scaler = preprocessing.MinMaxScaler()
    df[column] = scaler.fit_transform(np.expand_dims(df[column].values,
Beispiel #15
0
    "MSFT": "Microsoft Corporation",
    "NFLX": "Netflix Inc.",
    "TSLA": "Tesla Inc.",
    "INR=X": "USD/INR"
}
# Show a 1-based menu of the known tickers and let the user choose one.
print('Index Ticker Company')
for i in range(len(stocks)):
    print(i + 1, stocks[i], stock_dic[stocks[i]])
index = int(input("Enter the index ")) - 1
stock = stocks[index]
stock_name = stock_dic[stock]

# Today's date as a day-first string; kept for any later use of `datelive`.
datelive = datetime.date.today().strftime("%d/%m/%Y")
# Pass a real date object as end_date rather than the day-first string:
# slash-separated strings are parsed month-first whenever the day is <= 12,
# so e.g. "05/03/2021" would be read as May 3rd instead of March 5th.
# NOTE(review): start_date '01/06/2008' has the same ambiguity (January 6th
# vs. June 1st) - left unchanged because the intended date cannot be told
# from here.
datalive = si.get_data(stock,
                       start_date='01/06/2008',
                       end_date=datetime.date.today(),
                       index_as_date=False,
                       interval="1d")
dataset = datalive.loc[:, ['open', 'high', 'low', 'adjclose']]

# 1-based observation counter used as the x axis.
obs = np.arange(1, len(dataset) + 1, 1)

# Row-wise averages over the selected price columns.
OHLC_avg = dataset.mean(axis=1)
HLC_avg = dataset[['high', 'low', 'adjclose']].mean(axis=1)
close_val = dataset[['adjclose']]

plt.title(stock_name + " Stocks")
plt.plot(obs, OHLC_avg, 'r', label='OHLC avg')
plt.plot(obs, HLC_avg, 'b', label='HLC avg')
plt.plot(obs, close_val, 'g', label='Closing price')
plt.legend(loc='upper right')
Beispiel #16
0
    def get_past_balance(self,
                         d,
                         h_prices_path=('/Users/xiaolan/Documents'
                                        '/repos/PineCone/local_src'
                                        '/historical_prices.csv'),
                         max_lookback_days=30):
        """
        only for broker acct
        calculate past balance from all positions
        given a historical price sheet

        The price sheet at ``h_prices_path`` is a CSV with ``price``,
        ``ticker`` and ``date`` columns; it is created with a placeholder
        row when missing. Prices that are not in the sheet for date ``d``
        are fetched with ``si.get_data`` (mean of open and close), appended
        to the sheet under date ``d``, and the sheet is written back if it
        grew.

        Args:
            d: date to value the positions at (must support ``strftime``
                and addition of a ``timedelta``).
            h_prices_path: path of the historical price CSV.
            max_lookback_days: how many days before ``d`` to try when the
                data source raises ``KeyError`` for the requested day.
                Previously the search walked back without any limit and
                could loop forever.

        Returns:
            The balance (MMF positions + priced positions + cash) rounded
            to 2 decimals.

        Raises:
            KeyError: no price could be fetched for a ticker within
                ``max_lookback_days`` days before ``d``.

        TODO: if the price at a certain date is not available, update the date
        """
        if not os.path.exists(h_prices_path):
            # Seed the sheet with a placeholder row so the read below always
            # finds the expected columns.
            seed = pd.DataFrame({
                'price': [100],
                'ticker': ['DUMMY'],
                'date': [date(1900, 1, 1)]
            })
            seed.to_csv(h_prices_path, index=False)

        h_prices = pd.read_csv(h_prices_path)

        day = d.strftime("%Y-%m-%d")
        blc = 0
        prices = h_prices[h_prices.date == day]
        available_tickers = prices.ticker.tolist()
        num_h_prices_entries = h_prices.shape[0]

        # Money-market positions are added as-is, without a price lookup.
        for params in self.get_MMF().values():
            blc += params[1]

        for get_p in [self.get_stocks, self.get_ETF, self.get_bonds]:

            for ticker, params in get_p().items():
                position = params[1]
                if position == 0:
                    continue

                if ticker in available_tickers:
                    price = prices[prices.ticker == ticker].price.values[0]
                else:
                    # Walk back one day at a time until the data source has
                    # a row, giving up after max_lookback_days.
                    for dateback in range(max_lookback_days + 1):
                        try:
                            new_price = si.get_data(
                                ticker.lower(),
                                start_date=d +
                                timedelta(days=0 - dateback),
                                end_date=d + timedelta(days=1 - dateback))
                            break
                        except KeyError:
                            continue
                    else:
                        raise KeyError(
                            "no price for {} within {} days before {}".format(
                                ticker, max_lookback_days, day))

                    new_price['price'] = (new_price['open'] +
                                          new_price['close']) / 2
                    new_price = new_price[['ticker', 'price']] \
                        .reset_index().rename(columns={'index': 'date'})
                    # Store the fetched price under the requested date, even
                    # if it was found on an earlier day.
                    new_price.date = day
                    h_prices = pd.concat(
                        [h_prices, new_price], axis=0,
                        sort=False).fillna(0).reset_index(drop=True)

                    price = new_price.price.values[0]

                blc += position * price

        # Persist the sheet only when new prices were appended.
        if num_h_prices_entries != h_prices.shape[0]:
            h_prices.to_csv(h_prices_path, index=False)
        blc += self._cash
        return round(blc, 2)
def get_stock(ticker):
    """Return what ``si.get_data`` yields for ``ticker`` with that call's defaults."""
    stock_data = si.get_data(ticker)
    return stock_data
def fetch_ticker_data(ticker) -> pd.DataFrame:
    """Download the data for ``ticker`` via ``si.get_data``.

    Only string tickers are looked up; for any other argument type the
    function returns ``None`` without contacting the data source.
    """
    if not isinstance(ticker, str):
        return None
    return si.get_data(ticker)
# Passed positionally to si.get_data below as its 4th and 5th arguments.
index_as_date = True
interval = "1d"

# =============================================================================
# IN_STEPS is the length of the training data window.
# OUT_STEPS is the number of label predictions we want to make.
# When adjusting these values, keep in mind the amount of actual data that
# training is performed with, and ensure there is more training data than
# IN_STEPS.
# =============================================================================

IN_STEPS = 170
OUT_STEPS = 60

# Import data from Yahoo Finance.
df = si.get_data(ticker, start_date, end_date, index_as_date, interval)
# Parse the index into datetimes using an explicit day-month-year format.
date_time = pd.to_datetime(df.index, format='%d-%m-%Y %H:%M:%S')

# Prune features: drop the listed price columns and the ticker column.
df = df.drop(columns=['open', 'high', 'low', 'close', 'ticker'])
#df = df.drop(columns = ['ticker'])

# =============================================================================
# Disabled: yearly sine/cosine time signals derived from the timestamps.
# timestamp_s = date_time.map(datetime.datetime.timestamp)
# year = (365.2425)*24*60*60
# df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
# df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))
# =============================================================================

indicator_bb = ta.volatility.BollingerBands(close=df["adjclose"],
                                            window=30,
def loadData(ticker,
             n_steps=25,
             scale=True,
             predict_step=1,
             split_by_date=True,
             test_size=0.2):
    """Download a ticker and turn it into windowed train/test arrays.

    Args:
        ticker: symbol handed to ``si.get_data``.
        n_steps: number of consecutive rows in each input window.
        scale: when true, min-max scale every feature column and return
            the fitted scalers under ``"column_scaler"``.
        predict_step: how many rows ahead the ``adjclose`` target lies.
        split_by_date: when true, slice the windows chronologically;
            otherwise delegate to ``train_test_split`` (with
            ``shuffle=False``).
        test_size: fraction of windows assigned to the test set.

    Returns:
        dict with ``df`` (copy of the raw download), optionally
        ``column_scaler``, ``last_sequence``, ``X_train``/``y_train``/
        ``X_test``/``y_test`` and ``test_df`` (raw rows for the test dates).
    """
    # All features taken from the Yahoo Finance download.
    feature_columns = ['adjclose', 'volume', 'open', 'high', 'low']
    n_features = len(feature_columns)

    df = si.get_data(ticker)

    # Snapshot of the raw data, taken before df is modified in place.
    result = {'df': df.copy()}

    # Carry the date along as an ordinary column.
    if "date" not in df.columns:
        df["date"] = df.index

    if scale:
        # One independent scaler per column, each mapping to [0, 1].
        column_scaler = {}
        for name in feature_columns:
            fitted = preprocessing.MinMaxScaler()
            df[name] = fitted.fit_transform(df[name].values[:, np.newaxis])
            column_scaler[name] = fitted
        result["column_scaler"] = column_scaler

    # Target: adjclose `predict_step` rows into the future.
    df['future'] = df['adjclose'].shift(-predict_step)

    # The trailing rows have NaN targets; keep their features before they
    # are dropped.
    tail_features = np.array(df[feature_columns].tail(predict_step))

    df.dropna(inplace=True)

    # Slide a fixed-length window over the rows (features + date).
    window = deque(maxlen=n_steps)
    sequence_data = []
    rows = df[feature_columns + ["date"]].values
    targets = df['future'].values
    for row, target in zip(rows, targets):
        window.append(row)
        if len(window) == n_steps:
            sequence_data.append([np.array(window), target])

    # Final window (date column stripped) followed by the rows without a
    # target, e.g. n_steps=50 and predict_step=10 gives 60 rows. Used to
    # predict prices that are not in the dataset yet.
    last_sequence = list([s[:n_features] for s in window]) + list(tail_features)
    result['last_sequence'] = np.array(last_sequence).astype(np.float32)

    # Network inputs and targets.
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])

    if split_by_date:
        # Chronological split: the first part trains, the rest tests.
        cut = int((1 - test_size) * len(X))
        result["X_train"], result["y_train"] = X[:cut], y[:cut]
        result["X_test"], result["y_test"] = X[cut:], y[cut:]
    else:
        (result["X_train"], result["X_test"],
         result["y_train"], result["y_test"]) = train_test_split(
             X, y, test_size=test_size, shuffle=False)

    # Date of the last row of every test window.
    dates = result["X_test"][:, -1, -1]

    # Raw rows for those dates, without duplicated index entries.
    test_df = result["df"].loc[dates]
    result["test_df"] = test_df
    result["test_df"] = result["test_df"][
        ~result["test_df"].index.duplicated(keep='first')]

    # Strip the date column from the model inputs and cast to float32.
    for key in ("X_train", "X_test"):
        result[key] = result[key][:, :, :n_features].astype(np.float32)

    return result
Beispiel #21
0
#print(type(ui))
#<class 'pandas.core.frame.DataFrame'>

from yahoo_fin.stock_info import get_data

# Download the full history for ticker "UI", rounded to 2 decimals.
ui = get_data("UI").round(2)
#print(ui.dtypes)
# Show the last 100 rows.
print(ui.tail(100))
#ui.to_csv('ui.csv')
Beispiel #22
0
    "SBSP3.SA", "SCAR3.SA", "SEER3.SA", "SGPS3.SA", "SHOW3.SA", "SLCE3.SA",
    "SMLS3.SA", "SMTO3.SA", "SQIA3.SA", "STBP3.SA", "SULA11.SA", "SUZB3.SA",
    "TAEE11.SA", "TASA3.SA", "TASA4.SA", "TCSA3.SA", "TECN3.SA", "TEND3.SA",
    "TESA3.SA", "TGMA3.SA", "TIET11.SA", "TIMP3.SA", "TOTS3.SA", "TRIS3.SA",
    "TRPL4.SA", "TUPY3.SA", "UCAS3.SA", "UGPA3.SA", "UNIP6.SA", "USIM3.SA",
    "USIM5.SA", "VALE3.SA", "VIVA3.SA", "VIVT4.SA", "VLID3.SA", "VULC3.SA",
    "VVAR3.SA", "WEGE3.SA", "WIZS3.SA", "YDUQ3.SA"
]

today = datetime.datetime.now()
start = "2020-01-01"
print(today.strftime("%x"))

# Screen every ticker: z-score the closing prices since `start` and report
# the tickers whose z-score on 2020-07-10 is below -0.5.
for stock in stocks:
    try:
        #print(si.get_live_price(stock))
        # Pass the datetime itself as end_date: "%x" is locale dependent, so
        # the formatted string can be misread as a different date.
        few_days = si.get_data(stock,
                               start_date=start,
                               end_date=today)
        #print(few_days)
        normalized_df = (few_days['close'] -
                         few_days['close'].mean()) / few_days['close'].std()
        #print(normalized_df)
        if (normalized_df.loc["2020-07-10"] < -0.5):
            print(stock, normalized_df.loc["2020-07-10"])
            #plt.plot(normalized_df, label=stock)
    except Exception as exc:
        # Best effort: report the failing ticker and continue with the next
        # one. Unlike a bare `except:`, Ctrl-C still interrupts the loop.
        print("Erro:", stock, exc)
#plt.legend()
#plt.show()
from yahoo_fin import stock_info as si
import matplotlib.pyplot as plt
import math
import csv
import json
import pandas as pd
# Disabled Alpha Vantage alternative (API key redacted - never commit keys):
#ts = TimeSeries(key='<ALPHAVANTAGE_API_KEY>')
# NOTE(review): despite the name, `spydata` holds the 'amzn' history.
spydata=si.get_data('amzn' , start_date = '12/27/2014')
#spydata=pd.read_csv('https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=SPY&interval=1min&outputsize=full&apikey=<ALPHAVANTAGE_API_KEY>&datatype=csv')
shdata=si.get_data('sh' , start_date = '12/27/2014')
#shdata=pd.read_csv('https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol=SPY&interval=1min&outputsize=full&apikey=<ALPHAVANTAGE_API_KEY>&datatype=csv')
mlist=[]   # difference between row x and row x - period (column 3 of spydata)
xaxis=[]   # row positions period .. lines - 1
plist=[]   # column 3 of spydata per row
shlist=[]  # column 3 of shdata per row
# Look-back distance, in rows, used for the difference below.
period=12
# Exclusive upper bound of the row positions that are read.
# NOTE(review): hard-coded; assumes both frames have at least this many rows.
lines=1258
#lines=1762
for x in range(period,lines):
    xaxis.append(x)
# Collect column 3 of both frames (presumably 'close' - TODO confirm).
for x in range(period,lines):
  #x=1762-x
  temp=spydata.iloc[x,3]
  plist.append(temp)
  temp=shdata.iloc[x,3]
  shlist.append(temp)
for x in range(period,lines):
  #x=1762-x
  # Absolute change over `period` rows.
  temp=spydata.iloc[x,3]-spydata.iloc[(x-period),3]
  # Relative change over `period` rows, scaled by 250; computed but not
  # stored in any list within this part of the script.
  temp2=((spydata.iloc[x,3]/spydata.iloc[(x-period),3])-1)*250
  mlist.append(temp)
Beispiel #24
0
def get_intraday_data_from_ticker(ticker):
    """Advance the shared progress bar by one, then fetch 1-minute data.

    The data comes from ``si.get_data`` with ``index_as_date=False``.
    """
    pbar.update(1)
    intraday = si.get_data(ticker, index_as_date=False, interval='1m')
    return intraday
Beispiel #25
0
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1,
              test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """Load a ticker (or a ready DataFrame) and build windowed train/test sets.

    Args:
        ticker: a symbol string (downloaded via ``si.get_data``) or an
            already loaded ``pd.DataFrame``. A passed DataFrame is modified
            in place (scaled columns, added ``future`` column, dropped NaN
            rows).
        n_steps: number of consecutive rows per input window.
        scale: when true, min-max scale each feature column and return the
            fitted scalers under ``"column_scaler"``.
        shuffle: forwarded to ``train_test_split``.
        lookup_step: how many rows ahead the ``adjclose`` target lies.
        test_size: fraction of the samples used for testing.
        feature_columns: columns used as model inputs (never mutated here).

    Returns:
        dict with ``df``, optionally ``column_scaler``, ``last_sequence``,
        ``X_train``, ``X_test``, ``y_train`` and ``y_test``.

    Raises:
        TypeError: ``ticker`` is neither a ``str`` nor a ``pd.DataFrame``.
        AssertionError: a requested feature column is missing.
    """
    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # already loaded, use it directly
        df = ticker
    else:
        # fail with a clear message instead of an unbound `df` further down
        raise TypeError(
            "ticker can be either a str or a `pd.DataFrame` instances")
    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original dataframe itself
    result['df'] = df.copy()
    # make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(
                np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler

        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows contain NaN in the future column;
    # get their features before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    # get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
    # for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (that is 50+10-1) length
    # this last_sequence will be used to predict in future dates that are not available in the dataset
    last_sequence = list(sequences) + list(last_sequence)
    # shift the last sequence by -1
    last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
    # add to result
    result['last_sequence'] = last_sequence
    # construct the X's and y's
    X, y = [], []
    for seq, target in sequence_data:
        X.append(seq)
        y.append(target)
    # convert to numpy arrays
    X = np.array(X)
    y = np.array(y)
    # reshape X to fit the neural network
    # NOTE(review): this is a reshape, not a transpose - it reinterprets the
    # (samples, n_steps, n_features) buffer rather than swapping axes.
    # Confirm this is what the model expects.
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
    # split the dataset
    result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(
        X, y, test_size=test_size, shuffle=shuffle)
    # return the result
    return result


def create_model(input_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                 loss="mean_absolute_error", optimizer="rmsprop"):
    """Build and compile a stacked recurrent regression model.

    This used to be nested after ``return result`` in ``load_data`` with
    its body at the outer indentation level, which made all of it
    unreachable; it is a standalone function now.

    Args:
        input_length: size of the last input dimension.
        units: number of units in every recurrent layer.
        cell: recurrent layer class to stack.
        n_layers: number of recurrent layers.
        dropout: dropout rate applied after every recurrent layer.
        loss: loss passed to ``model.compile``.
        optimizer: optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model with a single linear output.
    """
    model = Sequential()
    for i in range(n_layers):
        if i == 0:
            # first layer
            model.add(cell(units, return_sequences=True,
                           input_shape=(None, input_length)))
        elif i == n_layers - 1:
            # last layer
            model.add(cell(units, return_sequences=False))
        else:
            # hidden layers
            model.add(cell(units, return_sequences=True))
        # add dropout after each layer
        model.add(Dropout(dropout))
    model.add(Dense(1, activation="linear"))
    model.compile(loss=loss, metrics=[
                  "mean_absolute_error"], optimizer=optimizer)
    return model
# Ensure your stock list has the following columns: Symbol, Industry, Volume, Name and Market Cap Size


stocks_df = pd.read_excel('/content/drive/MyDrive/StocksToScreen/StockMasterList.xlsx')
# Keep only the rows whose industry is in list_to_keep.
stocks_df = stocks_df.loc[stocks_df['Industry'].isin(list_to_keep)]
stocks_df_new = stocks_df.rename(columns={'Market Cap Size':'MarketCapSize'})
#stocks_df

# Define the ticker list
ticker_list = stocks_df['Symbol']
print(ticker_list)

# Download the history of every ticker; failures are reported and skipped.
historical_datas = {}
for ticker in ticker_list:
    try:
      historical_datas[ticker] = get_data(ticker)
      print(f"saved {ticker}")
    except Exception as exc:
      # Best effort: keep going, but say why the download failed. Unlike a
      # bare `except:`, this no longer swallows Ctrl-C.
      print(f"Error with {ticker}: {exc!r}")

# Spot test
historical_datas["TSLA"]['volume'].loc["2021-2-10"]

"""# Start Final Data Frame. Get Volume first and clear volumes <100,000"""

from pandas.tseries.offsets import BMonthEnd, BMonthBegin
from datetime import date

#Get today's date to get yesterdays volume
todayD1 = date.today()
Beispiel #27
0
    def load_data(
            ticker,
            n_steps=25,
            scale=True,
            shuffle=True,
            lookup_step=1,
            split_by_date=True,
            test_size=0.1,
            feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
        """
        Loads data from Yahoo Finance source, as well as scaling, shuffling, normalizing and splitting.

        NOTE(review): the `ticker` argument is overwritten with the outer
        variable `tick` on the first line of the body, so whatever the caller
        passes is ignored - confirm this is intended.

        Params:
            ticker (str/pd.DataFrame): the ticker you want to load, examples include AAPL, TSLA, etc.
            n_steps (int): the historical sequence length (i.e window size) used to predict, default is 25
            scale (bool): whether to scale prices from 0 to 1, default is True
            shuffle (bool): whether to shuffle the dataset (both training & testing), default is True
            lookup_step (int): the future lookup step to predict, default is 1 (e.g next day)
            split_by_date (bool): whether we split the dataset into training/testing by date, setting it
                to False will split datasets with `train_test_split` instead
            test_size (float): ratio for test data, default is 0.1 (10% testing data)
            feature_columns (list): the list of features to use to feed into the model, default is everything grabbed from yahoo_fin
        Returns:
            dict with the keys "df", "column_scaler" (only when `scale` is set), "last_sequence",
            "X_train", "y_train", "X_test", "y_test" and "test_df"
        Raises:
            TypeError: the ticker is neither a str nor a pd.DataFrame
            AssertionError: one of `feature_columns` is missing from the dataframe
        """
        # Set ticker equal to 'tick' value
        ticker = tick

        # see if ticker is already a loaded stock from yahoo finance
        if isinstance(ticker, str):
            # load it from yahoo_fin library
            df = si.get_data(ticker)
        elif isinstance(ticker, pd.DataFrame):
            # already loaded, use it directly
            df = ticker
        else:
            raise TypeError(
                "ticker can be either a str or a `pd.DataFrame` instances")

        # this will contain all the elements we want to return from this function
        result = {}

        # we will also return the original dataframe itself
        result['df'] = df.copy()

        # make sure that the passed feature_columns exist in the dataframe
        for col in feature_columns:
            assert col in df.columns, f"'{col}' does not exist in the dataframe."
        # add date as a column
        if "date" not in df.columns:
            df["date"] = df.index
        if scale:
            column_scaler = {}
            # scale the data (prices) from 0 to 1
            for column in feature_columns:
                scaler = preprocessing.MinMaxScaler()
                df[column] = scaler.fit_transform(
                    np.expand_dims(df[column].values, axis=1))
                column_scaler[column] = scaler
            # add the MinMaxScaler instances to the result returned
            result["column_scaler"] = column_scaler

        # add the target column (label) by shifting by `lookup_step`
        df['future'] = df['adjclose'].shift(-lookup_step)

        # the last `lookup_step` rows contain NaN in the future column;
        # get their features before dropping NaNs
        last_sequence = np.array(df[feature_columns].tail(lookup_step))

        # drop NaNs
        df.dropna(inplace=True)
        sequence_data = []
        # sliding window holding the most recent `n_steps` rows (features + date)
        sequences = deque(maxlen=n_steps)
        for entry, target in zip(df[feature_columns + ["date"]].values,
                                 df['future'].values):
            sequences.append(entry)
            if len(sequences) == n_steps:
                sequence_data.append([np.array(sequences), target])

        # get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
        # for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 60 (that is 50+10) length
        # this last_sequence will be used to predict future stock prices that are not available in the dataset
        last_sequence = list([s[:len(feature_columns)]
                              for s in sequences]) + list(last_sequence)
        last_sequence = np.array(last_sequence).astype(np.float32)

        # add to result
        result['last_sequence'] = last_sequence

        # construct the X's and y's
        X, y = [], []
        for seq, target in sequence_data:
            X.append(seq)
            y.append(target)

        # convert to numpy arrays
        X = np.array(X)
        y = np.array(y)
        if split_by_date:
            # split the dataset into training & testing sets by date (not randomly splitting)
            train_samples = int((1 - test_size) * len(X))
            result["X_train"] = X[:train_samples]
            result["y_train"] = y[:train_samples]
            result["X_test"] = X[train_samples:]
            result["y_test"] = y[train_samples:]
            if shuffle:
                # shuffle the datasets for training (if shuffle parameter is set)
                shuffle_in_unison(result["X_train"], result["y_train"])
                shuffle_in_unison(result["X_test"], result["y_test"])
        else:
            # split the dataset with train_test_split (random when `shuffle` is set)
            result["X_train"], result["X_test"], result["y_train"], result[
                "y_test"] = train_test_split(X,
                                             y,
                                             test_size=test_size,
                                             shuffle=shuffle)

        # get the list of test set dates (date column of the last row of each window)
        dates = result["X_test"][:, -1, -1]

        # retrieve test features from the original dataframe
        result["test_df"] = result["df"].loc[dates]

        # remove duplicated dates in the testing dataframe
        result["test_df"] = result["test_df"][~result["test_df"].index.
                                              duplicated(keep='first')]

        # remove dates from the training/testing sets & convert to float32
        result["X_train"] = result["X_train"][:, :, :len(feature_columns
                                                         )].astype(np.float32)
        result["X_test"] = result["X_test"][:, :, :len(feature_columns
                                                       )].astype(np.float32)
        return result
Beispiel #28
0
# Alternative window, for input data from "01.01.1995" to "01.01.2021":
#start_date = '2015.12.02'
#end_date = '2020.12.29'

# Date order is M/D/Y according to the original author.
# NOTE(review): confirm that the dotted form is really parsed month-first.
start_date = '01.01.2010'
end_date = '05.10.2021'
ticker = '^N225'

# Plot styling.
fig_size = (15, 8)
size = 18  # text size
dpi = 500
rcParams['font.family'] = 'serif'
rcParams['font.serif'] = ['Times']

# Download the history and keep a full copy with the index moved into a column.
df = si.get_data(ticker, start_date, end_date)
orig_price = df.copy()
orig_price.reset_index(inplace=True)
# From here on df is only the 'adjclose' column.
df = df['adjclose']
#print(orig_price)

# First and last index value of the data set, used for the plot.
start_plot = df.index[0]
end_plot = df.index[-1]

## Plot index price graph (disabled)
#fig, ax = plt.subplots(figsize = fig_size)
#for label in (ax.get_xticklabels() + ax.get_yticklabels()):
#    label.set_fontsize(size)

#ax.plot(df, c='steelblue', linewidth = 0.9)
Beispiel #29
0
import numpy as np
import requests
import yahoo_fin.stock_info as si
#import zipline as zl

# Disabled: print the live AAPL price with two decimals.
#price = si.get_live_price('AAPL')
#print('{0:.2f}'.format(price))

# Download AAPL history for the given window and print the 'high' column
# as a plain array.
# NOTE(review): the dash-separated dates are presumably MM-DD-YYYY - confirm.
hist = si.get_data('AAPL', start_date='01-01-2018', end_date='09-01-2018')
#print(hist)
prices = hist['high'].values
print(prices)

# Disabled Alpha Vantage alternative that plots intraday closing prices:
#from alpha_vantage.timeseries import TimeSeries
#import matplotlib.pyplot as plt
#import sys

#def stockchart(symbol):
#    ts = TimeSeries(key='your_key', output_format='pandas')
#    data, meta_data = ts.get_intraday(symbol=symbol,interval='60min', outputsize='full')
#    data['4. close'].plot()
#    plt.title(symbol)
#    plt.show()

#stockchart('AAPL')
Beispiel #30
0
def get_last_stock_price(ticker, last=False):
    """Fetch price data for ``ticker`` via ``si.get_data``.

    With ``last`` false the call uses that function's defaults. With
    ``last`` true only daily data starting 2190 days before now is
    requested.
    """
    if not last:
        return si.get_data(ticker)
    window_start = datetime.now() - timedelta(days=2190)
    return si.get_data(ticker, window_start, interval = "1d")