# Imports assumed by this excerpt (the original file headers are not shown)
import os
import io
import base64
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import DataReader
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM


def crearDatosForecast(ticker, window_size=60, loss='mae', optimizer='adam', metrics=None):
    # Prepare the model and the data used to produce forecasts
    if metrics is None:  # avoid a mutable default argument
        metrics = ['mae']
    # Build the model
    modelo = crearModelo(window_size, loss, optimizer, metrics)
    # Load the previously saved weights
    modelo.load_weights(os.path.join(FOLDER, ticker + '.h5'))
    # Download only the last two periods of the series (window length * 2 days)
    df = DataReader(ticker, data_source='yahoo',
                    start=datetime.now() - timedelta(days=window_size * 2))
    # Create a dictionary entry keyed by ticker: the model, the Close series
    # and the Close values as an array
    data_dic = {
        'modelo': modelo,
        'data': df.filter(['Close']),
        'dataset': df.filter(['Close']).values
    }
    # Added in V2.0: reuse the scaler fitted during training. Note: transform,
    # not fit_transform, so the saved training scale is preserved.
    scaler = loadScaler(ticker)
    data_dic['scaler'] = scaler
    data_dic['scaled_data'] = scaler.transform(data_dic['dataset'])
    return data_dic
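# loadScaler (used above) and saveScaler (used in crearSerie below) are not
# shown in this excerpt. A minimal sketch, assuming the scaler is persisted per
# ticker with joblib next to the model weights; the '.scaler' file-name
# convention is an assumption, not the original code.
import joblib


def saveScaler(ticker, scaler):
    # Persist the MinMaxScaler fitted on the training data
    joblib.dump(scaler, os.path.join(FOLDER, ticker + '.scaler'))


def loadScaler(ticker):
    # Restore the scaler so new data is transformed on the training scale
    return joblib.load(os.path.join(FOLDER, ticker + '.scaler'))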
def stock_predict_plt(key):
    # Download two years of quotes and keep only the closing price
    end = datetime.now()
    start = datetime(end.year - 2, end.month, end.day)
    df = DataReader(key, data_source='yahoo', start=start, end=end)
    data = df.filter(['Close'])
    dataset = data.values
    # 80% of the rows are used for training
    training_data_len = int(np.ceil(len(dataset) * .8))

    # Scale the values to the [0, 1] range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)

    # Build training windows of 30 steps
    train_data = scaled_data[0:int(training_data_len), :]
    x_train = []
    y_train = []
    for i in range(30, len(train_data)):
        x_train.append(train_data[i - 30:i, 0])
        y_train.append(train_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Single-layer LSTM regressor, trained from scratch on every call
    model = Sequential()
    model.add(LSTM(70, return_sequences=False, input_shape=(x_train.shape[1], 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(x_train, y_train, batch_size=1, epochs=20)

    # Build the test windows; each window needs the 30 preceding values
    test_data = scaled_data[training_data_len - 30:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]
    for i in range(30, len(test_data)):
        x_test.append(test_data[i - 30:i, 0])
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # Predict and map the results back to the price scale
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # Plot the last ~500 training closes plus the validation set and predictions
    train = data[-500:training_data_len]
    valid = data[training_data_len:].copy()  # .copy() avoids SettingWithCopyWarning
    valid['Predictions'] = predictions

    img = io.BytesIO()
    plt.figure(figsize=(16, 8))
    plt.xlabel('Date', fontsize=18)
    plt.ylabel('Close Price USD ($)', fontsize=18)
    plt.plot(train['Close'])
    plt.plot(valid[['Close', 'Predictions']])
    plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
    plt.savefig(img, format='png')
    plt.close()  # free the figure; only the PNG buffer is returned
    plot_url = base64.b64encode(img.getbuffer()).decode("ascii")
    return plot_url
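# stock_predict_plt returns a base64-encoded PNG, which suggests it is meant to
# be embedded in a web page. A usage sketch (ticker and output file are
# illustrative only, not from the original project):
if __name__ == '__main__':
    plot_url = stock_predict_plt('AAPL')
    with open('plot.html', 'w') as f:
        f.write('<img src="data:image/png;base64,{}"/>'.format(plot_url))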
def crearSerie(ticker, start='2012-01-01', end=None, window_size=60):
    # Default end to "now" at call time (a datetime.now() default argument
    # would be frozen at import time)
    if end is None:
        end = datetime.now()
    # Fetch quotes from Yahoo Finance
    df = DataReader(ticker, data_source='yahoo', start=start, end=end)
    # We will work with the closing prices
    data = df.filter(['Close'])
    # Get the values as a numpy array
    dataset = data.values
    # Number of rows used for training
    training_data_len = int(np.ceil(len(dataset) * .8))

    # Scale the values to the [0, 1] range
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    # Persist the scaler so the forecasts can reuse it (added in V2.0)
    saveScaler(ticker, scaler)

    # Training values
    train_data = scaled_data[0:int(training_data_len), :]
    x_train = []
    y_train = []
    # Split the training data into x_train and y_train using windows of
    # window_size steps
    for i in range(window_size, len(train_data)):
        x_train.append(train_data[i - window_size:i, 0])
        y_train.append(train_data[i, 0])
    # Convert x_train and y_train to numpy arrays
    x_train, y_train = np.array(x_train), np.array(y_train)
    # Reshape x_train to (samples, timesteps, features)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # Build the test dataset; each window needs the window_size preceding values
    test_data = scaled_data[training_data_len - window_size:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]
    for i in range(window_size, len(test_data)):
        x_test.append(test_data[i - window_size:i, 0])
    # Convert x_test to a numpy array and reshape it
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # Last time window of the series, used as the forecast input
    x_forecast = [scaled_data[len(dataset) - window_size:, 0]]
    x_forecast = np.array(x_forecast)
    x_forecast = np.reshape(x_forecast,
                            (x_forecast.shape[0], x_forecast.shape[1], 1))

    return (scaler, x_train, y_train, x_test, y_test, data, scaled_data,
            training_data_len, x_forecast)
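# A sketch of how crearSerie plugs into training: entrenarTicker is a
# hypothetical helper (not in the original) that fits the model built by
# crearModelo and saves its weights where crearDatosForecast expects them.
def entrenarTicker(ticker, window_size=60, epochs=20):
    (scaler, x_train, y_train, x_test, y_test, data, scaled_data,
     training_data_len, x_forecast) = crearSerie(ticker, window_size=window_size)
    modelo = crearModelo(window_size, 'mae', 'adam', ['mae'])
    modelo.fit(x_train, y_train, batch_size=1, epochs=epochs)
    modelo.save_weights(os.path.join(FOLDER, ticker + '.h5'))
    return modelo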
def stock_predict(SYMBOL, COMPANY):
    n_of_pages = 10
    look_back = 7
    # COMPANY = "FACEBOOK"
    # SYMBOL = "FB"

    # Scrape n_of_pages of news for the company and score their sentiment
    df = pd.concat(
        [extract_link_of_news(COMPANY, i) for i in range(1, n_of_pages + 1)],
        ignore_index=True)
    analyser = SentimentIntensityAnalyzer()
    df["text_split"] = df["internals_text"].apply(split_by_dot)
    df["time"] = pd.to_datetime(df["internals_dates"])
    df = df.sort_values("time")
    df = df.set_index("time")
    # Per-sentence scores, their mean, and a whole-article score
    df["sentimental_analysis_phrase"] = df["text_split"].apply(
        sentimental_analysis_by_phrase)
    df["sentimental_analysis_average"] = df[
        "sentimental_analysis_phrase"].apply(np.mean)
    df["sentimental_analysis_score"] = df["internals_text"].apply(
        sentimental_analysis)

    # Average the sentiment score per day
    sentiment_df = df[[
        'internals_dates', 'sentimental_analysis_score', 'internal_urls'
    ]].groupby('internals_dates').mean(numeric_only=True).reset_index()
    sentiment_df["time"] = pd.to_datetime(sentiment_df["internals_dates"])
    sentiment_df = sentiment_df.sort_values("time").reset_index(drop=True)

    # Download quotes covering the news date range and left-join the daily
    # sentiment; days without news get a score of 0
    end = max(sentiment_df['time'])
    start = min(sentiment_df['time'])
    stock_df = DataReader(SYMBOL, data_source='yahoo', start=start, end=end)
    stock_df = stock_df.filter(['Close'])
    stock_df["t1"] = pd.to_datetime(stock_df.index)
    result = pd.merge(stock_df, sentiment_df, how='left',
                      left_on='t1', right_on='time', sort=True)
    result = result.fillna(0)
    result.set_index(["t1"], inplace=True)

    # Original time series (Y): scaled closing prices
    y = result.Close.values
    y = y.astype('float32')
    y = np.reshape(y, (-1, 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    y = scaler.fit_transform(y)
    # Extra feature: the daily sentiment score
    X = result.sentimental_analysis_score.values
    X = X.astype('float32')
    X = np.reshape(X, (-1, 1))

    # The whole series is used for training; there is no held-out test split
    train_size = len(y)
    train_y = y[0:train_size + 2, :]
    train_x = X[0:train_size + 2, :]

    # Windowed inputs for prices and sentiment, stacked as two feature rows
    X_train_features_1, y_train = create_dataset(train_y, look_back)
    X_train_features_2, _ = create_dataset(train_x, look_back)
    X_train_features_1 = np.reshape(
        X_train_features_1,
        (X_train_features_1.shape[0], 1, X_train_features_1.shape[1]))
    X_train_features_2 = np.reshape(
        X_train_features_2,
        (X_train_features_2.shape[0], 1, X_train_features_2.shape[1]))
    X_train_all_features = np.append(X_train_features_1, X_train_features_2,
                                     axis=1)

    # Encoder-decoder LSTM emitting a 3-step forecast
    model = Sequential()
    model.add(LSTM(128, activation='relu',
                   input_shape=(X_train_all_features.shape[1],
                                X_train_all_features.shape[2])))
    model.add(RepeatVector(30))
    model.add(LSTM(16, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(3)))
    model.compile(loss='mean_squared_error', optimizer='adam')
    history = model.fit(
        X_train_all_features, y_train,
        epochs=50, batch_size=1,
        # validation_data=(X_test_all_features, y_test),
        callbacks=[EarlyStopping(monitor='loss', patience=10)],
        verbose=0, shuffle=True)

    # Forecast from the last look_back prices and sentiment scores
    p = np.array([[[i[0] for i in y[-look_back:]],
                   [i[0] for i in X[-look_back:]]]])
    test_predict = model.predict(p)
    test_predict = scaler.inverse_transform(
        np.array([x[0] for x in test_predict]))[0]

    # Timestamps for the three forecast days
    test_predict_time = [
        result.index[-1] + timedelta(days=1),
        result.index[-1] + timedelta(days=2),
        result.index[-1] + timedelta(days=3)
    ]
    # Frame the forecast and the historical + forecast series for plotting
    plt_predict = pd.DataFrame(data=test_predict, index=test_predict_time)
    plt_historical = pd.DataFrame(
        data=list(result['Close'].values) + list(test_predict),
        index=list(result.index.values) + test_predict_time)

    # Server-side matplotlib rendering, currently disabled in favour of
    # returning the raw frames:
    # img = io.BytesIO()
    # plt.style.use('seaborn-dark')
    # plt.figure(figsize=(16, 8))
    # plt.xlabel('Date', fontsize=18)
    # plt.ylabel('Close Price USD ($)', fontsize=18)
    # plt.plot(plt_historical[-30:], label="History", marker=".")
    # plt.plot(plt_predict, label="Predict", marker=".")
    # plt.annotate("{:.2f}".format(plt_predict[0][0]), (plt_predict.index[0], plt_predict[0][0]), ha='right')
    # plt.annotate("{:.2f}".format(plt_predict[0][1]), (plt_predict.index[1], plt_predict[0][1]), ha='center')
    # plt.annotate("{:.2f}".format(plt_predict[0][2]), (plt_predict.index[2], plt_predict[0][2]), ha='left')
    # plt.legend(['History', 'Predictions'], loc='lower right')
    # plt.title("LSTM fit of Stock Market Prices Including Sentiment Signal", size=20)
    # plt.tight_layout()
    # sns.despine(top=True)
    # plt.grid()
    # plt.savefig(img, format='png')
    # plot_url = base64.b64encode(img.getbuffer()).decode("ascii")

    # Return the ten most recent de-duplicated news items, newest first
    news_return = []
    a = df[["internals_text", "sentimental_analysis_score",
            "internal_urls"]].drop_duplicates()
    for idx, row in a[-10:].iterrows():
        line = {
            "text": row["internals_text"],
            "date": idx.date(),
            "score": row["sentimental_analysis_score"],
            "link": row["internal_urls"]
        }
        news_return.insert(0, line)

    return plt_predict, plt_historical, news_return, list(test_predict)
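# split_by_dot, sentimental_analysis and sentimental_analysis_by_phrase are
# called in stock_predict but not defined in this excerpt. Plausible minimal
# implementations built on the same VADER analyser; treat them as sketches,
# not the project's originals.
_analyser = SentimentIntensityAnalyzer()


def split_by_dot(text):
    # Naive sentence split on '.', dropping empty fragments
    return [s.strip() for s in text.split('.') if s.strip()]


def sentimental_analysis(text):
    # VADER compound polarity in [-1, 1] for a whole article
    return _analyser.polarity_scores(text)['compound']


def sentimental_analysis_by_phrase(phrases):
    # One compound score per sentence
    return [_analyser.polarity_scores(p)['compound'] for p in phrases]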
# Moving-average panels (fig and axes come from an earlier subplots() call,
# and the MA-* columns are computed beforehand; see the sketch after this block)
MSFT[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[0, 2])
axes[0, 2].set_title('Microsoft')
AMZN[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[0, 3])
axes[0, 3].set_title('Amazon')
DIS[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[1, 0])
axes[1, 0].set_title('Disney')
TSLA[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[1, 1])
axes[1, 1].set_title('Tesla')
GME[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[1, 2])
axes[1, 2].set_title('Gamestop')
AMC[['Adj Close', 'MA-5 days', 'MA-10 days', 'MA-20 days', 'MA-40 days']].plot(ax=axes[1, 3])
axes[1, 3].set_title('AMC')
fig.tight_layout()

# LSTM data preparation for Apple (beginDate and endDate are defined earlier)
df = DataReader('AAPL', 'yahoo', beginDate, endDate)
data = df.filter(['Close'])
# Convert the dataframe to a numpy array
dataset = data.values
# Get the number of rows to train the model on
training_data_len = int(np.ceil(len(dataset) * .95))

# Scale the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

# Create the scaled training data set
train_data = scaled_data[0:int(training_data_len), :]
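# The 'MA-5 days' ... 'MA-40 days' columns plotted above are assumed to be
# computed in an earlier cell. A sketch of that step with pandas rolling means:
for stock in (MSFT, AMZN, DIS, TSLA, GME, AMC):
    for ma in (5, 10, 20, 40):
        stock['MA-{} days'.format(ma)] = stock['Adj Close'].rolling(window=ma).mean()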
def create_dataset(dataset, look_back=1):
    # Function header and X/Y initialisation restored from the call sites
    # above; the excerpt starts mid-function. Builds sliding windows of
    # look_back values (X) and 3-step-ahead targets (Y).
    X, Y = [], []
    for i in range(len(dataset) - look_back - 2):
        a = dataset[i:(i + look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back:(i + look_back + 3), 0])
    return np.array(X), np.array(Y)


look_back = 7
end = max(sentiment_df['time'])
start = min(sentiment_df['time'])  # - timedelta(days=6)
stock_df = DataReader(SYMBOL, data_source='yahoo', start=start, end=end)
# stock_df["ra"] = stock_df.Close.rolling(window=5).mean()
# stock_df = stock_df[stock_df['ra'].notna()]
stock_df = stock_df.filter(['Close'])
stock_df["t1"] = pd.to_datetime(stock_df.index)
result = pd.merge(stock_df, sentiment_df, how='left',
                  left_on='t1', right_on='time', sort=True)
result = result.fillna(0)
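# Quick shape check for create_dataset defined above (values illustrative):
# with look_back = 7 each window of 7 values maps to the next 3 steps, so a
# series of length 20 yields 20 - 7 - 2 = 11 samples.
demo = np.arange(20, dtype='float32').reshape(-1, 1)
X_demo, Y_demo = create_dataset(demo, look_back=7)
print(X_demo.shape, Y_demo.shape)  # (11, 7) (11, 3)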
    fig1 = plot_plotly(m, forecast)
    st.plotly_chart(fig1, use_container_width=True)
except Exception:  # narrower than a bare except; do not swallow KeyboardInterrupt
    pass

# # # # # # # # # # # # # # # Prediction using LSTM # # # # # # # # # # # # # # #

st.header("Prediction with Long Short-term Memory (LSTM)")
st.write(" ")

try:
    # Create a new dataframe with only the 'Close' column
    LSTM_df = DataReader(selected_stock, data_source='yahoo',
                         start='2019-03-01', end=datetime.now())
    LSTM_data = LSTM_df.filter(['Close'])
    # Convert the dataframe to a numpy array
    LSTM_dataset = LSTM_data.values
    # Get the number of rows to train the model on
    training_data_len = int(np.ceil(len(LSTM_dataset) * .95))

    # Scale the data
    # from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(LSTM_dataset)

    # Create the scaled training data set
    train_data = scaled_data[0:int(training_data_len), :]
    # Split the data into x_train and y_train data sets
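    # The Prophet objects m and forecast used at the top of this block come
    # from an earlier section of the app (not shown). A sketch of how they are
    # typically produced, assuming df_train holds 'ds'/'y' columns built from
    # the Close series:
    #
    #   from prophet import Prophet
    #   from prophet.plot import plot_plotly
    #   m = Prophet()
    #   m.fit(df_train)                                # columns: 'ds', 'y'
    #   future = m.make_future_dataframe(periods=365)  # forecast one year ahead
    #   forecast = m.predict(future)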