import datetime
import pandas_datareader.data as web
from sklearn import preprocessing


def get_stock_data_classification(stock_name, normalize=True, ma=[]):
    """Label daily moves for classification: Rise = +1, Fall = -1, otherwise = 0."""
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    # optional moving-average features, one column per window size
    for moving in ma:
        df['{}ma'.format(moving)] = df['Adj Close'].rolling(window=moving).mean()
    df.drop(['Close'], axis=1, inplace=True)
    df['Pct'] = df['Adj Close'].pct_change()
    df.dropna(inplace=True)
    if normalize:
        # each column is scaled to [0, 1] independently
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Open'] = min_max_scaler.fit_transform(df.Open.values.reshape(-1, 1))
        df['High'] = min_max_scaler.fit_transform(df.High.values.reshape(-1, 1))
        df['Low'] = min_max_scaler.fit_transform(df.Low.values.reshape(-1, 1))
        df['Volume'] = min_max_scaler.fit_transform(df.Volume.values.reshape(-1, 1))
        df['Adj Close'] = min_max_scaler.fit_transform(df['Adj Close'].values.reshape(-1, 1))
        for moving in ma:
            df['{}ma'.format(moving)] = min_max_scaler.fit_transform(
                df['{}ma'.format(moving)].values.reshape(-1, 1))
    # class labels are derived from the raw percentage change, before it is scaled
    df['Pos'] = list(map(buy_sell_hold_pos, df['Pct']))
    df['Neg'] = list(map(buy_sell_hold_neg, df['Pct']))
    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Pct'] = min_max_scaler.fit_transform(df.Pct.values.reshape(-1, 1))
    return df
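# buy_sell_hold_pos / buy_sell_hold_neg are used above but not defined in this
# snippet. A minimal sketch, assuming a simple return threshold (the 0.5%
# cutoff is a hypothetical choice, not a value from the source):
def buy_sell_hold_pos(pct_change, threshold=0.005):
    # +1 (rise) when the daily return exceeds the threshold, else 0 (hold)
    return 1 if pct_change > threshold else 0


def buy_sell_hold_neg(pct_change, threshold=0.005):
    # -1 (fall) when the daily return drops below -threshold, else 0 (hold)
    return -1 if pct_change < -threshold else 0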
def get_stock_data(stock_name, normalize=True, ma=[]):
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    for moving in ma:
        df['{}ma'.format(moving)] = df['Adj Close'].rolling(window=moving).mean()
    df.drop(['Close'], axis=1, inplace=True)
    df.dropna(inplace=True)
    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Open'] = min_max_scaler.fit_transform(df.Open.values.reshape(-1, 1))
        df['High'] = min_max_scaler.fit_transform(df.High.values.reshape(-1, 1))
        df['Low'] = min_max_scaler.fit_transform(df.Low.values.reshape(-1, 1))
        df['Volume'] = min_max_scaler.fit_transform(df.Volume.values.reshape(-1, 1))
        df['Adj Close'] = min_max_scaler.fit_transform(df['Adj Close'].values.reshape(-1, 1))
        for moving in ma:
            df['{}ma'.format(moving)] = min_max_scaler.fit_transform(
                df['{}ma'.format(moving)].values.reshape(-1, 1))
    return df
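# A minimal usage sketch (the ticker and the 50/100-day windows are arbitrary
# examples, not values from the source):
df = get_stock_data('^GSPC', normalize=True, ma=[50, 100])
print(df.tail())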
def denormalize(stock_name, normalized_value):
    """Map scaled model output back to price space by refitting a scaler on the raw series."""
    start = datetime.datetime(2000, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    df = df['Adj Close'].values.reshape(-1, 1)
    normalized_value = normalized_value.reshape(-1, 1)
    # refit on the raw prices so inverse_transform knows the original min/max;
    # this only recovers the right values if the same date range was used when normalizing
    min_max_scaler = preprocessing.MinMaxScaler()
    min_max_scaler.fit_transform(df)
    new = min_max_scaler.inverse_transform(normalized_value)
    return new
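# Design note (a sketch, not from the source): denormalize() only recovers the
# right prices if the refit scaler sees exactly the same series that was scaled
# in get_stock_data (same ticker and same date range; note the 2000 vs. 1950
# start dates above). Keeping the fitted scaler avoids that coupling entirely:
import numpy as np
from sklearn import preprocessing

raw_prices = np.array([10.0, 12.5, 11.75, 13.2])  # hypothetical price series
scaler = preprocessing.MinMaxScaler()
scaled = scaler.fit_transform(raw_prices.reshape(-1, 1))
restored = scaler.inverse_transform(scaled)  # equals raw_prices again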
def get_stock_data(stock_name, normalize):
    # simpler variant: drops Volume as well, keeping only OHLC-derived columns
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    df.drop(['Volume', 'Close'], axis=1, inplace=True)
    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Open'] = min_max_scaler.fit_transform(df.Open.values.reshape(-1, 1))
        df['High'] = min_max_scaler.fit_transform(df.High.values.reshape(-1, 1))
        df['Low'] = min_max_scaler.fit_transform(df.Low.values.reshape(-1, 1))
        df['Adj Close'] = min_max_scaler.fit_transform(df['Adj Close'].values.reshape(-1, 1))
    return df
def creacionElementosDiccionario(intervalo1, intervalo2, hora_del_dia,
                                 hora_de_referencia, day, df1_aux, pair):
    # history for this (tollgate, direction) pair on the given day (test set 1)
    try:
        conn = psycopg2.connect("dbname='tfgtest1' user='******' host='localhost' password='******'")
    except:
        print("I am unable to connect to the database")
    cur = conn.cursor()
    query = ("select time_window[1], volume from traffic_volume_tollgates_test1 "
             "where tollgate_id = '" + str(pair[0]) + "' AND direction = " + str(pair[1]) +
             " AND (time_window[1].time BETWEEN " + intervalo1 + ")"
             " AND (time_window[1].date = DATE '" + str(day) + "') order by time_window;")
    cur.execute(query)
    rows = cur.fetchall()
    df2 = pd.DataFrame.from_records(rows, columns=['date', 'volume'])
    result_dataframe = pd.concat([df1_aux, df2])

    # ground-truth volumes for the interval to predict (training set 2)
    try:
        conn = psycopg2.connect("dbname='tfgtraining2' user='******' host='localhost' password='******'")
    except:
        print("I am unable to connect to the database")
    query = ("select time_window[1], volume from traffic_volume_tollgates_training2 "
             "where tollgate_id = '" + str(pair[0]) + "' AND direction = " + str(pair[1]) +
             " AND (time_window[1].date = DATE '" + str(day) + "')"
             " AND (time_window[1].time BETWEEN " + intervalo2 + ") order by time_window;")
    cur = conn.cursor()
    cur.execute(query)
    rows2 = cur.fetchall()

    serie = pd.Series(result_dataframe['volume'].values, index=result_dataframe['date'])

    # grid-search ARIMA orders by MSE of a 6-step forecast against the real values
    best_score, best_cfg = float("inf"), None
    valores_reales = [element[1] for element in rows2]
    for p in range(3, 10):
        for d in range(3):
            for q in range(5):
                orderr = (p, d, q)
                print("ORDEN : ", orderr)
                try:
                    model = ARIMA(serie, order=orderr)
                    model_fit = model.fit(disp=0)
                    forecast = model_fit.forecast(steps=6)[0]
                    # map each real timestamp to its 20-minute (1200 s) slot in the forecast
                    new_forecast = []
                    for element in rows2:
                        slot = ((datetime.datetime(2018, 1, 1, element[0].hour, element[0].minute, 0)
                                 - hora_de_referencia) / 1200).seconds
                        new_forecast.append(forecast[slot])
                    mse = mean_squared_error(valores_reales, new_forecast)
                    print("MSE : ", mse, " BEST_SCORE: ", best_score)
                    if mse < best_score:
                        best_score, best_cfg = mse, orderr
                except:
                    # some orders fail to converge; skip them and keep searching
                    continue
    print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

    # refit the best configuration and store the rounded 6-step forecast,
    # tagged with the winning order, in the global dictionary
    model = ARIMA(serie, order=best_cfg)
    model_fit = model.fit(disp=0)
    predictions = model_fit.forecast(steps=6)[0]
    predictions = [round(element) for element in predictions]
    predicciones_pair_dia[pair[0], pair[1], day, hora_del_dia] = predictions
    predicciones_pair_dia[pair[0], pair[1], day, hora_del_dia] = np.append(
        predicciones_pair_dia[pair[0], pair[1], day, hora_del_dia], str(best_cfg))
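# Security sketch (not from the source): the queries above are built by string
# concatenation. psycopg2 can bind values server-side instead, which avoids SQL
# injection and the missing-space/quoting bugs concatenation invites. The
# credentials and the (tollgate_id, direction) values below are hypothetical.
import psycopg2

conn = psycopg2.connect("dbname='tfgtest1' user='tfg' host='localhost' password='secret'")
cur = conn.cursor()
cur.execute(
    "SELECT time_window[1], volume FROM traffic_volume_tollgates_test1 "
    "WHERE tollgate_id = %s AND direction = %s ORDER BY time_window;",
    ('1', 0),
)
rows = cur.fetchall()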
conn = psycopg2.connect("dbname='tfgtraining2' user='******' host='localhost' password='******'") except: print("I am unable to connect to the database") cur = conn.cursor() cur.execute("""SELECT * FROM travel_time_intersection_to_tollgate_training2 WHERE (time_window[1].time BETWEEN TIME '08:00:00' AND TIME '09:40:00') OR (time_window[1].time BETWEEN TIME '17:00:00' AND TIME '18:40:00') ORDER BY intersection_id, tollgate_id, time_window """) rows = cur.fetchall() colnames = ['intersection_id', 'tollgate_id', 'time_window', 'avg_travel_time'] intervals_to_predict_real_avgtraveltime = pd.DataFrame(rows, columns=colnames) routes = np.array(intervals_to_predict_real_avgtraveltime.iloc[:,0:2].values.tolist()) time_intervals = np.array(intervals_to_predict_real_avgtraveltime.iloc[:,2].values.tolist()) routes = np.unique(routes, axis=0); days = np.unique([time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals], axis=0) aux = np.array([]) for time_interval in time_intervals: time_interval[0] = datetime.datetime(2016, 10,18, time_interval[0].hour, time_interval[0].minute) aux = np.append(aux,time_interval[0]); time_intervals = sorted(set(aux)) routes_sum = 0; for route in routes: try: conn = psycopg2.connect("dbname='tfgdatosmodificados' user='******' host='localhost' password='******'") except: print("I am unable to connect to the database") cur = conn.cursor() query = "select time_window[1], avg_travel_time from travel_time_intersection_to_tollgate_modified where intersection_id = '" +route[0] +"' AND tollgate_id = " + route[1] + " order by time_window;" cur.execute(query) rows = cur.fetchall() df1 = pd.DataFrame.from_records(rows, columns=['date','avg_travel_time']) df1 = df1[(df1.avg_travel_time > 50) & (df1.avg_travel_time < 150)]
conn = psycopg2.connect("dbname='tfgtraining2' user='******' host='localhost' password='******'") except: print("I am unable to connect to the database") cur = conn.cursor() cur.execute("""SELECT tollgate_id, direction, time_window, volume FROM traffic_volume_tollgates_training2 WHERE (time_window[1].time BETWEEN TIME '08:00:00' AND TIME '09:40:00') OR (time_window[1].time BETWEEN TIME '17:00:00' AND TIME '18:40:00') ORDER BY tollgate_id, direction, time_window """) rows = cur.fetchall() colnames = ['tollgate_id', 'direction', 'time_window', 'volume'] pairs_to_predict_real_trafficvolume = pd.DataFrame(rows, columns=colnames) pairs = np.array(pairs_to_predict_real_trafficvolume.iloc[:,0:2].values.tolist()) time_intervals = np.array(pairs_to_predict_real_trafficvolume.iloc[:,2].values.tolist()) pairs = np.unique(pairs, axis=0); days = np.unique([time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals], axis=0) aux = np.array([]) for time_interval in time_intervals: time_interval[0] = datetime.datetime(2016, 10,18, time_interval[0].hour, time_interval[0].minute) aux = np.append(aux,time_interval[0]); time_intervals = sorted(set(aux)) predicciones_pair_dia = dict() count = 0; for pair in pairs: try: conn = psycopg2.connect("dbname='tfgdatosmodificados' user='******' host='localhost' password='******'") except: print("I am unable to connect to the database") cur = conn.cursor() query = "select time_window[1], volume from traffic_volume_tollgates_modified where tollgate_id = '" +str(pair[0]) +"' AND direction = " + str(pair[1]) + " order by time_window;" cur.execute(query) rows = cur.fetchall()
# In[73]:

plt.figure(figsize=(16, 8))
lag_plot(azn_df['open'], lag=5)
plt.title('AZN Stock - Autocorrelation plot with lag = 5')
plt.show()

# In[74]:

plt.figure(figsize=(16, 8))
plt.plot(azn_df["date"], azn_df["close"])
xticks = pd.date_range(datetime.datetime(2010, 1, 1), datetime.datetime(2021, 1, 1), freq='YS')
xticks = xticks.to_pydatetime()
plt.xticks(xticks)
plt.title("AZN stock price over time")
plt.xlabel("time")
plt.ylabel("price")
plt.show()

# In[116]:

# chronological 80/20 split: the first 80% of rows train, the last 20% test
X_train, X_test = azn_df[0:int(len(azn_df) * 0.8)], azn_df[int(len(azn_df) * 0.8):]
X_train = X_train.set_index('date')
X_test = X_test.set_index('date')
X_test
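# In[ ]:

# A sketch, not from the source notebook: with a chronological split, a naive
# "persistence" baseline (predict the previous close) gives an error floor
# that any forecasting model should beat.
from sklearn.metrics import mean_squared_error

naive = X_test['close'].shift(1)
naive.iloc[0] = X_train['close'].iloc[-1]  # seed the first test point with the last train close
print("Persistence MSE:", mean_squared_error(X_test['close'], naive))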
import datetime
import h5py  # tensorflow at backend
import pandas_datareader.data as web
from sklearn import preprocessing

# input parameters
stock_name = '^GSPC'
seq_len = 22
d = 0.2                      # dropout rate
shape = [4, seq_len, 1]      # feature, window, output
neurons = [128, 128, 32, 1]
epochs = 300


# data from 1950
def get_stock_data(stock_name, normalize=True):
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    df.drop(['Volume', 'Close'], axis=1, inplace=True)
    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Open'] = min_max_scaler.fit_transform(df.Open.values.reshape(-1, 1))
        df['High'] = min_max_scaler.fit_transform(df.High.values.reshape(-1, 1))
        df['Low'] = min_max_scaler.fit_transform(df.Low.values.reshape(-1, 1))
        df['Adj Close'] = min_max_scaler.fit_transform(df['Adj Close'].values.reshape(-1, 1))
    return df


df = get_stock_data(stock_name, normalize=True)
# plot normalized closing price
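# A sketch (the layer layout is an assumption, not confirmed by the source) of
# the network these hyperparameters describe: two stacked LSTM layers with
# dropout d over windows of seq_len days and 4 features, a 32-unit dense layer,
# and a single linear output.
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout


def build_model(shape, neurons, d):
    model = Sequential()
    # shape = [features, window, output] per the comment above
    model.add(LSTM(neurons[0], input_shape=(shape[1], shape[0]), return_sequences=True))
    model.add(Dropout(d))
    model.add(LSTM(neurons[1], return_sequences=False))
    model.add(Dropout(d))
    model.add(Dense(neurons[2], activation='relu'))
    model.add(Dense(neurons[3], activation='linear'))
    model.compile(loss='mse', optimizer='adam')
    return model


model = build_model(shape, neurons, d)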
cur.execute("""SELECT *
               FROM travel_time_intersection_to_tollgate_training2
               WHERE (time_window[1].time BETWEEN TIME '08:00:00' AND TIME '09:40:00')
                  OR (time_window[1].time BETWEEN TIME '17:00:00' AND TIME '18:40:00')
               ORDER BY intersection_id, tollgate_id, time_window""")
rows = cur.fetchall()
colnames = ['intersection_id', 'tollgate_id', 'time_window', 'avg_travel_time']
intervals_to_predict_real_avgtraveltime = pd.DataFrame(rows, columns=colnames)

routes = np.array(intervals_to_predict_real_avgtraveltime.iloc[:, 0:2].values.tolist())
time_intervals = np.array(intervals_to_predict_real_avgtraveltime.iloc[:, 2].values.tolist())
routes = np.unique(routes, axis=0)
days = np.unique([time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals], axis=0)

# project timestamps onto a single dummy date so only the time of day matters
aux = np.array([])
for time_interval in time_intervals:
    time_interval[0] = datetime.datetime(2016, 10, 18, time_interval[0].hour, time_interval[0].minute)
    aux = np.append(aux, time_interval[0])
time_intervals = sorted(set(aux))

predicciones_ruta_dia = dict()
count = 0
for route in routes:
    try:
        conn = psycopg2.connect("dbname='tfgdatosmodificados' user='******' host='localhost' password='******'")
    except:
        print("I am unable to connect to the database")
    cur = conn.cursor()
    query = ("select time_window[1], avg_travel_time from travel_time_intersection_to_tollgate_modified "
             "where intersection_id = '" + route[0] + "' AND tollgate_id = " + route[1] +
             " order by time_window;")