def get_last_trade_data(self, tkr, exchange, duration):
    """
    Fetch price data for a single ticker.

    tkr should be a valid ticker for the exchange.
    exchange should be a valid exchange code.
    duration is forwarded unchanged to ``gfc.get_prices_data`` as its
    second argument.

    Returns whatever ``gfc.get_prices_data`` returns for the one-element
    query list.
    """
    query = [{'q': tkr, 'x': exchange}]
    # The log entry is written before the request is issued.
    self.logger.info("Queried data for " + tkr)
    return gfc.get_prices_data(query, duration)
def Stock_name(cmpyname):
    """Fit a linear regression on five years of prices and plot a forecast.

    Prices for ``cmpyname`` are fetched with ``get_prices_data`` for the
    period ``"5Y"``. The regression target (``label``) is the
    ``<cmpyname>_Close`` column shifted ``forecast_out`` rows into the
    future, where ``forecast_out`` is 9% of the row count, rounded up.

    The chart (close price and forecast) is written to
    ``static/images/var4``.

    :param cmpyname: ticker symbol, used as the ``'q'`` query value and as
        the column-name prefix of the returned price frame.
    :return: the frame including the appended forecast rows, with the date
        as a regular ``Date`` column, sliced from row 1050 onwards.
    """
    from googlefinance.client import get_prices_data

    params = [{'q': cmpyname}]
    period = "5Y"
    df = get_prices_data(params, period)

    forecast_col = cmpyname + '_Close'
    df.fillna(-99999, inplace=True)
    forecast_out = int(math.ceil(0.09 * len(df)))
    df['label'] = df[forecast_col].shift(-forecast_out)

    # Features are every column except the target. The previous code dropped
    # the close column and kept 'label', so the model was trained on the
    # very value it was asked to predict.
    X = np.array(df.drop(columns=['label']))
    # Take the most recent rows (the ones with no label yet) BEFORE trimming
    # X; slicing afterwards returned rows that are part of the training set.
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]

    # Removes the trailing rows whose label is NaN, keeping X and Y aligned.
    df.dropna(inplace=True)
    Y = np.array(df['label'])

    clf = LinearRegression(n_jobs=-1)
    # NOTE(review): `cross_validation` is whatever the file imports under
    # that name; recent scikit-learn exposes train_test_split from
    # sklearn.model_selection instead - confirm the installed version.
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.2)
    clf.fit(X_train, Y_train)

    style.use('ggplot')
    forecast_set = clf.predict(X_lately)
    df['Forecast'] = np.nan

    # Append one row per forecast value, one day (86400 s) apart, starting
    # the day after the last remaining row of the frame.
    last_index = df.iloc[-1].name
    last_date = datetime.combine(last_index, datetime.min.time())
    one_day = 86400
    next_unix = last_date.timestamp() + one_day
    empty_cells = [np.nan] * (len(df.columns) - 1)
    for value in forecast_set:
        next_date = datetime.fromtimestamp(next_unix)
        next_unix += one_day
        # Every column is NaN except the last one ('Forecast').
        df.loc[next_date] = empty_cells + [value]

    df.reset_index(level=None, inplace=True)
    df = df.rename(index=str, columns={"index": "Date"})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

    plt.figure(figsize=(20, 8))
    df[forecast_col].plot()
    # Plotted once; a second, post-legend plot of the same series used to
    # overdraw this line in a colour that did not match the legend.
    df['Forecast'].plot()
    plt.legend(loc=4)
    plt.xlabel('Date')
    plt.ylabel('Price')

    df.reset_index(level=None, inplace=True)
    df = df[1050:]
    plt.savefig('static/images/var4')
    return df
def example_2(self):
    """Fetch one year of prices for three market indices and print them."""
    # (ticker, exchange) pairs, in query order.
    indices = (
        (".DJI", "INDEXDJX"),      # Dow Jones
        ("NYA", "INDEXNYSEGIS"),   # NYSE COMPOSITE (DJ)
        (".INX", "INDEXSP"),       # S&P 500
    )
    params = [{'q': ticker, 'x': exchange} for ticker, exchange in indices]
    # get open, high, low, close, volume data (return pandas dataframe)
    print(get_prices_data(params, "1Y"))
def fetch_nyse_index(self):
    """
    Method to fetch the daily NYSE index.

    Queries the NYSE COMPOSITE (DJ) index ("NYA" on "INDEXNYSEGIS") for the
    period "5d", names the frame's index ``INDEX`` and keeps the last row.

    : param self
    : return the last row of the price frame (pandas DataFrame) on success,
             or -1 (int) if any exception was raised; the exception is
             printed in that case.
    """
    failure_code = -1
    # NYSE COMPOSITE (DJ)
    query = [{'q': "NYA", 'x': "INDEXNYSEGIS"}]
    try:
        frame = get_prices_data(query, "5d")
        frame.index.name = INDEX
        return frame.tail(n=1)
    except Exception as exc:
        print(exc)
        return failure_code
'x': "INDEXDJX", }, # NYSE COMPOSITE (DJ) { 'q': "NYA", 'x': "INDEXNYSEGIS", }, # S&P 500 { 'q': ".INX", 'x': "INDEXSP", } ] period = "1Y" # get open, high, low, close, volume data (return pandas dataframe) df = get_prices_data(params, period) print(df) # .DJI_Open .DJI_High .DJI_Low .DJI_Close .DJI_Volume \ # 2016-07-20 18503.12 18562.53 18495.11 18559.01 85840786 # 2016-07-21 18582.70 18622.01 18555.65 18595.03 93233337 # 2016-07-22 18589.96 18590.44 18469.67 18517.23 86803016 # 2016-07-23 18524.15 18571.30 18491.59 18570.85 87706622 # 2016-07-26 18554.49 18555.69 18452.62 18493.06 76807470 # ... ... ... ... ... ... params = [ # Dow Jones { 'q': ".DJI", 'x': "INDEXDJX", },
def _arima_recursive_forecast(history, steps):
    """Forecast ``steps`` values one at a time with an ARIMA(3, 1, 0) model.

    The model is refitted for every step, and each forecast is appended to
    the history used for the next fit.
    """
    predictions = np.empty((0), dtype=np.float32)
    for _ in range(steps):
        # `disp=False` silences the optimiser's convergence output; the
        # previous spelling `dist=False` is not a fit() parameter.
        fitted = ARIMA(history, order=(3, 1, 0)).fit(disp=False)
        step = fitted.forecast()[0]
        predictions = np.hstack([predictions, step])
        history = np.hstack([history, step])
    return predictions


def _actual_vs_predicted(train, test, close_col, predictions, prediction_label):
    """Build a Date-indexed frame of actual closes next to the predictions.

    The prediction column is NaN over the training span so that it lines up
    with the test rows only.
    """
    actual = pd.concat([train[close_col], test[close_col]])
    padding = np.full(len(train), np.nan)
    frame = pd.DataFrame({
        'Date': actual.index,
        'Original': actual.values,
        prediction_label: np.hstack([padding, predictions]),
    })
    return frame.set_index('Date')


def Stock_name(cmpyname):
    """Plot ARIMA forecasts for ``cmpyname`` and render the prediction page.

    Five years of prices are fetched with ``get_prices_data``. Two
    train/test windows are forecast with a recursively refitted
    ARIMA(3, 1, 0): rows 0-879 vs. the rest, and rows 734-1099 vs. the rest
    (the "1 year" window). The chart is saved to
    ``static/images/firstimage.png``.

    :param cmpyname: ticker symbol, used as the ``'q'`` query value and as
        the column-name prefix of the returned price frame.
    :return: the result of rendering ``PredictionPage.html`` with the saved
        image path.
    """
    from googlefinance.client import get_prices_data

    close_col = cmpyname + '_Close'
    params = [{'q': cmpyname}]
    period = "5Y"
    # get open, high, low, close, volume data (return pandas dataframe)
    df = get_prices_data(params, period)
    df.reset_index(level=None, inplace=True)
    df = df.rename(index=str, columns={"index": "Date"})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

    previous_close = df.shift()[close_col]
    df['Close_diff'] = df[close_col] - previous_close
    df['Close_diff_log'] = np.log1p(df[close_col]) - np.log1p(previous_close)
    df = df[[close_col, 'Close_diff', 'Close_diff_log']]
    # The diff columns are NaN on the first row, so this drops it and fixes
    # the row offsets used by the hard-coded windows below.
    df = df.dropna()

    # Full-history window.
    train = df[0:880]
    test = df[880:]
    predictions = _arima_recursive_forecast(train[close_col].values, len(test))
    orgs = _actual_vs_predicted(train, test, close_col, predictions,
                                'Prediction')
    # NOTE(review): this first chart appears never to be saved - the only
    # savefig call runs after the second .plot() below. Confirm whether this
    # pass (one ARIMA fit per test row) is still needed.
    orgs.plot(color=['blue', 'red'])

    ## 1 year
    train = df[734:1100]
    test = df[1100:]
    predictions = _arima_recursive_forecast(train[close_col].values, len(test))
    orgs = _actual_vs_predicted(train, test, close_col, predictions,
                                'Prediction(trend)')
    orgs.plot(color=['blue'])

    plt.savefig('static/images/firstimage.png')
    return render_template("PredictionPage.html",
                           imagename="static/images/firstimage.png")
def summary_metrics(compnyname):
    """Compare three models on five years of prices for ``compnyname``.

    Fits a ``LinearRegression``, a ``RandomForestRegressor`` and an ARIMA
    walk-forward forecast, pickles each model to the working directory, and
    reports mean squared error and R2 for each.

    :param compnyname: ticker symbol, used as the ``'q'`` query value and as
        the column-name prefix of the returned price frame.
    :return: DataFrame with columns ``Model_Name``, ``Mean_squared_error``
        and ``R2_score``, sorted by ``R2_score`` descending.
    """
    from googlefinance.client import get_prices_data

    params = [{'q': compnyname}]
    period = "5Y"
    df = get_prices_data(params, period)

    results = []  # (model name, MSE, R2) in evaluation order

    # Column 3 is the regression target and columns 0, 1, 2, 4 the features.
    # Assumes the frame is ordered Open, High, Low, Close, Volume - TODO
    # confirm against get_prices_data.
    X = df.iloc[:, [0, 1, 2, 4]].values
    Y = df.iloc[:, 3].values
    # One split shared by both regressors so their scores refer to the same
    # hold-out rows. `random_state=np.random` selects NumPy's global random
    # state, so the split is not reproducible between runs.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=np.random)

    clf = LinearRegression(n_jobs=-1)
    clf.fit(X_train, Y_train)
    with open('Linear_Regression_model.pckl', 'wb') as fh:
        pickle.dump(clf, fh)
    prediction1 = clf.predict(X_test)
    results.append((' LinearRegression',
                    mean_squared_error(Y_test, prediction1),
                    r2_score(Y_test, prediction1)))

    regressor = RandomForestRegressor()
    regressor.fit(X_train, Y_train)
    with open('Random_Forest_Regressor_model.pckl', 'wb') as fh:
        pickle.dump(regressor, fh)
    prediction2 = regressor.predict(X_test)
    results.append(('RandomForestRegressor',
                    mean_squared_error(Y_test, prediction2),
                    r2_score(Y_test, prediction2)))

    # The frame fetched above is reused here; it has not been modified, so a
    # second identical request is unnecessary.
    dated = df.reset_index(level=0)
    dated = dated.rename(index=str, columns={"index": "Date"})
    dated['Date'] = pd.to_datetime(dated['Date'])
    close = dated.set_index('Date')[compnyname + '_Close']

    model = ARIMA(close, order=(3, 1, 0))
    model.fit(disp=0)
    # NOTE(review): this pickles the model object, not the results object
    # returned by fit(), and uses order (3, 1, 0) while the evaluation below
    # uses (5, 1, 0) - confirm both are intended.
    with open('ARIMA_model.pckl', 'wb') as fh:
        pickle.dump(model, fh)

    # Walk-forward evaluation: refit on all history seen so far, forecast
    # one step, then reveal the true observation.
    values = close.values
    size = int(len(values) * 0.80)
    train, test = values[:size], values[size:]
    history = list(train)
    predictions = []
    for obs in test:
        fitted = ARIMA(history, order=(5, 1, 0)).fit(disp=0)
        predictions.append(fitted.forecast()[0])
        history.append(obs)
    results.append(('ARIMA',
                    mean_squared_error(test, predictions),
                    r2_score(test, predictions)))

    summary = pd.DataFrame(
        results, columns=["Model_Name", "Mean_squared_error", "R2_score"])
    return summary.sort_values(ascending=False,
                               by="R2_score").reset_index(drop=True)