def test_ridge_regression():
    """Train a Ridge model on the return index and check its first prediction.

    The file ``../clf/test_N225_ridge.pickle`` (relative to this test module)
    is removed before the run and again afterwards.  The trailing removal sits
    in a ``finally`` clause so a failing assertion does not leave the file
    behind for later tests.
    """
    stock_d = testdata()
    ti = TechnicalIndicators(stock_d)

    filename = "test_N225_ridge.pickle"
    clffile = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "..", "clf", filename)

    # Start from a clean slate so a stale file cannot influence training.
    if os.path.exists(clffile):
        os.remove(clffile)

    try:
        clf = Regression(filename)
        ti.calc_ret_index()
        ret = ti.stock["ret_index"]
        base = ti.stock_raw["Adj Close"][0]

        # The training matrices returned by train() are not needed here.
        clf.train(ret, regression_type="Ridge")
        test_y = clf.predict(ret, base)

        expected = 19177.97
        r = round(test_y[0], 2)
        eq_(r, expected)
    finally:
        # Runs on success and on failure alike.
        if os.path.exists(clffile):
            os.remove(clffile)
def test_ridge_regression():
    """Train a Ridge model on the return index and check its first prediction.

    The file ``../clf/test_N225_ridge.pickle`` (relative to this test module)
    is removed before the run and again afterwards.  The trailing removal sits
    in a ``finally`` clause so a failing assertion does not leave the file
    behind for later tests.
    """
    stock_d = testdata()
    ti = TechnicalIndicators(stock_d)

    filename = 'test_N225_ridge.pickle'
    clffile = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '..', 'clf', filename)

    # Start from a clean slate so a stale file cannot influence training.
    if os.path.exists(clffile):
        os.remove(clffile)

    try:
        clf = Regression(filename)
        ti.calc_ret_index()
        ret = ti.stock['ret_index']
        base = ti.stock_raw['Adj Close'][0]

        # The training matrices returned by train() are not needed here.
        clf.train(ret, regression_type="Ridge")
        test_y = clf.predict(ret, base)

        expected = 19177.97
        r = round(test_y[0], 2)
        eq_(r, expected)
    finally:
        # Runs on success and on failure alike.
        if os.path.exists(clffile):
            os.remove(clffile)
def predict(xlabel, ylabel, x_val, x, y):
    """Fit a degree-5 polynomial to (x, y), predict y at ``x_val`` and plot it.

    The model is trained on the output of ``featureNormalize``; the mean and
    standard deviation of the raw inputs are captured first so that the data,
    the fitted curve and the prediction can be mapped back to original units.

    Args:
        xlabel: Name of the x quantity, used in the plot title.
        ylabel: Name of the y quantity, used in the plot title.
        x_val: Single x value (original units) to predict for.
        x: Observed x values.
        y: Observed y values.

    Returns:
        A two-element list ``[y_val, fig]``: the prediction rescaled to
        original units (as a NumPy array) and the matplotlib figure.  Note
        that ``plt.close('all')`` is called before returning, so the figure
        is no longer registered with pyplot.
    """
    # Statistics of the raw data, needed to undo the normalization below.
    meanx = np.mean(x)
    stddevx = np.std(x)
    meany = np.mean(y)
    stddevy = np.std(y)

    # presumably standardizes to zero mean / unit std, matching the manual
    # (x_val - meanx) / stddevx applied further down -- TODO confirm
    x = featureNormalize(x)
    y = featureNormalize(y)

    reg = Regression()
    reg.set_learning_rate(0.01)
    reg.set_max_iterations(10000)
    reg.set_l1_penalty(0.1)
    reg.set_l2_penalty(0.1)
    reg.set_tolerance(1e-5)
    reg.polynomial_regression(x, y, 5)

    # np.linspace requires an integer sample count; the former 4/0.01 is the
    # float 400.0, which current NumPy rejects with a TypeError.
    z = np.linspace(-1.9, 2.1, 400)
    prediction = reg.predict(z)

    # Map everything that gets plotted back to original units.
    x = np.array(x) * stddevx + meanx
    z = np.array(z) * stddevx + meanx
    y = np.array(y) * stddevy + meany
    prediction = np.array(prediction) * stddevy + meany

    # Predict in normalized space, then rescale the query point and result.
    x_val = (x_val - meanx) / stddevx
    y_val = reg.predict([x_val])
    x_val = np.array([x_val]) * stddevx + meanx
    y_val = np.array(y_val) * stddevy + meany

    fig = plt.figure(figsize=(4, 4))
    plt.plot(x, y, '.', label='Input data')
    plt.plot(z, prediction, 'r-', label='Best fit curve')
    plt.plot(x_val, y_val, 'gx', label='Predicted Data')
    plt.legend(loc=4)
    title = xlabel + " vs " + ylabel
    plt.title(title, size=10)
    plt.xticks(size=8)
    plt.yticks(size=8)
    plt.close('all')
    return [y_val, fig]
def main():
    """Fit the base ``Regression`` model on synthetic 1-D data and plot it.

    Two windows are shown in sequence: the training errors recorded by the
    model while fitting, then a scatter of the train/test points with the
    fitted line.  The test-set mean squared error is printed in between.
    """
    features, targets = make_regression(n_samples=100, n_features=1, noise=20)
    split = train_test_split(features, targets, test_size=0.4)
    feat_train, feat_test, targ_train, targ_test = split
    _n_samples, _n_features = np.shape(features)

    regressor = Regression(n_iterations=100, learning_rate=0.01)
    regressor.fit(feat_train, targ_train)

    # --- training error curve ---
    steps = range(len(regressor.training_errors))
    error_line, = plt.plot(steps, regressor.training_errors,
                           label="Training Error")
    plt.legend(handles=[error_line])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    # --- evaluation on the held-out split ---
    test_predictions = regressor.predict(feat_test)
    mse = mean_squared_error(targ_test, test_predictions)
    print("Mean squared error: %s" % (mse))

    fitted_line = regressor.predict(features)

    # --- scatter of both splits plus the fitted line ---
    palette = plt.get_cmap('viridis')
    x_scale = 366  # x values are stretched by this factor for display
    train_dots = plt.scatter(x_scale * feat_train, targ_train,
                             color=palette(0.9), s=10)
    test_dots = plt.scatter(x_scale * feat_test, targ_test,
                            color=palette(0.5), s=10)
    plt.plot(x_scale * features, fitted_line, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("Base Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((train_dots, test_dots), ("Training data", "Test data"),
               loc='lower right')
    plt.show()
def main():
    """Fit a degree-6 polynomial to ``regression_data.txt`` and plot the fit.

    Reads two comma-separated columns (input, output), trains the project's
    ``Regression`` model, prints the coefficients, the reported cost and the
    iteration count, then saves the plot to ``fit_values.eps`` and shows it.
    """
    # read input data
    my_data = np.genfromtxt('regression_data.txt', dtype=float, delimiter=',')

    # first column is the input, second column the output
    x = my_data[:, 0]
    y = my_data[:, 1]

    # configure the regression object
    reg = Regression()
    reg.set_learning_rate(0.001)
    reg.set_max_iterations(20000)
    reg.set_l1_penalty(0.1)
    reg.set_l2_penalty(0.1)
    reg.set_tolerance(1e-5)

    # fit a polynomial regression model
    theta, cost, it = reg.polynomial_regression(x, y, 6)

    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the print statement is a SyntaxError on Python 3.
    print("Regression coefficients :" + str(theta))
    print("Minimum cost function: " + str(cost))
    print("Iterations taken: " + str(it))

    # predict values for new input; np.linspace needs an integer sample
    # count (the former 4/0.01 is the float 400.0)
    z = np.linspace(-2, 2, 400)
    prediction = reg.predict(z)

    # plot
    fig = plt.figure()
    plt.plot(x, y, '.', label='Input data')
    plt.plot(z, prediction, 'r-', label='Prediction')
    plt.legend(loc=4)
    fig.suptitle('Polynomial Regression Fit')
    plt.xlabel('x (input)')
    plt.ylabel('y (predicted)')
    plt.savefig('fit_values.eps')
    plt.show()
def predict2(x_val, x, y):
    """Predict y at ``x_val`` from a degree-5 polynomial fitted to (x, y).

    The fit runs on ``featureNormalize``-d data; the query point is scaled
    with the raw x mean/std and the result is mapped back with the raw y
    mean/std.

    Args:
        x_val: Single x value in original units.
        x: Observed x values.
        y: Observed y values.

    Returns:
        NumPy array holding the prediction in original y units.
    """
    # Capture the raw statistics before the inputs are normalized.
    x_mean, x_std = np.mean(x), np.std(x)
    y_mean, y_std = np.mean(y), np.std(y)

    x_scaled = featureNormalize(x)
    y_scaled = featureNormalize(y)

    model = Regression()
    model.set_learning_rate(0.001)
    model.set_max_iterations(10000)
    model.set_l1_penalty(0.1)
    model.set_l2_penalty(0.1)
    model.set_tolerance(1e-5)
    _theta, _cost, _iterations = model.polynomial_regression(
        x_scaled, y_scaled, 5)

    scaled_query = (x_val - x_mean) / x_std
    scaled_answer = model.predict([scaled_query])
    return np.array(scaled_answer) * y_std + y_mean
def run(self):
    """Load prices for ``self.code``, compute indicators, predict and plot.

    Data is read from ``self.csvfile`` when it is set (and topped up from the
    web when ``self.update`` is true); otherwise it is downloaded for
    ``self.start``..``self.end``.  The business-day series is then run
    through the ``TechnicalIndicators`` calculations, a ``Classifier`` and a
    Ridge ``Regression``; the last row of ``ti.stock`` receives the
    ``classified`` and ``predicted`` values, the merged frame is saved with
    the ``ti_`` prefix and the chart is drawn.

    Returns:
        The ``TechnicalIndicators`` instance on success, or ``None`` when the
        loaded data is empty or a ``ValueError``/``KeyError`` is raised
        during the analysis.
    """
    io = FileIO()
    will_update = self.update

    if self.csvfile:
        stock_tse = io.read_from_csv(self.code, self.csvfile)
        msg = "".join([
            "Read data from csv: ", self.code,
            " Records: ", str(len(stock_tse))
        ])
        print(msg)

        if self.update and len(stock_tse) > 0:
            # Second entry of a two-period business-day range that starts
            # at the last stored date: the day the download resumes from.
            index = pd.date_range(start=stock_tse.index[-1],
                                  periods=2, freq='B')
            next_day = index[1]
            t = next_day.strftime('%Y-%m-%d')
            newdata = io.read_data(self.code, start=t, end=self.end)
            msg = "".join([
                "Read data from web: ", self.code,
                " New records: ", str(len(newdata))
            ])
            print(msg)
            if len(newdata) < 1:
                # Nothing new was downloaded, so skip retraining below.
                will_update = False
            else:
                # .iloc replaces .ix, which pandas removed in 1.0.
                print(newdata.iloc[-1, :])

            # NOTE(review): merge/save are placed after the if/else (they
            # also run when no new rows arrived) -- confirm against the
            # original layout.
            stock_tse = stock_tse.combine_first(newdata)
            io.save_data(stock_tse, self.code, 'stock_')
    else:
        stock_tse = io.read_data(self.code,
                                 start=self.start, end=self.end)
        msg = "".join([
            "Read data from web: ", self.code,
            " Records: ", str(len(stock_tse))
        ])
        print(msg)

    if stock_tse.empty:
        msg = "".join(["Data empty: ", self.code])
        print(msg)
        return None

    if not self.csvfile:
        io.save_data(stock_tse, self.code, 'stock_')

    try:
        stock_d = stock_tse.asfreq('B').dropna()[self.days:]
        ti = TechnicalIndicators(stock_d)

        # Each calc_* call is kept even where its return value is
        # overwritten; only the last ewma/rsi/roc result is passed to
        # draw.plot().
        ti.calc_sma()
        ti.calc_sma(timeperiod=5)
        ti.calc_sma(timeperiod=25)
        ti.calc_sma(timeperiod=50)
        ti.calc_sma(timeperiod=75)
        ewma = ti.calc_ewma(span=5)
        ewma = ti.calc_ewma(span=25)
        ewma = ti.calc_ewma(span=50)
        ewma = ti.calc_ewma(span=75)
        bbands = ti.calc_bbands()
        sar = ti.calc_sar()
        draw = Draw(self.code, self.fullname)

        ret = ti.calc_ret_index()
        ti.calc_vol(ret['ret_index'])
        rsi = ti.calc_rsi(timeperiod=9)
        rsi = ti.calc_rsi(timeperiod=14)
        mfi = ti.calc_mfi()
        roc = ti.calc_roc(timeperiod=10)
        roc = ti.calc_roc(timeperiod=25)
        roc = ti.calc_roc(timeperiod=50)
        roc = ti.calc_roc(timeperiod=75)
        roc = ti.calc_roc(timeperiod=150)
        ti.calc_cci()
        ultosc = ti.calc_ultosc()
        stoch = ti.calc_stoch()
        ti.calc_stochf()
        ti.calc_macd()
        willr = ti.calc_willr()
        ti.calc_momentum(timeperiod=10)
        ti.calc_momentum(timeperiod=25)
        tr = ti.calc_tr()
        ti.calc_atr()
        ti.calc_natr()
        vr = ti.calc_volume_rate()

        ret_index = ti.stock['ret_index']
        last_row = ti.stock.index[-1]

        clf = Classifier(self.clffile)
        train_X, train_y = clf.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        print(msg)
        clf_result = clf.classify(ret_index)[0]
        msg = "".join(["Classified: ", str(clf_result)])
        print(msg)
        # Label-based write to the last row (replaces the removed .ix).
        ti.stock.loc[last_row, 'classified'] = clf_result

        reg = Regression(self.regfile, alpha=1, regression_type="Ridge")
        train_X, train_y = reg.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        # This message used to be built and then overwritten unprinted.
        print(msg)
        base = ti.stock_raw['Adj Close'][0]
        reg_result = int(reg.predict(ret_index, base)[0])
        msg = "".join(["Predicted: ", str(reg_result)])
        print(msg)
        ti.stock.loc[last_row, 'predicted'] = reg_result

        if len(self.reference) > 0:
            ti.calc_rolling_corr(self.reference)
            ref = ti.stock['rolling_corr']
        else:
            ref = []

        io.save_data(io.merge_df(stock_d, ti.stock),
                     self.code, 'ti_')

        draw.plot(stock_d, ewma, bbands, sar,
                  rsi, roc, mfi, ultosc, willr,
                  stoch, tr, vr,
                  clf_result, reg_result,
                  ref,
                  axis=self.axis,
                  complexity=self.complexity)

        return ti

    except (ValueError, KeyError):
        msg = "".join(["Error occured in ", self.code])
        print(msg)
        return None
def run(self):
    """Load prices for ``self.code``, compute indicators, predict and plot.

    Data is read from ``self.csvfile`` when it is set (and topped up from the
    web when ``self.update`` is true); otherwise it is downloaded for
    ``self.start``..``self.end``.  The business-day series, sliced with
    ``self.minus_days``, is run through the ``TechnicalIndicators``
    calculations, a ``Classifier`` and a Ridge ``Regression``; the last row
    of ``ti.stock`` receives the ``classified`` and ``predicted`` values.
    The merged frame is saved with the ``ti_`` prefix only when an update is
    in effect.  Progress and errors go to ``self.logger``.

    Returns:
        The ``TechnicalIndicators`` instance on success, or ``None`` when the
        loaded data is empty or a ``ValueError``/``KeyError`` is raised
        during the analysis.
    """
    io = FileIO()
    will_update = self.update

    self.logger.info("".join(["Start Analysis: ", self.code]))

    if self.csvfile:
        stock_tse = io.read_from_csv(self.code, self.csvfile)
        self.logger.info("".join([
            "Read data from csv: ", self.code,
            " Records: ", str(len(stock_tse))
        ]))

        if self.update and len(stock_tse) > 0:
            # Second entry of a two-period business-day range that starts
            # at the last stored date: the day the download resumes from.
            index = pd.date_range(start=stock_tse.index[-1],
                                  periods=2, freq='B')
            next_day = index[1]
            t = next_day.strftime('%Y-%m-%d')
            newdata = io.read_data(self.code, start=t, end=self.end)
            self.logger.info("".join([
                "Read data from web: ", self.code,
                " New records: ", str(len(newdata))
            ]))
            if len(newdata) < 1:
                # Nothing new was downloaded, so skip retraining below.
                will_update = False
            else:
                # .iloc replaces .ix, which pandas removed in 1.0.
                print(newdata.iloc[-1, :])

            # NOTE(review): merge/save are placed after the if/else (they
            # also run when no new rows arrived) -- confirm against the
            # original layout.
            stock_tse = stock_tse.combine_first(newdata)
            io.save_data(stock_tse, self.code, 'stock_')
    else:
        stock_tse = io.read_data(self.code,
                                 start=self.start, end=self.end)
        self.logger.info("".join([
            "Read data from web: ", self.code,
            " Records: ", str(len(stock_tse))
        ]))

    if stock_tse.empty:
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning("".join(["Data empty: ", self.code]))
        return None

    if not self.csvfile:
        io.save_data(stock_tse, self.code, 'stock_')

    try:
        stock_d = stock_tse.asfreq('B').dropna()[self.minus_days:]
        ti = TechnicalIndicators(stock_d)

        # Each calc_* call is kept even where its return value is
        # overwritten; only the last ewma/rsi/roc result is passed to
        # draw.plot().
        ti.calc_sma()
        ti.calc_sma(timeperiod=5)
        ti.calc_sma(timeperiod=25)
        ti.calc_sma(timeperiod=50)
        ti.calc_sma(timeperiod=75)
        ti.calc_sma(timeperiod=200)
        ewma = ti.calc_ewma(span=5)
        ewma = ti.calc_ewma(span=25)
        ewma = ti.calc_ewma(span=50)
        ewma = ti.calc_ewma(span=75)
        ewma = ti.calc_ewma(span=200)
        bbands = ti.calc_bbands()
        sar = ti.calc_sar()
        draw = Draw(self.code, self.fullname)

        ret = ti.calc_ret_index()
        ti.calc_vol(ret['ret_index'])
        rsi = ti.calc_rsi(timeperiod=9)
        rsi = ti.calc_rsi(timeperiod=14)
        mfi = ti.calc_mfi()
        roc = ti.calc_roc(timeperiod=10)
        roc = ti.calc_roc(timeperiod=25)
        roc = ti.calc_roc(timeperiod=50)
        roc = ti.calc_roc(timeperiod=75)
        roc = ti.calc_roc(timeperiod=150)
        ti.calc_cci()
        ultosc = ti.calc_ultosc()
        stoch = ti.calc_stoch()
        ti.calc_stochf()
        ti.calc_macd()
        willr = ti.calc_willr()
        ti.calc_momentum(timeperiod=10)
        ti.calc_momentum(timeperiod=25)
        tr = ti.calc_tr()
        ti.calc_atr()
        ti.calc_natr()
        vr = ti.calc_volume_rate()

        ret_index = ti.stock['ret_index']
        last_row = ti.stock.index[-1]

        clf = Classifier(self.clffile)
        train_X, train_y = clf.train(ret_index, will_update)
        self.logger.info("".join(
            ["Classifier Train Records: ", str(len(train_y))]))
        clf_result = clf.classify(ret_index)[0]
        self.logger.info("".join(["Classified: ", str(clf_result)]))
        # Label-based write to the last row (replaces the removed .ix).
        ti.stock.loc[last_row, 'classified'] = clf_result

        reg = Regression(self.regfile, alpha=1, regression_type="Ridge")
        train_X, train_y = reg.train(ret_index, will_update)
        self.logger.info("".join(
            ["Regression Train Records: ", str(len(train_y))]))
        base = ti.stock_raw['Adj Close'][0]
        reg_result = int(reg.predict(ret_index, base)[0])
        self.logger.info("".join(["Predicted: ", str(reg_result)]))
        ti.stock.loc[last_row, 'predicted'] = reg_result

        if will_update is True:
            io.save_data(io.merge_df(stock_d, ti.stock),
                         self.code, 'ti_')

        # The prefix handed to draw.plot() depends on the window length.
        if self.minus_days < -300:
            _prefix = 'long'
        elif self.minus_days >= -60:
            _prefix = 'short'
        else:
            _prefix = 'chart'

        draw.plot(stock_d, _prefix, ewma, bbands, sar,
                  rsi, roc, mfi, ultosc, willr,
                  stoch, tr, vr,
                  clf_result, reg_result,
                  axis=self.axis,
                  complexity=self.complexity)

        self.logger.info("".join(["Finish Analysis: ", self.code]))

        return ti

    except (ValueError, KeyError) as e:
        self.logger.error("".join(
            ["Error occured in ", self.code, " at analysis.py"]))
        self.logger.error("".join(['ErrorType: ', str(type(e))]))
        self.logger.error("".join(['ErrorMessage: ', str(e)]))
        return None
import numpy as np
from featureScaling import featureScale
from regression import Regression
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Boston housing data: feature matrix, target vector and column names.
# NOTE(review): load_boston is absent from recent scikit-learn releases --
# confirm the installed version still provides it.
boston = load_boston()
X = boston['data']
y = boston['target']
feature_names = boston['feature_names']

# Fixed 80/20 split so both fitting strategies see the same rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

reg = Regression()

# Fit with gradient descent first, then with the normal equation, printing
# the same five held-out targets next to the model's predictions each time.
for fit in (lambda: reg.gradientDescent(X_train, y_train, None, 0.05),
            lambda: reg.normalEquation(X_train, y_train)):
    fit()
    print('test data: ', y_test[10:15])
    print('predicted data: ', reg.predict(X_test[10:15]))
#t = regression.predict(array([[cpu_percentage,cpu_time,m_available,m_swap,net_traffic, # transmission_capture,transmission_observer]], dtype=float)) #t = regression.predict(array([[cpu_percentage,cpu_time,m_available,m_swap,frame_rate]], dtype=float)) #print(t) poolSplit = str(data.split(',')[-1]).split(']')[0] pool = poolSplit.split()[0] #print(pool) features = array([[ cpu_percentage, cpu_time, m_available, m_swap, frame_rate ]], dtype=float) t = model_ufmg.predict( features ) if pool == "\'UFMG\'" else model_ufrgs.predict(features) #print(t) pools[pool] = t #pools[poolSplit] print(pools) #migrate(pools, args.x/args.fps) # Add output channel for response #print(regression.predict([[1.7,2014202.7,3940237312,89915392,0.0001804828643798828,1547732123.2172844,1547732122.9963086 if s not in outputs: outputs.append(s) else: # Interpret empty result as closed connection
cols_list.append(cols) data.scale_data(cols_list[:-1]) cols_list = data.get_cols() y_column_name = input("enter y column name: ") X_train, X_test, y_train, y_test = data.spilt_data(y_column_name) model_type = input("Enter R for Regression and C for Classification: ") if model_type == "C": print("Your options are: " + str(Classifier_list)) #add mode list modelname = input("Enter model to be used: ") classifier = Classification(X_train, X_test, y_train, y_test, modelname) classifier.predict() classifier.accuracy() classifier.save_model() elif model_type == 'R': print("Your options are: " + str(Regressor_list)) #add mode list modelname = input("Enter model to be used, use A for all") if modelname == "A": for modelname in Regressor_list: regressor = Regression(X_train, X_test, y_train, y_test, modelname) regressor.predict() regressor.accuracy() regressor.save_model() else: regressor = Regression(X_train, X_test, y_train, y_test, modelname) regressor.predict() regressor.accuracy() regressor.save_model()
def run(self):
    """Load prices for ``self.code``, compute indicators, predict and plot.

    Data is read from ``self.csvfile`` when it is set (and topped up from the
    web when ``self.update`` is true); otherwise it is downloaded for
    ``self.start``..``self.end``.  The business-day series is then run
    through the ``TechnicalIndicators`` calculations, a ``Classifier`` and a
    Ridge ``Regression``; the last row of ``ti.stock`` receives the
    ``classified`` and ``predicted`` values, the merged frame is saved with
    the ``ti_`` prefix and the chart is drawn.

    Returns:
        The ``TechnicalIndicators`` instance on success, or ``None`` when the
        loaded data is empty or a ``ValueError``/``KeyError`` is raised
        during the analysis.
    """
    io = FileIO()
    will_update = self.update

    if self.csvfile:
        stock_tse = io.read_from_csv(self.code, self.csvfile)
        msg = "".join(["Read data from csv: ", self.code, " Records: ", str(len(stock_tse))])
        print(msg)

        if self.update and len(stock_tse) > 0:
            # Second entry of a two-period business-day range that starts
            # at the last stored date: the day the download resumes from.
            index = pd.date_range(start=stock_tse.index[-1], periods=2, freq="B")
            next_day = index[1]
            t = next_day.strftime("%Y-%m-%d")
            newdata = io.read_data(self.code, start=t, end=self.end)
            msg = "".join(["Read data from web: ", self.code, " New records: ", str(len(newdata))])
            print(msg)
            if len(newdata) < 1:
                # Nothing new was downloaded, so skip retraining below.
                will_update = False
            else:
                # .iloc replaces .ix, which pandas removed in 1.0.
                print(newdata.iloc[-1, :])

            # NOTE(review): merge/save are placed after the if/else (they
            # also run when no new rows arrived) -- confirm against the
            # original layout.
            stock_tse = stock_tse.combine_first(newdata)
            io.save_data(stock_tse, self.code, "stock_")
    else:
        stock_tse = io.read_data(self.code, start=self.start, end=self.end)
        msg = "".join(["Read data from web: ", self.code, " Records: ", str(len(stock_tse))])
        print(msg)

    if stock_tse.empty:
        msg = "".join(["Data empty: ", self.code])
        print(msg)
        return None

    if not self.csvfile:
        io.save_data(stock_tse, self.code, "stock_")

    try:
        stock_d = stock_tse.asfreq("B").dropna()[self.days :]
        ti = TechnicalIndicators(stock_d)

        # Each calc_* call is kept even where its return value is
        # overwritten; only the last ewma/rsi/roc result is passed to
        # draw.plot().
        ti.calc_sma()
        ti.calc_sma(timeperiod=5)
        ti.calc_sma(timeperiod=25)
        ti.calc_sma(timeperiod=50)
        ti.calc_sma(timeperiod=75)
        ewma = ti.calc_ewma(span=5)
        ewma = ti.calc_ewma(span=25)
        ewma = ti.calc_ewma(span=50)
        ewma = ti.calc_ewma(span=75)
        bbands = ti.calc_bbands()
        sar = ti.calc_sar()
        draw = Draw(self.code, self.name)

        ret = ti.calc_ret_index()
        ti.calc_vol(ret["ret_index"])
        rsi = ti.calc_rsi(timeperiod=9)
        rsi = ti.calc_rsi(timeperiod=14)
        mfi = ti.calc_mfi()
        roc = ti.calc_roc(timeperiod=10)
        roc = ti.calc_roc(timeperiod=25)
        roc = ti.calc_roc(timeperiod=50)
        roc = ti.calc_roc(timeperiod=75)
        roc = ti.calc_roc(timeperiod=150)
        ti.calc_cci()
        ultosc = ti.calc_ultosc()
        stoch = ti.calc_stoch()
        ti.calc_stochf()
        ti.calc_macd()
        willr = ti.calc_willr()
        ti.calc_momentum(timeperiod=10)
        ti.calc_momentum(timeperiod=25)
        tr = ti.calc_tr()
        ti.calc_atr()
        ti.calc_natr()
        vr = ti.calc_volume_rate()

        ret_index = ti.stock["ret_index"]
        last_row = ti.stock.index[-1]

        clf = Classifier(self.clffile)
        train_X, train_y = clf.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        print(msg)
        clf_result = clf.classify(ret_index)[0]
        msg = "".join(["Classified: ", str(clf_result)])
        print(msg)
        # Label-based write to the last row (replaces the removed .ix).
        ti.stock.loc[last_row, "classified"] = clf_result

        reg = Regression(self.regfile, alpha=1, regression_type="Ridge")
        train_X, train_y = reg.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        # This message used to be built and then overwritten unprinted.
        print(msg)
        base = ti.stock_raw["Adj Close"][0]
        reg_result = int(reg.predict(ret_index, base)[0])
        msg = "".join(["Predicted: ", str(reg_result)])
        print(msg)
        ti.stock.loc[last_row, "predicted"] = reg_result

        if len(self.reference) > 0:
            ti.calc_rolling_corr(self.reference)
            ref = ti.stock["rolling_corr"]
        else:
            ref = []

        io.save_data(io.merge_df(stock_d, ti.stock), self.code, "ti_")

        draw.plot(
            stock_d,
            ewma,
            bbands,
            sar,
            rsi,
            roc,
            mfi,
            ultosc,
            willr,
            stoch,
            tr,
            vr,
            clf_result,
            reg_result,
            ref,
            axis=self.axis,
            complexity=self.complexity,
        )

        return ti

    except (ValueError, KeyError):
        msg = "".join(["Error occured in ", self.code])
        print(msg)
        return None
data_path = "./hollywood.xls"
data = pd.read_excel(data_path)

# Columns as used below: X1 is the target, X2 and X3 are the two inputs.
x_train, y_train, z_train = data['X2'], data['X1'], data['X3']

# Regression takes the target as an n x 1 array and the inputs as an n x m
# array (m = number of input variables).  train() returns a 1 x (m + 1)
# array of weights; the extra entry is the constant term.
x = panda_to_numpy(data, 'X2', 'X3')
y = data['X1']
model = Regression(y, x)
weights = model.train(epochs=200, a=0.0001, print_loss=True)

prediction = model.predict(8, 15)
print(prediction)

# Evaluate the fitted weights on evenly spaced points spanning each input's
# observed range; the trailing column of ones multiplies the constant term.
ones = np.ones(len(x_train))
x_normalized = np.linspace(x_train.min(), x_train.max(), len(x_train))
z_normalized = np.linspace(z_train.min(), z_train.max(), len(z_train))
x_pred = np.column_stack((x_normalized, z_normalized, ones))
y_pred = weights @ x_pred.T

# 3-D axes for the result.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title("Prediction of hollywood movie revenue")
ax.set_zlabel("Revenue")
ax.set_xlabel("Cost of production")
ax.set_ylabel("Cost of marketing")
import matplotlib.animation as animation
from regression import Regression, panda_to_numpy

data_path = "./biocarbonate.xls"
data = pd.read_excel(data_path)
x_train, y_train = data['X'], data['Y']

# Regression takes the target as an n x 1 array and the inputs as an n x m
# array (m = number of input variables).  train() returns a 1 x (m + 1)
# array of weights; the extra entry is the constant term.
x = panda_to_numpy(data, 'X')
y = data['Y']
model = Regression(y, x)
weights = model.train(epochs=200000, a=0.00005, print_loss=False)

prediction = model.predict(8)
print(prediction)

# Evaluate the fitted line on evenly spaced points across the observed x
# range; the column of ones multiplies the constant term.
x_low, x_high = x_train.min(), x_train.max()
ones = np.ones(len(x_train))
x_pred = np.column_stack((np.linspace(x_low, x_high, len(x_train)), ones))
y_pred = weights @ x_pred.T

# Observations as red dots, fitted values as a dashed green line.
plt.title("Prediction of bicarbonate")
plt.xlabel("ph")
plt.ylabel("biocarbonates ppm")
plt.plot(x_train, y_train, "ro", x_pred, y_pred, "g--")
plt.axis([x_low, x_high, y_train.min(), y_train.max()])
plt.show()
from preProcessor import PreProcessor
import argparse
from regression import Regression
from picDrawer import PicDrawer

if __name__ == '__main__':
    # Intended usage, as noted by the author:
    #     python run -f [filepath] -s [filepath] -c [stock code]
    # Intended output:
    #     stock error : implemented by regression.score()
    #     picture     : implemented by drawer
    # NOTE(review): argparse is imported but no arguments are parsed in this
    # block -- confirm whether the options above are handled elsewhere.

    # Clean the raw data and obtain the train/test split.
    data_cleaner = PreProcessor()
    splits = data_cleaner.run()
    train_feature, train_label, test_feature, test_label = splits

    # Fit on the training split, then predict and score the test split.
    reg = Regression()
    reg.fit(train_feature, train_label)
    pred_result = reg.predict(test_feature)
    score = reg.score(test_label, pred_result)

    # Render the picture.
    drawer = PicDrawer()
    drawer.run()