def Main(args):
    """Validate CLI arguments, train a stock predictor, and plot predictions.

    args -- [csvFile, startDate, endDate, optional period flag D/W/M];
            dates are 'YYYY-MM-DD' strings.
    Returns (P, n) where P holds the predicted rows and n is their count,
    or None when any argument fails validation.
    """
    if len(args) != 3 and len(args) != 4:
        PrintUsage()
        return
    # Test if file exists; 'with' closes the probe handle (the original
    # leaked an open file object here)
    try:
        with open(args[0]):
            pass
    except Exception as e:
        print('Error opening file: ' + args[0])
        print(str(e))
        PrintUsage()
        return
    # Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print(e)
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    # Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    # Test validity of final optional argument (prediction period)
    if len(args) == 4:
        predPrd = args[3].upper()
        if predPrd == 'D':
            predPrd = 'daily'
        elif predPrd == 'W':
            predPrd = 'weekly'
        elif predPrd == 'M':
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    # Everything looks okay; proceed with program
    # Grab the data frame
    D = ParseData(args[0])
    # The number of previous days of data used when making a prediction
    numPastDays = 16
    PlotData(D)
    # i/o/h sized the retired ANNR network commented out below; they are
    # unused by the KNN regressor but kept for reference
    # Number of neurons in the input layer
    i = numPastDays * 7 + 1
    # Number of neurons in the output layer
    o = D.shape[1] - 1
    # Number of neurons in the hidden layers
    h = int((i + o) / 2)
    # The list of layer sizes
    # layers = [('F', h), ('AF', 'tanh'), ('F', h), ('AF', 'tanh'), ('F', o)]
    # R = ANNR([i], layers, maxIter = 1000, tol = 0.01, reg = 0.001, verbose = True)
    R = KNeighborsRegressor(n_neighbors=5)
    sp = StockPredictor(R, nPastDays=numPastDays)
    # Learn the dataset and then display performance statistics
    sp.Learn(D)
    sp.TestPerformance()
    # Perform prediction for a specified date range
    P = sp.PredictDate(args[1], args[2], predPrd)
    # Keep track of number of predicted results for plot
    n = P.shape[0]
    # Append the predicted results to the actual results
    # NOTE(review): if P is a pandas DataFrame, DataFrame.append was removed
    # in pandas 2.x and this needs pd.concat([P, D]) -- confirm P's type.
    D = P.append(D)
    # Predicted results are the first n rows
    # NOTE(review): range(n + 1) spans n + 1 rows although only n are
    # predictions -- confirm the extra row is intentional overlap.
    PlotData(D, range(n + 1))
    return (P, n)
ani = mpla.FuncAnimation(fig, UpdateF, frames=2000, interval=128, repeat=False) # ani.save('foo.gif') mpl.show() return ani if __name__ == "__main__": # Data in data.csv is assumed to contain 'High' and 'Timestamp' columns; # code below will scale it to unsigned ints # In a true example, data in data.csv would be high dimensional binary vectors D = ParseData('data.csv')[['Timestamp', 'High']] D['Timestamp'] = D['Timestamp'] - np.min(D['Timestamp']) D = np.floor(scale(D) * 16000) D = D - np.min(D, axis=0) aInt = np.int64(D[:, 0]) yInt = np.int64(D[:, 1]) # Max number of bits needed to represent vectors in A aMaxLen = math.floor(math.log2(np.max(aInt))) + 1 # Max number of bits needed to represent vectors in Y yMaxLen = math.floor(math.log2(np.max(yInt))) + 1 n = aInt.shape[0] A = np.zeros([n, aMaxLen], dtype=np.int) Y = np.zeros([n, yMaxLen], dtype=np.int) for i in range(n): IntToBinVec(yInt[i], Y[i]) IntToBinVec(aInt[i], A[i])
#Create the KNN classifier using NNeighbors as a parameter R = KNeighborsClassifier(NNeighbors) for i in range((NNeighbors + 1) * interval, len(D.Timestamp), interval): #Fit the classifier to the NNeighbor points based on the interval and the target return R.fit( knnIn[range(i - ((NNeighbors) * interval), i, interval)].reshape(-1, 1), knnTarget[range(i - ((NNeighbors) * interval), i, interval)].ravel()) #Make a prediction with the fitted classifier knnPredict[i] = R.predict(knnIn[i].reshape(-1, 1)) #DEBUG #print(knnIn[range(i - ((NNeighbors)*interval),i, interval)].reshape(-1, 1), knnTarget[range(i - ((NNeighbors)*interval),i,interval)].ravel(), knnTarget[i], knnPredict[i]) Err = 0 #Compute the classification accuracy for i in range((NNeighbors + 1) * interval, len(D.Timestamp), interval): if knnPredict[i] != knnTarget[i]: Err += 1 #print(knnTarget[i], knnPredict[i], i) #Divide the calculated error based on number of samples (input matrices could be sparse based on input) Err = Err / ((len(D.Timestamp) - NNeighbors) / interval) print(Err) path = os.path.dirname( os.path.realpath(__file__)) + '\\daily_adjusted_AMZN.csv' D = ParseData(path) for i in range(1, 13, 2): print(i) KNNRegression(i, D)
def Main(args):
    """Download ticker history, train an MLP predictor, and plot predictions.

    args -- [tickerSymbol, startDate, endDate, optional period flag D/W/M];
            dates are 'YYYY-MM-DD' strings.
    Returns (R, n) where R is the second value returned by PredictDate and
    n the number of predicted rows, or None when validation fails.
    """
    if len(args) != 3 and len(args) != 4:
        PrintUsage()
        return
    # Obtain CSV from Internet
    print(
        '\n############# Downloading Historical Data from Internet #############'
    )
    file = args[0] + '.csv'
    # NOTE(review): this Yahoo Finance endpoint was retired; the request will
    # likely fail and needs migration to a maintained data source.
    str2 = ('http://real-chart.finance.yahoo.com/table.csv?s=' + args[0] +
            '.NS&d=02&e=16&f=2017&g=d&a=0&b=1&c=1996&ignore=.csv')
    # 'with' guarantees the download handle is closed even if the write
    # raises (the original used a bare open/close pair)
    with open(file, 'wb') as f:
        f.write(requests.get(str2).content)
    print('\n############# Download Complete ##############')
    # Test if file exists; 'with' closes the probe handle (the original
    # leaked an open file object here)
    try:
        with open(file):
            pass
    except Exception as e:
        print('Error opening file: ' + file)
        print(str(e))
        PrintUsage()
        return
    # Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    # Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    # Test validity of final optional argument (prediction period)
    if len(args) == 4:
        predPrd = args[3].upper()
        if predPrd == 'D':
            predPrd = 'daily'
        elif predPrd == 'W':
            predPrd = 'weekly'
        elif predPrd == 'M':
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    print('\n############# Processing on Data ##############')
    # Everything looks okay; proceed with program
    D = ParseData(file)
    # The number of previous days of data used when making a prediction
    numPastDays = 20
    PlotData(D, args[0])
    # Number of neurons in the input layer
    i = numPastDays * 7 + 1
    # Number of neurons in the output layer
    o = D.shape[1] - 1
    # Number of neurons in the hidden layers
    h = int((i + o) / 2)
    # The list of layer sizes
    layers = [i, h, h, o]
    # Type of Regressor Used
    # R = RandomForestRegressor(n_estimators = 5)
    R = MLPR(layers, maxItr=1000, tol=0.40, reg=0.001, verbose=True)
    sp = StockPredictor(R, nPastDays=numPastDays)
    # Learn the dataset and then display performance statistics
    sp.Learn(D)
    # sp.TestPerformance()
    # Perform prediction for a specified date range; note R is rebound to
    # the second result here (the regressor object is no longer needed)
    P, R = sp.PredictDate(args[1], args[2], predPrd)
    # Typo fixed in user-facing message: 'Predection' -> 'Prediction'
    print('\n############# Prediction is on the way ##############')
    # Keep track of number of predicted results for plot
    n = P.shape[0]
    # Append the predicted results to the actual results
    # NOTE(review): if P is a pandas DataFrame, DataFrame.append was removed
    # in pandas 2.x and this needs pd.concat([P, D]) -- confirm P's type.
    D = P.append(D)
    # Predicted results are the first n rows
    PlotData(D, args[0], range(n + 1))
    return (R, n)
import os

import numpy as np
import matplotlib.pyplot as mpl
from sklearn.preprocessing import scale

from StockPredictor import ParseData
from StockPredictor import PlotData


def plotScaledData(filePath, fileName='daily_adjusted_AMZN.csv'):
    """Plot two scaled columns of a stock CSV against each other.

    filePath -- directory containing the CSV file.
    fileName -- CSV file name; defaults to the previously hard-coded value,
                so existing callers are unaffected.
    """
    # os.path.join is portable; the original concatenated a literal '\\'
    # separator, which only produced a valid path on Windows
    pth = os.path.join(filePath, fileName)
    # Columns 1 and 3 of the CSV, skipping the header row
    A = np.loadtxt(pth, delimiter=",", skiprows=1, usecols=(1, 3))
    A = scale(A)
    # y is the dependent variable
    y = A[:, 1].reshape(-1, 1)
    # A contains the independent variable
    A = A[:, 0].reshape(-1, 1)
    # Plot the high value of the stock price
    mpl.plot(A[:, 0], y[:, 0])
    mpl.show()
    return


if __name__ == "__main__":
    path = os.path.dirname(os.path.realpath(__file__))
    # plotScaledData(path)
    # os.path.join replaces the Windows-only '\\' concatenation
    df = ParseData(os.path.join(path, 'validation.csv'))
    PlotData(df)
def Main(args):
    """Validate CLI arguments, train an MLP on all but the first 5 rows of
    the data, and plot predictions against that held-out slice.

    args -- [csvFile, startDate, endDate, optional period flag D/W/M];
            dates are 'YYYY-MM-DD' strings.
    Returns (P, n) where P holds the predicted rows and n is their count,
    or None when any argument fails validation.
    """
    if len(args) != 3 and len(args) != 4:
        PrintUsage()
        return
    # Test if file exists; 'with' closes the probe handle (the original
    # leaked an open file object here)
    try:
        with open(args[0]):
            pass
    except Exception as e:
        print('Error opening file: ' + args[0])
        print(str(e))
        PrintUsage()
        return
    # Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    # Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    # Test validity of final optional argument (prediction period)
    if len(args) == 4:
        predPrd = args[3].upper()
        if predPrd == 'D':
            predPrd = 'daily'
        elif predPrd == 'W':
            predPrd = 'weekly'
        elif predPrd == 'M':
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    # Everything looks okay; proceed with program
    # Grab the data frame
    D = ParseData(args[0])
    # The number of previous days of data used when making a prediction
    numPastDays = 15
    # PlotData(D)
    # Number of neurons in the input layer
    i = numPastDays * 7 + 1
    # Number of neurons in the output layer
    o = D.shape[1] - 1
    # Number of neurons in the hidden layers
    h = int((i + o) / 2)
    # The list of layer sizes
    layers = [i, h, h, h, h, h, h, o]
    R = MLPR(layers, maxItr=100000, tol=0.01, reg=0.001, verbose=True,
             batchSize=200)
    # R = KNeighborsRegressor(n_neighbors = 7)
    sp = StockPredictor(R, nPastDays=numPastDays)
    # Hold out the first 5 rows and train on the rest; the idiomatic
    # instance call replaces DataFrame.reset_index(D[5:size], drop=True)
    trainingData = D[5:len(D)].reset_index(drop=True)
    print(trainingData)
    # Learn the dataset and then display performance statistics
    sp.Learn(trainingData)
    sp.TestPerformance()
    # Perform prediction for a specified date range
    P = sp.PredictDate(args[1], args[2], predPrd)
    # Debug output kept verbatim from the original
    print("done")
    print(P)
    # Keep track of number of predicted results for plot
    n = P.shape[0]
    print("nnnnn")
    print(n)
    # Append the predicted results to the actual results
    # D = P.append(D)
    # Plot predictions against the 5 held-out rows
    PlotTestData(P, D[0:5])
    return (P, n)
UpdateF, frames=2000, interval=128, repeat=False) #ani.save('foo.gif') mpl.show() return ani if __name__ == "__main__": #Data in data.csv is assumed to contain 'High' and 'Timestamp' columns; #code below will scale it to unsigned ints #In a true example, data in data.csv would be high dimensional binary vectors path = os.path.dirname( os.path.realpath(__file__)) + '\\daily_adjusted_AMZN.csv' D = ParseData(path)[['Timestamp', 'high']] D['Timestamp'] = D['Timestamp'] - np.min(D['Timestamp']) D = np.floor(scale(D) * 16000) D = D - np.min(D, axis=0) aInt = np.int64(D[:, 0]) yInt = np.int64(D[:, 1]) #Max number of bits needed to represent vectors in A aMaxLen = math.floor(math.log2(np.max(aInt))) + 1 #Max number of bits needed to represent vectors in Y yMaxLen = math.floor(math.log2(np.max(yInt))) + 1 n = aInt.shape[0] A = np.zeros([n, aMaxLen], dtype=np.int) Y = np.zeros([n, yMaxLen], dtype=np.int) for i in range(n): IntToBinVec(yInt[i], Y[i]) IntToBinVec(aInt[i], A[i])