Example #1
def Main(args):
    if (len(args) != 3 and len(args) != 4):
        PrintUsage()
        return
    #Test if file exists
    try:
        open(args[0]).close()
    except Exception as e:
        print('Error opening file: ' + args[0])
        print(str(e))
        PrintUsage()
        return
    #Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print(e)
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    #Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    #Test validity of final optional argument
    if (len(args) == 4):
        predPrd = args[3].upper()
        if (predPrd == 'D'):
            predPrd = 'daily'
        elif (predPrd == 'W'):
            predPrd = 'weekly'
        elif (predPrd == 'M'):
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    #Everything looks okay; proceed with program
    #Grab the data frame
    D = ParseData(args[0])
    #The number of previous days of data used
    #when making a prediction
    numPastDays = 16
    PlotData(D)
    #Number of neurons in the input layer
    i = numPastDays * 7 + 1
    #Number of neurons in the output layer
    o = D.shape[1] - 1
    #Number of neurons in the hidden layers
    h = int((i + o) / 2)
    #The list of layer sizes
    #layers = [('F', h), ('AF', 'tanh'), ('F', h), ('AF', 'tanh'), ('F', o)]
    #R = ANNR([i], layers, maxIter = 1000, tol = 0.01, reg = 0.001, verbose = True)
    R = KNeighborsRegressor(n_neighbors=5)
    sp = StockPredictor(R, nPastDays=numPastDays)
    #Learn the dataset and then display performance statistics
    sp.Learn(D)
    sp.TestPerformance()
    #Perform prediction for a specified date range
    P = sp.PredictDate(args[1], args[2], predPrd)
    #Keep track of number of predicted results for plot
    n = P.shape[0]
    #Append the predicted results to the actual results
    D = P.append(D)
    #Predicted results are the first n rows
    PlotData(D, range(n + 1))
    return (P, n)
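The argument checks above all fall back to a PrintUsage helper that is not reproduced in this excerpt. A minimal sketch consistent with those checks (the wording, and the real helper in the source repository, may differ) could look like:

def PrintUsage():
    #Hypothetical usage text matching the checks in Main above
    print('Usage: <script> <csv file> <start date> <end date> [D|W|M]')
    print('Dates must use the YYYY-MM-DD format; the optional last argument')
    print('selects daily (D), weekly (W), or monthly (M) prediction (default: daily).')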
Example #2
    ani = mpla.FuncAnimation(fig,
                             UpdateF,
                             frames=2000,
                             interval=128,
                             repeat=False)
    # ani.save('foo.gif')
    mpl.show()
    return ani


if __name__ == "__main__":
    # Data in data.csv is assumed to contain 'High' and 'Timestamp' columns;
    # code below will scale it to unsigned ints
    # In a real application, data.csv would contain high-dimensional binary vectors
    D = ParseData('data.csv')[['Timestamp', 'High']]
    D['Timestamp'] = D['Timestamp'] - np.min(D['Timestamp'])
    D = np.floor(scale(D) * 16000)
    D = D - np.min(D, axis=0)
    aInt = np.int64(D[:, 0])
    yInt = np.int64(D[:, 1])
    # Max number of bits needed to represent vectors in A
    aMaxLen = math.floor(math.log2(np.max(aInt))) + 1
    # Max number of bits needed to represent vectors in Y
    yMaxLen = math.floor(math.log2(np.max(yInt))) + 1
    n = aInt.shape[0]
    A = np.zeros([n, aMaxLen], dtype=np.int64)
    Y = np.zeros([n, yMaxLen], dtype=np.int64)
    for i in range(n):
        IntToBinVec(yInt[i], Y[i])
        IntToBinVec(aInt[i], A[i])
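The loop above depends on an IntToBinVec helper from the source repository that is not shown here. Given how A and Y are preallocated (one row of aMaxLen or yMaxLen bits per integer), a plausible sketch of such a helper (an assumption, not the repository's actual code) is:

def IntToBinVec(x, v):
    #Hypothetical helper: write the binary digits of the non-negative
    #integer x into the preallocated vector v, least-significant bit first
    for j in range(v.shape[0]):
        v[j] = (x >> j) & 1
    return v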
Example #3
    #Create the KNN classifier using NNeighbors as a parameter
    R = KNeighborsClassifier(NNeighbors)
    for i in range((NNeighbors + 1) * interval, len(D.Timestamp), interval):
        #Fit the classifier to the NNeighbor points based on the interval and the target return
        window = range(i - NNeighbors * interval, i, interval)
        R.fit(knnIn[window].reshape(-1, 1), knnTarget[window].ravel())
        #Make a prediction with the fitted classifier
        knnPredict[i] = R.predict(knnIn[i].reshape(-1, 1))
        #DEBUG
        #print(knnIn[range(i - ((NNeighbors)*interval),i, interval)].reshape(-1, 1), knnTarget[range(i - ((NNeighbors)*interval),i,interval)].ravel(), knnTarget[i], knnPredict[i])
    Err = 0
    #Compute the classification accuracy
    for i in range((NNeighbors + 1) * interval, len(D.Timestamp), interval):
        if knnPredict[i] != knnTarget[i]:
            Err += 1
        #print(knnTarget[i], knnPredict[i], i)
    #Normalize the error count by the approximate number of predictions made
    #(the loop only classifies every interval-th sample)
    Err = Err / ((len(D.Timestamp) - NNeighbors) / interval)
    print(Err)
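The loop above repeatedly refits a KNeighborsClassifier on a sliding window of earlier samples and then classifies the current one. A self-contained toy version of that rolling fit/predict pattern (synthetic data; every name below is chosen for illustration only) might look like:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

#Toy rolling classification on a synthetic up/down series
rng = np.random.default_rng(0)
x = np.cumsum(rng.normal(size=200))             #synthetic price-like series
y = (np.diff(x, prepend=x[0]) > 0).astype(int)  #1 where the series moved up
k = 5
clf = KNeighborsClassifier(n_neighbors=k)
hits = 0
for i in range(k, len(x)):
    #Fit on the k most recent points, then classify the current one
    clf.fit(x[i - k:i].reshape(-1, 1), y[i - k:i])
    hits += int(clf.predict([[x[i]]])[0] == y[i])
print('toy accuracy:', hits / (len(x) - k))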


path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                    'daily_adjusted_AMZN.csv')
D = ParseData(path)

for i in range(1, 13, 2):
    print(i)
    KNNRegression(i, D)
Example #4
def Main(args):
    if (len(args) != 3 and len(args) != 4):
        PrintUsage()
        return

    #Obtain CSV from Internet
    print(
        '\n#############   Downloading Historical Data from Internet   #############'
    )
    file = args[0] + '.csv'
    str2 = ('http://real-chart.finance.yahoo.com/table.csv?s=' + args[0] +
            '.NS&d=02&e=16&f=2017&g=d&a=0&b=1&c=1996&ignore=.csv')
    with open(file, 'wb') as f:
        f.write(requests.get(str2).content)
    print('\n#############   Download Complete   ##############')
    #Test if file exists
    try:
        open(file).close()
    except Exception as e:
        print('Error opening file: ' + file)
        print(str(e))
        PrintUsage()
        return
    #Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    #Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    #Test validity of final optional argument
    if (len(args) == 4):
        predPrd = args[3].upper()
        if (predPrd == 'D'):
            predPrd = 'daily'
        elif (predPrd == 'W'):
            predPrd = 'weekly'
        elif (predPrd == 'M'):
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    print('\n#############   Processing on Data   ##############')
    #Everything looks okay; proceed with program
    D = ParseData(file)
    #The number of previous days of data used
    #when making a prediction
    numPastDays = 20
    PlotData(D, args[0])
    #Number of neurons in the input layer
    i = numPastDays * 7 + 1
    #Number of neurons in the output layer
    o = D.shape[1] - 1
    #Number of neurons in the hidden layers
    h = int((i + o) / 2)
    #The list of layer sizes
    layers = [i, h, h, o]
    #Type of Regressor Used
    #R = RandomForestRegressor(n_estimators = 5)
    R = MLPR(layers, maxItr=1000, tol=0.40, reg=0.001, verbose=True)
    sp = StockPredictor(R, nPastDays=numPastDays)
    #Learn the dataset and then display performance statistics
    sp.Learn(D)
    #sp.TestPerformance()
    #Perform prediction for a specified date range
    P, R = sp.PredictDate(args[1], args[2], predPrd)
    print('\n#############   Prediction is on the way   ##############')
    #Keep track of number of predicted results for plot
    n = P.shape[0]
    #Append the predicted results to the actual results
    D = P.append(D)
    #Predicted results are the first n rows
    PlotData(D, args[0], range(n + 1))

    return (R, n)
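MLPR in Example #4 appears to be the repository's own multilayer perceptron wrapper rather than a scikit-learn class. Purely for reference, a roughly comparable setup built on scikit-learn's MLPRegressor (an assumed substitute, not the repository's API; the sizes below are placeholders) could be:

from sklearn.neural_network import MLPRegressor

#Assumed stand-in for the repo's MLPR wrapper: two hidden layers of size h,
#an L2 penalty similar to reg, and a loose tolerance-based stopping rule
i, o = 141, 7            #placeholder sizes; o depends on the parsed data frame
h = int((i + o) / 2)
R = MLPRegressor(hidden_layer_sizes=(h, h), alpha=0.001, tol=0.40,
                 max_iter=1000, verbose=True)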
Example #5
import os
import numpy as np
import matplotlib.pyplot as mpl
from sklearn.preprocessing import scale
from StockPredictor import ParseData
from StockPredictor import PlotData


def plotScaledData(filePath):
    pth = os.path.join(filePath, 'daily_adjusted_AMZN.csv')
    A = np.loadtxt(pth, delimiter=",", skiprows=1, usecols=(1, 3))
    A = scale(A)
    #y is the dependent variable
    y = A[:, 1].reshape(-1, 1)
    #A contains the independent variable
    A = A[:, 0].reshape(-1, 1)
    #Plot the high value of the stock price
    mpl.plot(A[:, 0], y[:, 0])
    mpl.show()
    return


if __name__ == "__main__":
    path = os.path.dirname(os.path.realpath(__file__))
    #plotScaledData(path)
    df = ParseData(os.path.join(path, 'validation.csv'))
    PlotData(df)
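Example #5 relies on sklearn.preprocessing.scale, which standardizes each column to zero mean and unit variance before plotting. A quick self-contained illustration of what that rescaling does:

import numpy as np
from sklearn.preprocessing import scale

X = np.array([[1.0, 10.0],
              [2.0, 20.0],
              [3.0, 30.0]])
Xs = scale(X)
print(Xs)                #each column rescaled independently
print(Xs.mean(axis=0))   #approximately [0, 0]
print(Xs.std(axis=0))    #approximately [1, 1]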
Example #6
def Main(args):
    if (len(args) != 3 and len(args) != 4):
        PrintUsage()
        return
    #Test if file exists
    try:
        open(args[0]).close()
    except Exception as e:
        print('Error opening file: ' + args[0])
        print(str(e))
        PrintUsage()
        return
    #Test validity of start date string
    try:
        datetime.strptime(args[1], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[1])
        PrintUsage()
        return
    #Test validity of end date string
    try:
        datetime.strptime(args[2], '%Y-%m-%d').timestamp()
    except Exception as e:
        print('Error parsing date: ' + args[2])
        PrintUsage()
        return
    #Test validity of final optional argument
    if (len(args) == 4):
        predPrd = args[3].upper()
        if (predPrd == 'D'):
            predPrd = 'daily'
        elif (predPrd == 'W'):
            predPrd = 'weekly'
        elif (predPrd == 'M'):
            predPrd = 'monthly'
        else:
            PrintUsage()
            return
    else:
        predPrd = 'daily'
    #Everything looks okay; proceed with program
    #Grab the data frame
    D = ParseData(args[0])
    #The number of previous days of data used
    #when making a prediction
    numPastDays = 15
    #PlotData(D)
    #Number of neurons in the input layer
    i = numPastDays * 7 + 1
    #Number of neurons in the output layer
    o = D.shape[1] - 1
    #Number of neurons in the hidden layers
    h = int((i + o) / 2)
    #The list of layer sizes
    layers = [i, h, h, h, h, h, h, o]
    R = MLPR(layers, maxItr=100000, tol=0.01, reg=0.001, verbose=True, batchSize=200)
    #R = KNeighborsRegressor(n_neighbors=7)
    sp = StockPredictor(R, nPastDays=numPastDays)

    #Learn the dataset and then display performance statistics
    #Hold out the first 5 rows for testing and train on the rest
    size = len(D)
    trainingData = D[5:size].reset_index(drop=True)
    print(trainingData)
    sp.Learn(trainingData)
    sp.TestPerformance()
    #Perform prediction for a specified date range
    P = sp.PredictDate(args[1], args[2], predPrd)
    print('Prediction complete')
    print(P)
    #Keep track of number of predicted results for plot
    n = P.shape[0]
    print('Number of predicted rows: ' + str(n))
    #Append the predicted results to the actual results
    #D = P.append(D)
    #Predicted results are the first n rows
    PlotTestData(P, D[0:5])
    return (P, n)
Example #7
    ani = mpla.FuncAnimation(fig,
                             UpdateF,
                             frames=2000,
                             interval=128,
                             repeat=False)
    #ani.save('foo.gif')
    mpl.show()
    return ani


if __name__ == "__main__":
    #The input CSV is assumed to contain 'high' and 'Timestamp' columns;
    #the code below scales them to unsigned ints
    #In a real application, the data would be high-dimensional binary vectors
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'daily_adjusted_AMZN.csv')
    D = ParseData(path)[['Timestamp', 'high']]
    D['Timestamp'] = D['Timestamp'] - np.min(D['Timestamp'])
    D = np.floor(scale(D) * 16000)
    D = D - np.min(D, axis=0)
    aInt = np.int64(D[:, 0])
    yInt = np.int64(D[:, 1])
    #Max number of bits needed to represent vectors in A
    aMaxLen = math.floor(math.log2(np.max(aInt))) + 1
    #Max number of bits needed to represent vectors in Y
    yMaxLen = math.floor(math.log2(np.max(yInt))) + 1
    n = aInt.shape[0]
    A = np.zeros([n, aMaxLen], dtype=np.int64)
    Y = np.zeros([n, yMaxLen], dtype=np.int64)
    for i in range(n):
        IntToBinVec(yInt[i], Y[i])
        IntToBinVec(aInt[i], A[i])