Beispiel #1
0
    def __init__(self, baseDir, startDate, endDate):
        '''
        Stack, adjust and clean the quotes and trades of every S&P 500 ticker.

        The tickers are read from the 'WRDS' sheet of a hard-coded workbook.

        :param baseDir: forwarded unchanged to StackData
        :param startDate: forwarded unchanged to StackData
        :param endDate: forwarded unchanged to StackData
        '''
        # FIRST: Take S&P500 tickers
        s_p500 = '/media/louis/DATA/documents/cours/NYU/SPRING_18/ATQS/HK1/s_p500.xlsx'
        # Read through a context manager so the file handle is always closed.
        with open(s_p500, 'rb') as workbook:
            s_p500xls = pd.read_excel(workbook, sheet_name='WRDS')
        s_ptickers = np.unique(
            (np.array(s_p500xls['Ticker Symbol'])).astype(str))
        # Drop the last (sorted) entry.
        # NOTE(review): presumably the 'nan' string produced by blank cells
        # -- confirm against the workbook.
        s_ptickers = s_ptickers[:-1]

        # THEN: Loop through tickers and stack them separately
        for ticker in s_ptickers:

            # Stack everything
            stack = StackData(baseDir, startDate, endDate, ticker)
            stack.addTrades()
            stack.addQuotes()

            # Get results
            quotes = stack.getStackedQuotes()
            trades = stack.getStackedTrades()

            # Adjust
            # NOTE(review): the workbook path is passed as the last argument;
            # confirm that TAQAdjust accepts a path here.
            adjuster = TAQAdjust(quotes, trades, ticker, s_p500)
            adjuster.adjustQuote()
            adjuster.adjustTrade()

            # Clean: remove the rows selected by the cleaner
            cleaner = TAQCleaner(quotes, trades, k=5, gamma=0.005)
            quotes = np.delete(quotes, cleaner.cleanQuotesIndices(), axis=0)
            trades = np.delete(trades, cleaner.cleanTradesIndices(), axis=0)

        # The cleaned arrays are not stored anywhere yet: the data structure
        # used to keep them is up to you.
Beispiel #2
0
    def test1(self):
        '''
        Perturb the first quote and trade prices, then run the cleaner.

        The first row of each array has its prices multiplied by 10000. The
        rows selected by the cleaner are then deleted and the remaining rows
        printed; nothing is asserted about the cleaned arrays.
        '''
        stackedTrades = np.array(
            [['20070620', 'IBM', 34241000, 106.5, 85200.0],
             ['20070621', 'IBM', 57596000, 106.61000061035156, 500.0],
             ['20070621', 'IBM', 57596000, 106.61000061035156, 200.0],
             ['20070621', 'IBM', 57597000, 106.5999984741211, 200.0],
             ['20070621', 'IBM', 57597000, 106.5999984741211, 200.0],
             ['20070621', 'IBM', 57597000, 106.5999984741211, 200.0]])
        stackedQuotes = np.array(
            [['20070620', 'IBM', 34241000, 106.5, 85200.0, 106.1, 8200.0],
             ['20070621', 'IBM', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 'IBM', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 'IBM', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 'IBM', 57597000, 106.5, 85200.0, 106.1, 800.0]])

        # The cleaner is built before the arrays are perturbed below.
        cleaner = TAQCleaner(stackedQuotes, stackedTrades, k=5, gamma=0.0005)

        # Initial quote check
        # (assertAlmostEqual: the *Equals aliases were removed in Python 3.12)
        self.assertAlmostEqual(float(stackedQuotes[0, -4]), 106.5, 2)
        self.assertAlmostEqual(float(stackedQuotes[0, -2]), 106.1, 2)

        # Initial trade check
        self.assertAlmostEqual(float(stackedTrades[0, -2]), 106.5, 2)

        # Perturbation of the first midprice by a factor 10000
        stackedQuotes[0, -4] = float(stackedQuotes[0, -4]) * 10000
        stackedQuotes[0, -2] = float(stackedQuotes[0, -2]) * 10000

        # Perturbation of the first trade price by a factor 10000
        stackedTrades[0, -2] = float(stackedTrades[0, -2]) * 10000

        # Check the execution of the perturbations
        self.assertAlmostEqual(float(stackedQuotes[0, -4]), 1065000.0, 2)
        self.assertAlmostEqual(float(stackedQuotes[0, -2]), 1061000.0, 2)
        self.assertAlmostEqual(float(stackedTrades[0, -2]), 1065000.0, 2)

        # Execute quote cleaning
        stackedQuotes = np.delete(stackedQuotes,
                                  cleaner.cleanQuotesIndices(),
                                  axis=0)

        # Execute trade cleaning
        stackedTrades = np.delete(stackedTrades,
                                  cleaner.cleanTradesIndices(),
                                  axis=0)

        # Display
        print(stackedQuotes)
        print(stackedTrades)
Beispiel #3
0
def plotCleanAndBefore(s_p500, baseDir, filePathcln, ticker):
    """Plot one ticker's quotes and trades before and after cleaning.

    Data for the fixed window 20070720-20070730 is stacked, adjusted and
    cleaned. Row counts before and after cleaning are printed, and two PNG
    files are written through plotSeries.

    :param s_p500: forwarded to AdjustingHashmap to build the multipliers map
    :param baseDir: forwarded to StackData
    :param filePathcln: string prefix prepended to the output file names
    :param ticker: ticker symbol (a string: it is concatenated into the
        titles and file names)
    """
    # Multipliers map
    multmap = AdjustingHashmap(s_p500)
    print('Finished building multipliers map', ticker)

    # Stack
    stack = StackData(baseDir, '20070720', '20070730', ticker)
    stack.addQuotes()
    stack.addTrades()
    print('Finished stacking', ticker)

    # Get stacked results
    quotes = stack.getStackedQuotes()
    trades = stack.getStackedTrades()
    print('Got stacked results', ticker)

    # Adjustment
    adjuster = TAQAdjust(quotes, trades, ticker, multmap)
    adjuster.adjustQuote()
    adjuster.adjustTrade()
    # Snapshot taken after adjustment, so "before" means adjusted but not
    # yet cleaned.
    quotesbefore = deepcopy(quotes)
    tradesbefore = deepcopy(trades)
    print('Finished adjustment', ticker)

    # Cleaning: keep only the rows the cleaner flags as True
    cleaner = TAQCleaner(quotes, trades, ticker)
    quotes = quotes[cleaner.cleanQuotesIndices() == True, :]
    trades = trades[cleaner.cleanTradesIndices() == True, :]
    lq1 = len(quotesbefore)
    lq2 = len(quotes)
    lt1 = len(tradesbefore)
    lt2 = len(trades)
    print('q before, q after', lq1, lq2)
    print('t before, t after', lt1, lt2)
    # An empty stack would make the ratios divide by zero; report 0.0 then.
    print('% trades removed:', (lt1 - lt2) / lt1 if lt1 else 0.0)
    print('% quotes removed:', (lq1 - lq2) / lq1 if lq1 else 0.0)
    print('Finished cleaning', ticker)

    # Plot quotes
    title = ticker + ' quotes before and after cleaning'
    outputFile = filePathcln + ticker + "quotes_cleaning.png"
    plotSeries(quotes, quotesbefore, 4, ticker, title, outputFile)

    # Plot trades
    title = ticker + ' trades before and after cleaning'
    outputFile = filePathcln + ticker + "trades_cleaning.png"
    plotSeries(trades, tradesbefore, 2, ticker, title, outputFile)
Beispiel #4
0
# Script fragment: `stack` and `s_p500` are defined before this point.
stack.addQuotes()
print('Finished stacking')

# Fetch the stacked quote and trade arrays
quotes = stack.getStackedQuotes()
trades = stack.getStackedTrades()
print('Got results')

# Adjust quotes and trades
adjuster = TAQAdjust(quotes, trades, s_p500)
adjuster.adjustQuote()
adjuster.adjustTrade()
print('Adjusted')

# Clean: delete the rows selected by the cleaner (default parameters)
cleaner = TAQCleaner(quotes, trades)
quotes = np.delete(quotes, cleaner.cleanQuotesIndices(), axis=0)
trades = np.delete(trades, cleaner.cleanTradesIndices(), axis=0)
print('Cleaned')
# The string literal below is disabled code (autocorrelation plots for
# several window lengths); it is never executed.
'''
plotWindows = np.array([10, 30, 60, 300, 600, 900, 1800])
for x in range(len(plotWindows)):
    t_returns = getXSecTradeReturns(trades,plotWindows[x])[0]
    plotAutocorrelation(t_returns, 50, plotWindows[x])
'''
'''At this point, look at the data, the autocorrelation should drop off at around lag K=5?'''
# Lag chosen from the inspection described above, and a confidence level.
# NOTE(review): how K and confidence are consumed is not visible here.
K = 5
confidence = 0.95

testWindows = 60 * np.array([
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
Beispiel #5
0
    s.addTrades()

# Candidate cleaning parameters. Every k is combined with every gamma.
# NOTE(review): both lists contain repeated values, so the nested loops
# evaluate duplicate (k, gamma) pairs -- confirm whether pairs were meant
# to be zipped instead.
k_test = [60, 45, 15, 60, 45, 15]
gamma_test = [0.02, 0.02, 0.02, 0.005, 0.005, 0.005]

# skews[i, j] / kurtosiss[i, j] accumulate, over all stacks, the skewness and
# kurtosis of the second-to-last trade column after cleaning with
# (k_test[i], gamma_test[j]); each sum is then divided by `l`.
# `stacks`, `skews`, `kurtosiss` and `l` are defined before this point.
i = 0
for k in k_test:
    j = 0
    for gamma in gamma_test:
        for s in stacks:
            tckr = s.getTicker()
            print(tckr)
            trades = s.getStackedTrades()
            if trades.size == 0:
                # Nothing to clean for this ticker.
                # NOTE(review): the sum is still divided by `l` below.
                continue
            cleaner = TAQCleaner([], trades, tckr, k, gamma, k, gamma)
            # Keep only the rows the cleaner flags as True.
            trades = trades[cleaner.cleanTradesIndices() == True, :]
            # `np.float` was removed in NumPy 1.24; the builtin `float` is the
            # same dtype. Convert once and reuse for both statistics.
            values = trades[:, -2].astype(float)
            skews[i, j] += skew(values)
            kurtosiss[i, j] += kurtosis(values)
        skews[i, j] = skews[i, j] / l
        kurtosiss[i, j] = kurtosiss[i, j] / l
        j += 1
        print(i, j)
    i += 1

# Grid positions of the smallest average skewness and kurtosis.
min1 = np.unravel_index(np.argmin(skews, axis=None), skews.shape)
min2 = np.unravel_index(np.argmin(kurtosiss, axis=None), kurtosiss.shape)

# Display results, leave analysis to the programmer
Beispiel #6
0
# Script fragment: `baseDir`, `startDate`, `s_p500` and `filepathcln` are
# defined before this point.
endDate = '20070921'
ticker1 = 'MSFT'
# ticker2 is not used anywhere in this fragment.
ticker2 = 'GOOG'

# Stack trades and quotes for ticker1
stack = StackData(baseDir, startDate, endDate, ticker1)
stack.addTrades()
stack.addQuotes()
print('Finished stacking MSFT')

# Fetch the stacked arrays
quotes = stack.getStackedQuotes()
trades = stack.getStackedTrades()
print('Got results MSFT')

# Adjust quotes and trades
adjuster = TAQAdjust(quotes, trades, s_p500)
adjuster.adjustQuote()
adjuster.adjustTrade()
print('Adjusted MSFT')

# Clean: delete the rows selected by the cleaner (default parameters)
cleaner = TAQCleaner(quotes, trades)
quotes = np.delete(quotes, cleaner.cleanQuotesIndices(), axis=0)
trades = np.delete(trades, cleaner.cleanTradesIndices(), axis=0)
print('Cleaned MSFT')

# Persist through the cleaner object.
# NOTE(review): the filtered `quotes`/`trades` above are local rebinds;
# confirm what data the cleaner itself writes.
cleaner.storeCleanedQuotes(filepathcln)
cleaner.storeCleanedTrades(filepathcln)
print('Stored clean MSFT')
Beispiel #7
0
    trades = adjuster.getStackedTrades()

    print("* Adjusted at {:02f} secs".format(time.time() - startTime))
      
    print("----------------------------------------------------------------------")
    print("---------------- Stats for Adjusted but Unclean Data ------------------")
    print("----------------------------------------------------------------------")
    taqstats = printStats(trades, quotes, seconds)
      
    # Plot Trade and Mid-Quote Returns
    title = str(seconds) + ' seconds Trade and Mid-Quote Returns for ' + ticker + '\nwith the Adjusted Data'
    outputFile = "/Users/canelbiryol/Figs/" + ticker + "_" + str(seconds) + "sec_adjusted.png"
    plotReturns(taqstats, title, outputFile)
     
    ### Clean Data
    cleaner = TAQCleaner(quotes, trades, kT, gammaT, kQ, gammaQ )
     
     
    # Get results
    quotes = np.delete(quotes, cleaner.cleanQuotesIndices(), axis = 0)
    trades = np.delete(trades, cleaner.cleanTradesIndices(), axis = 0)
     
    print("* Cleaned at {:02f} secs".format(time.time() - startTime))
     
    print("----------------------------------------------------------------------")
    print("---------------- Stats for Adjusted and Clean Data ----------------")
    print("----------------------------------------------------------------------")
    taqstats = printStats(trades, quotes, seconds)
     
    # Plot Trade and Mid-Quote Returns
    title = '{:d} seconds Trade and Mid-Quote Returns for {:s}\nwith the Adjusted and Cleaned Data. ( kT: {:d}, gT: {:f}, kQ: {:d}, gQ: {:f} )'.format(
Beispiel #8
0
# Script fragment: `stack`, `ticker`, `multmap`, `filepathcln`, `start` and
# `end` are defined before this point. Every status line reports the seconds
# elapsed since `start`.
print('Finished stacking {:s} at {:.1f}s'.format(ticker, (end - start)))

# Fetch the stacked arrays
quotes = stack.getStackedQuotes()
trades = stack.getStackedTrades()
end = time.time()
print('Got results {:s} at {:.1f}s'.format(ticker, (end - start)))

# Adjust quotes and trades
adjuster = TAQAdjust(quotes, trades, ticker, multmap)
adjuster.adjustQuote()
adjuster.adjustTrade()
end = time.time()
print('Adjusted {:s} at {:.1f}s'.format(ticker, (end - start)))

# Clean: keep only the rows whose index entry equals True
cleaner = TAQCleaner(quotes, trades, ticker)
indextrades = cleaner.cleanTradesIndices()
indexquotes = cleaner.cleanQuotesIndices()
quotes = quotes[indexquotes == True, :]
trades = trades[indextrades == True, :]
# Fraction of zero/False entries in each index array, i.e. the share of rows
# dropped above (trades first, then quotes).
# NOTE(review): divides by the array length, so an empty array raises.
print((len(indextrades) - np.count_nonzero(indextrades)) / len(indextrades))
print((len(indexquotes) - np.count_nonzero(indexquotes)) / len(indexquotes))
end = time.time()
print('Cleaned {:s} at {:.1f}s'.format(ticker, (end - start)))

# Persist through the cleaner object
cleaner.storeCleanedQuotes(filepathcln)
cleaner.storeCleanedTrades(filepathcln)
end = time.time()
print('Stored cleaned {:s} at {:.1f}s'.format(ticker, (end - start)))
Beispiel #9
0
            # Get results
            quotes = stack.getStackedQuotes()
            trades = stack.getStackedTrades()
            end = time.time()
            print('Got results {:s} at {:.1f}s'.format(ticker, (end - start)))

            # Adjust
            adjuster = TAQAdjust(quotes, trades, ticker, multmap)
            adjuster.adjustQuote()
            adjuster.adjustTrade()
            end = time.time()
            print('Adjusted {:s} at {:.1f}s'.format(ticker, (end - start)))

            # Clean
            cleaner = TAQCleaner(quotes, trades, ticker)
            quotes = quotes[cleaner.cleanQuotesIndices() == True, :]
            trades = trades[cleaner.cleanTradesIndices() == True, :]
            end = time.time()
            print('Cleaned {:s} at {:.1f}s'.format(ticker, (end - start)))

            cleaner.storeCleanedQuotes(filepathcln)
            cleaner.storeCleanedTrades(filepathcln)
            end = time.time()
            print('Stored cleaned {:s} at {:.1f}s'.format(
                ticker, (end - start)))

            j = j + 1
        except Exception as e:
            errored.append(ticker)
            print("!!!! Failed processing ticker: {:s}".format(ticker))
# Script fragment: `stacks`, `k_test`, `gamma_test`, `skews`, `kurtosiss` and
# `l` are defined before this point.
print(stacks)
for s in stacks:
    print('One more stock stacked')
    s.addQuotes()

# skews[i, j] / kurtosiss[i, j] accumulate, over all stacks, the skewness and
# kurtosis of the second-to-last quote column after cleaning with
# (k_test[i], gamma_test[j]); each sum is then divided by `l`.
i = 0
for k in k_test:
    j = 0
    for gamma in gamma_test:
        for s in stacks:
            tckr = s.getTicker()
            print(tckr)
            quotes = s.getStackedQuotes()
            if quotes.size == 0:
                # Nothing to clean for this ticker.
                # NOTE(review): the sum is still divided by `l` below.
                continue
            cleaner = TAQCleaner(quotes, [], tckr, k, gamma, k, gamma)
            # Keep only the rows the cleaner flags as True.
            quotes = quotes[cleaner.cleanQuotesIndices() == True, :]
            # `np.float` was removed in NumPy 1.24; the builtin `float` is the
            # same dtype. Convert once and reuse for both statistics.
            values = quotes[:, -2].astype(float)
            skews[i, j] += skew(values)
            kurtosiss[i, j] += kurtosis(values)
        skews[i, j] = skews[i, j] / l
        kurtosiss[i, j] = kurtosiss[i, j] / l
        j += 1
        print(i, j)
    i += 1

# Grid positions of the smallest average skewness and kurtosis.
min1 = np.unravel_index(np.argmin(skews, axis=None), skews.shape)
min2 = np.unravel_index(np.argmin(kurtosiss, axis=None), kurtosiss.shape)

# Display results, leave analysis to the programmer

print(min1)
Beispiel #11
0
# Script fragment: `baseDir`, `startDate`, `endDate`, `sampleticks`, `k_test`,
# `gamma_test`, `skews`, `kurtosiss` and `l` are defined before this point.
# One StackData per sample ticker, with trades loaded for each.
stacks = np.array(
    [StackData(baseDir, startDate, endDate, ticker) for ticker in sampleticks])
print(stacks)
for s in stacks:
    print('hey')
    s.addTrades()

# skews[i, j] / kurtosiss[i, j] accumulate, over all stacks, the skewness and
# kurtosis of the second-to-last trade column after cleaning with
# (k_test[i], gamma_test[j]); each sum is then divided by `l`.
i = 0
for k in k_test:
    j = 0
    for gamma in gamma_test:
        for s in stacks:
            trades = s.getStackedTrades()
            if trades.size == 0:
                # Nothing to clean for this ticker.
                # NOTE(review): the sum is still divided by `l` below.
                continue
            cleaner = TAQCleaner([], trades, k, gamma)
            # Delete the rows selected by the cleaner.
            trades = np.delete(trades, cleaner.cleanTradesIndices(), axis=0)
            # `np.float` was removed in NumPy 1.24; the builtin `float` is the
            # same dtype. Convert once and reuse for both statistics.
            values = trades[:, -2].astype(float)
            skews[i, j] += skew(values)
            kurtosiss[i, j] += kurtosis(values)
        skews[i, j] = skews[i, j] / l
        kurtosiss[i, j] = kurtosiss[i, j] / l
        j += 1
        print(i, j)
    i += 1

# Grid positions of the smallest average skewness and kurtosis.
min1 = np.unravel_index(np.argmin(skews, axis=None), skews.shape)
min2 = np.unravel_index(np.argmin(kurtosiss, axis=None), kurtosiss.shape)

# Display results, leave analysis to the programmer
Beispiel #12
0
    def test1(self):
        '''
        Round-trip test: adjust and clean a small IBM sample, store it, then
        read the stored files back and check the last record of each file.

        Price multiplier 2.0 and volume multiplier 4.0 are set for 20070621,
        so the expected values are the raw prices halved and the raw sizes
        quartered.
        '''
        # Stocks and trades
        s_p500 = '/media/louis/DATA/documents/cours/NYU/SPRING_18/ATQS/HK1/s_p500.xlsx'
        stackedTrades = np.array(
            [['20070620', 34241000, 106.5, 85200.0],
             ['20070621', 57596000, 106.61000061035156, 500.0],
             ['20070621', 57596000, 106.61000061035156, 200.0],
             ['20070621', 57597000, 106.5999984741211, 200.0],
             ['20070621', 57597000, 106.5999984741211, 200.0],
             ['20070621', 57597000, 106.5999984741211, 200.0]])
        stackedQuotes = np.array(
            [['20070620', 34241000, 106.5, 85200.0, 106.1, 8200.0],
             ['20070621', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 57597000, 106.5, 85200.0, 106.1, 800.0],
             ['20070621', 57597000, 106.5, 85200.0, 106.1, 800.0]])

        # Directories where to store
        filepathadj = '/media/louis/DATA/documents/cours/NYU/SPRING_18/ATQS/HK1/adj/'
        filepathcln = '/media/louis/DATA/documents/cours/NYU/SPRING_18/ATQS/HK1/cln/'

        # Multipliers map
        multmap = AdjustingHashmap(s_p500)

        # Write after reading and adjusting
        adjuster = TAQAdjust(stackedQuotes, stackedTrades, 'IBM', multmap)
        adjuster.setPriceMult("20070621", 2.0)
        adjuster.setVolMult("20070621", 4.0)
        adjuster.adjustQuote()
        adjuster.adjustTrade()
        adjuster.storeAdjustedQuotes(filepathadj)
        adjuster.storeAdjustedTrades(filepathadj)

        # Write after reading and cleaning
        cleaner = TAQCleaner(stackedQuotes, stackedTrades, 'IBM')
        stackedQuotes = stackedQuotes[cleaner.cleanQuotesIndices() == True, :]
        stackedTrades = stackedTrades[cleaner.cleanTradesIndices() == True, :]
        cleaner.storeCleanedQuotes(filepathcln)
        cleaner.storeCleanedTrades(filepathcln)

        # Read results back from the two output directories
        readerclnQ = TAQQuotesReader(
            filepathcln + 'quotes/20070620/IBM_quotes.binRQ')
        readerclnT = TAQTradesReader(
            filepathcln + 'trades/20070620/IBM_trades.binRT')
        readeradjQ = TAQQuotesReader(
            filepathadj + 'quotes/20070620/IBM_quotes.binRQ')
        readeradjT = TAQTradesReader(
            filepathadj + 'trades/20070620/IBM_trades.binRT')

        # assertEqual / assertAlmostEqual are used throughout: the *Equals
        # aliases were removed in Python 3.12.

        # Cleaned quotes: last record
        lastQ = readerclnQ.getN() - 1
        self.assertEqual(readerclnQ.getN(), 5)
        self.assertEqual(readerclnQ.getSecsFromEpocToMidn(), 0)
        self.assertEqual(readerclnQ.getMillisFromMidn(lastQ), 57597000)
        self.assertEqual(readerclnQ.getBidSize(lastQ), 21300)
        self.assertEqual(readerclnQ.getAskSize(lastQ), 200)
        self.assertAlmostEqual(readerclnQ.getAskPrice(lastQ), 53.0499, 3)
        self.assertAlmostEqual(readerclnQ.getBidPrice(lastQ), 53.25, 3)

        # Adjusted quotes: last record
        lastQ = readeradjQ.getN() - 1
        self.assertEqual(readeradjQ.getN(), 5)
        self.assertEqual(readeradjQ.getSecsFromEpocToMidn(), 0)
        self.assertEqual(readeradjQ.getMillisFromMidn(lastQ), 57597000)
        self.assertEqual(readeradjQ.getBidSize(lastQ), 21300)
        self.assertEqual(readeradjQ.getAskSize(lastQ), 200)
        self.assertAlmostEqual(readeradjQ.getAskPrice(lastQ), 53.0499, 3)
        self.assertAlmostEqual(readeradjQ.getBidPrice(lastQ), 53.25, 3)

        # Cleaned trades: last record
        lastT = readerclnT.getN() - 1
        self.assertEqual(readerclnT.getN(), 6)
        self.assertEqual(readerclnT.getSecsFromEpocToMidn(), 0)
        self.assertEqual(readerclnT.getMillisFromMidn(lastT), 57597000)
        self.assertEqual(readerclnT.getSize(lastT), 50)
        self.assertAlmostEqual(readerclnT.getPrice(lastT), 53.29999, 3)

        # Adjusted trades: last record
        lastT = readeradjT.getN() - 1
        self.assertEqual(readeradjT.getN(), 6)
        self.assertEqual(readeradjT.getSecsFromEpocToMidn(), 0)
        self.assertEqual(readeradjT.getMillisFromMidn(lastT), 57597000)
        self.assertEqual(readeradjT.getSize(lastT), 50)
        # Read the price from the adjusted reader; the original queried the
        # cleaned reader here, so the adjusted price was never checked.
        self.assertAlmostEqual(readeradjT.getPrice(lastT), 53.29999, 3)