def testCheckrequiredColumnsWithReqColFail(self): '''Test method DataFrameUtil.checkRequiredInputFields''' reqCol = ReqCol() finReqCol = FinReqCol() fail = pd.DataFrame( columns=['Time', 'Symb', 'Side', 'Price', 'Qty', 'Account']) rc = pd.DataFrame(columns=reqCol.columns) gotve = False try: DataFrameUtil.checkRequiredInputFields(fail, reqCol.columns) except ValueError as ex: print(ex) gotve = True finally: self.assertTrue(gotve, "Failed to throw value error") gotve = False try: DataFrameUtil.checkRequiredInputFields(rc, finReqCol.columns) except ValueError as ex: gotve = True finally: self.assertTrue(gotve, "Failed to throw a ValueError")
def testGetListTickerDF(self): ''' Testing Statement_DAS.getListTickerDF ''' rc = ReqCol() tickers = [ 'MU', 'MU', 'MU', 'TWTR', 'TWTR', 'TWTR', 'TWTR', 'TWTR', 'TWTR', 'AAPL', 'AAPL', 'AAPL', 'AAPL', 'AAPL', 'AAPL', 'AAPL', 'MU', 'MU', 'MU' ] U1 = "U12345" U2 = "TR12345" accounts = [ U1, U1, U1, U1, U1, U1, U2, U2, U2, U2, U1, U2, U2, U1, U1, U1, U2, U2, U2 ] testSet = list(zip(tickers, accounts)) apd = pd.DataFrame(testSet, columns=[rc.ticker, rc.acct]) ipd = InputDataFrame() listDf = ipd.getListTickerDF(apd) #A dataframe for each ticker in both accounts self.assertEqual(len(listDf), 6) for df in listDf: self.assertEqual(len(df[rc.ticker].unique()), 1) self.assertEqual(len(df[rc.acct].unique()), 1)
def createSingleTicket(self, tickTx): ''' Create a single row ticket from a Dataframe with a list (1 or more) of Transactions. :prerequisites: tickTx needs to have 'len(unique()) == 1' for side, symb, account, and cloid. That uniqueness is created in getListOfTicketDF() so use that to create a list of ticktTX. :params tickTx: A DataFrame with transactions from a single ticket :return: A single row data frame with total shares and average price. ''' rc = ReqCol() total = 0 totalPL = 0 for dummy, row in tickTx.iterrows(): total = total + (row[rc.price] * row[rc.shares]) totalPL = totalPL + row[rc.PL] totalShares = tickTx[rc.shares].sum() avgPrice = total / totalShares newDf = DataFrameUtil.createDf(tickTx, 0) oneRow = tickTx.sort_values([rc.time, rc.price]).head(1) newDf = newDf.append(oneRow) newDf.copy() newDf[rc.price] = avgPrice newDf[rc.shares] = totalShares newDf[rc.PL] = totalPL return newDf
def getTrades(self, listDf=None): ''' Create an alternate dataFrame by ticket. For large share sizes this may have dramatically fewer transactions. :params listDf: Normally leave blank. If used, listDf should be the be a list of DFs. :params jf: A JournalFiles object as this new CSV file needs to be written into the outdir. :return: The DataFrame created version of the data. :side effects: Saves a csv file of all transactions as single ticket transactions to jf.inpathfile ''' # TODO: Add the date to the saved file name after we get the date sorted out. rc = ReqCol() if not listDf: listDf = self.getListOfTicketDF() DataFrameUtil.checkRequiredInputFields(listDf[0], rc.columns) newDF = DataFrameUtil.createDf(listDf[0], 0) for tick in listDf: t = self.createSingleTicket(tick) newDF = newDF.append(t) outfile = "tradesByTicket.csv" opf = os.path.join(self.jf.indir, outfile) newDF.to_csv(opf) self.jf.resetInfile(outfile) return newDF, self.jf
def qtSwing(self, df, swingTrade): c = ReqCol() for strade in swingTrade: ubc = Ubc() print(strade['acct'], strade['ticker'], strade['shares']) trade = df[(df[c.ticker] == strade['ticker']) & (df[c.acct] == strade['acct'])] keepTrying = True while keepTrying: ubc.runDialog(trade, strade['ticker'], strade['shares'], strade) ok = ubc.exec() print(ok) if strade['shares'] != 0: msg = strade[ 'ticker'] + ' still has unbalanced amounts. The trade must be balanced\n' msg += 'to continue. Would you like to continue?\n' ok = QMessageBox.question(ubc, 'ShareBalance', msg, QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if ok == QMessageBox.Yes: print('Yes clicked.') ubc = Ubc() else: print('No clicked.') return None, False else: keepTrying = False print() print(swingTrade) return swingTrade, True
def getOvernightTrades(self, dframe): ''' Create the overnightTrade (aka swingTrade data structure) from the list of overnight holds. Overnight holds are inferred from an unbalanced number of shares. Until we ask the user, we won't know whether before or after or both :params dframe: The Original unaltered input file with the days trades that includes the columns rc.ticker and rc.share :return: overnightTrades, a list of dict The dict has the keys (ticker, shares, before, after, acct) Elsewhere in the program the variable is referred to as swingTrade or swtrade. We do not have the info whether there was shares held before open or shares are held after close or both. ''' rc = ReqCol() ldf_tick = self.getListTickerDF(dframe) self.trades = ldf_tick overnightTrade = list() i = 0 for ticker in ldf_tick: if ticker[rc.shares].sum() != 0: overnightTrade.append(dict()) overnightTrade[i]['ticker'] = ticker[rc.ticker].unique()[0] overnightTrade[i]['shares'] = ticker[rc.shares].sum() overnightTrade[i]['before'] = 0 overnightTrade[i]['after'] = 0 overnightTrade[i]['acct'] = ticker[rc.acct].unique()[0] i = i + 1 return overnightTrade
def mkShortsNegative(self, dframe): ''' Fix the shares sold to be negative values. @testpu''' rc = ReqCol() for i, row in dframe.iterrows(): if row[rc.side] != 'B' and row[rc.shares] > 0: dframe.at[i, rc.shares] = ((dframe.at[i, rc.shares]) * -1) return dframe
def __init__(self, jf, df=None): self.jf = jf if not isinstance(df, pd.DataFrame): self.df = pd.read_csv(self.jf.inpathfile) else: self.df = df if 'Date' not in self.df.columns: self.df['Date'] = jf.theDate rc = ReqCol() DataFrameUtil.checkRequiredInputFields(self.df, rc.columns)
def test_NewSingleTxPerTicket(self): ''' Test the method Statement_DAS.newSingleTxPerTicket. That method creates a new csv file reducing multi row transactions to a single row, averaging the prices, totaling the amounts. Explicitly tests: A newFile has been created and made the infile of JournalFiles. The PL summary is the same between the two files . The shares total for each symbol/account/buy/sell is the same ''' rc = ReqCol() for infile in self.infiles: outdir = 'out/' indir = 'data/' indir = os.path.realpath(indir) jf = JournalFiles(indir=indir, infile=infile, outdir=outdir) origdframe = pd.read_csv(jf.inpathfile) originfile = jf.infile tkt = Statement_DAS(jf) newDF, jf = tkt.getTrades() self.assertNotEqual(originfile, jf.infile) newdframe = pd.read_csv(jf.inpathfile) self.assertAlmostEqual(origdframe[rc.PL].sum(), newdframe[rc.PL].sum(), places=10) self.assertAlmostEqual(newDF[rc.PL].sum(), newdframe[rc.PL].sum(), places=10) for symbol in origdframe[rc.ticker].unique(): for accnt in origdframe[rc.acct].unique(): d = origdframe n = newDF d = d[d[rc.ticker] == symbol] d = d[d[rc.acct] == accnt] dbuy = d[d[rc.side].str.startswith('B')] dsell = d[d[rc.side].str.startswith('S')] n = n[n[rc.ticker] == symbol] n = n[n[rc.acct] == accnt] nbuy = n[n[rc.side].str.startswith('B')] nsell = n[n[rc.side].str.startswith('S')] self.assertEqual(dbuy[rc.shares].sum(), nbuy[rc.shares].sum()) self.assertEqual(dsell[rc.shares].sum(), nsell[rc.shares].sum())
def test_CreateSingleTicket(self): ''' Test the method Statement_DAS.createSingleTicket. Requires the list of dfs created by getListOfTicketDF. Explicitly test that each element is a 1 row DataFrame. That the new price, (the average price of its transactions) is always greater than the min and less than the max. And finally check that the total number of shares (total) is the same as the sum of shares in constituent transactions. ''' rc = ReqCol() indir = 'data/' outdir = 'out/' infiles = self.infiles for infile in infiles: jf = JournalFiles(indir=indir, infile=infile, outdir=outdir) tkt = Statement_DAS(jf) listTick = tkt.getListOfTicketDF() totalSharesForDay = 0 for tick in listTick: singleTicket = tkt.createSingleTicket(tick) self.assertIsInstance(singleTicket, type(pd.DataFrame()), "Failed to create a DataFrame") self.assertEqual(len(singleTicket), 1, "Failed to create a single item ticket") # print(tick[rc.price].min()) # print(singleTicket[rc.price].unique()[0]) # print(tick[rc.price].max()) # print() try: isclose(singleTicket[rc.price].unique()[0], tick[rc.price].max(), abs_tol=1e-8) except AssertionError: self.assertLessEqual(singleTicket[rc.price].unique()[0], tick[rc.price].max()) try: isclose(singleTicket[rc.price].unique()[0], tick[rc.price].min(), abs_tol=1e-8) except AssertionError: self.assertGreaterEqual(singleTicket[rc.price].unique()[0], tick[rc.price].min()) totalSharesForDay = totalSharesForDay + tick[rc.shares].sum() dframe = pd.read_csv(jf.inpathfile) self.assertEqual( dframe[rc.shares].sum(), totalSharesForDay, "Failed to acount for all the shares transacted.")
def zeroPadTimeStr(self, dframe): ''' Guarantee that the time format xx:xx:xx ''' rc = ReqCol() # time = rc.time for i, row in dframe.iterrows(): tm = row[rc.time] tms = tm.split(":") if int(len(tms[0]) < 2): if not tms[0].startswith("0"): tm = "0" + tm dframe.at[i, rc.time] = tm return dframe
def test_addFinReqCol(self): ''' Test the method journal.definetrades.TestDefineTrades.addFinReqCol ''' rc = ReqCol() frc = FinReqCol() df = pd.DataFrame(np.random.randint(0, 1000, size=(10, len(rc.columns))), columns=rc.columns) dtrades = DefineTrades() df = dtrades.addFinReqCol(df) for x in frc.columns: self.assertIn(x, df.columns) self.assertGreaterEqual(len(df.columns), len(frc.columns))
def testCheckReqColumnsWithReqColSuccess(self): '''Test return values of DataFrameUtil.checkRequiredInputFields''' reqCol = ReqCol() finReqCol = FinReqCol() frc = pd.DataFrame(columns=finReqCol.columns) t1 = False t2 = False try: t1 = DataFrameUtil.checkRequiredInputFields(frc, finReqCol.columns) t2 = DataFrameUtil.checkRequiredInputFields(frc, reqCol.columns) except ValueError as ex: print(ex) self.assertTrue(t1) self.assertTrue(t2)
def processInputFile(self, trades, theDate=None, jf=None): ''' Run the methods for this object ''' reqCol = ReqCol() DataFrameUtil.checkRequiredInputFields(trades, reqCol.columns) trades = self.zeroPadTimeStr(trades) trades = trades.sort_values( [reqCol.acct, reqCol.ticker, reqCol.date, reqCol.time]) trades = self.mkShortsNegative(trades) swingTrade = self.getOvernightTrades(trades) swingTrade, success = self.figureOvernightTransactions(trades, jf) if not success: return None, success trades = self.insertOvernightRow(trades, swingTrade) trades = self.addDateField(trades, theDate) return trades, True
def normColumns_IBActivity(self, df): ''' The so called norm will have to become the new norm. This particular method is similar enough between CSV, Daily, and Activity to combine the 3 into one ''' rc = ReqCol() if 'PL' not in df.columns: df['PL'] = 0 df = df[['Date/Time', 'Symbol', 'T. Price', 'Quantity', 'Account', 'Proceeds', 'PL', 'Code']].copy() df['Date'] = df['Date/Time'] df[rc.side] = '' df.Quantity = df.Quantity.astype(int) df['T. Price'] = df['T. Price'].astype(float) df['Proceeds'] = df['Proceeds'].astype(float) df['Code'] = df['Code'].astype(str) for i, row in df.iterrows(): # df.at[i, 'Date'] = df.at[i, 'Date'][:10] cleandate = pd.Timestamp(df.at[i, 'Date']) df.at[i, 'Date'] = cleandate.strftime('%Y-%m-%d %H:%M:%S') df.at[i, 'Date/Time'] = df.at[i, 'Date/Time'][12:] code = df.at[i, 'Code'].split(';') if df.at[i, 'Quantity'] < 0: df.at[i, rc.side] = 'S' else: df.at[i, rc.side] = 'B' for c in code: if c in ['O', 'C']: df.at[i, 'Code'] = c continue df = df.rename(columns={'Date/Time': rc.time, 'Symbol': rc.ticker, 'T. Price': rc.price, 'Quantity': rc.shares, 'Account': rc.acct, 'Code': 'O/C', 'PL': rc.PL}) return df
def test_MkShortNegative(self): ''' Test the method Statement_DAS.mkShortsNegative ''' rc = ReqCol() for dummy in range(random.randint(2, 10)): side, mult, shares = getTestSet(random.randint(4, 20)) testSet = list(zip(side, shares)) apd = pd.DataFrame(testSet, columns=[rc.side, rc.shares]) for i in range(len(side)): # self.assertEqual(apd[rc.shares][i], shares[i]) self.assertEqual(apd[rc.shares][i], shares[i]) idf = InputDataFrame() apd = idf.mkShortsNegative(apd) for i in range(len(side)): self.assertEqual(apd[rc.shares][i], shares[i] * mult[i])
def test_addDateFieldx(self): ''' Test the method writeShareBalance. Send some randomly generated trades side and qty and test the share balance that returns. Sort both and compare the results using the place index iloc ''' NUMTRADES = 4 start = pd.Timestamp('2018-06-06 09:30:00') df = pd.DataFrame() exclude = [] for i in range(NUMTRADES): tdf, start = randomTradeGenerator2(i + 1, earliest=start, pdbool=True, exclude=exclude) df = df.append(tdf) exclude.append(tdf.Symb.unique()[0]) df.reset_index(drop=True, inplace=True) rc = ReqCol() df2 = df[['Time', 'Symb', 'Side', 'Qty', 'Account', 'P / L']].copy() idf = InputDataFrame() df2 = idf.addDateField(df2, start) for i in range(len(df2)): rprev = rnext = '' row = df2.iloc[i] rprev = df2.iloc[i - 1] if i != 0 else '' rnext = df2.iloc[i + 1] if i < (len(df2) - 1) else '' daydelt = pd.Timedelta(days=1) # print(row.Side, type(rprev), type(rnext)) rt = pd.Timestamp(row.Time) rd = pd.Timestamp(row.Date) assert rt.time() == rd.time() if row.Side == 'HOLD-B' or row.Side == 'HOLD+B': assert row.Date.date() == rnext.Date.date() - daydelt if row.Side == 'HOLD-' or row.Side == 'HOLD+': assert row.Date.date() == rprev.Date.date() + daydelt return df2
def test_GetListOfTicketDF(self): ''' Test the method Statement_DAS.getListOfTicketDF. Explicitly tests: Each ticket has only long or short only Each ticket has a single ticker symbol, cloid, and account ''' rc = ReqCol() outdir = 'data/' # A list of files that were problematic infiles = self.infiles # otherinfiles = ['trades.911.noPL.csv', 'trades.csv'] for f in infiles: # trade = os.path.join(outdir, f) jf = JournalFiles(indir=outdir, infile=f, outdir='out/', mydevel=True) tkt = Statement_DAS(jf) tktList = tkt.getListOfTicketDF() totalTX = 0 for ticket in tktList: self.assertEqual( len(ticket[rc.side].unique()), 1, "There can only be one side, long or short, in a ticket") self.assertEqual(len(ticket[rc.ticker].unique()), 1, "There can only be one ticker in a ticket") self.assertEqual(len(ticket['Cloid'].unique()), 1, "There can be only one Cloid in a ticket") self.assertEqual(len(ticket[rc.acct].unique()), 1, "There can be only one account in a ticket") totalTX = totalTX + len(ticket) trades = pd.read_csv(jf.inpathfile) msg = "There is a discrepancy in number of transactions in the tickets" self.assertEqual(len(trades), totalTX, msg)
def getListTickerDF(self, dframe): ''' Returns a python list of all tickers/account traded in todays input file. :params dframe: The DataFrame with the days trades that includes the column tickCol (Symb by default and in DAS). :return: The list of tickers in the days trades represented by the DataFrame ''' rc = ReqCol() listOfTickers = list() for symb in dframe[rc.ticker].unique(): for acct in dframe[rc.acct][dframe[rc.ticker] == symb].unique(): # ldf = dframe[dframe[rc.ticker]==symb][dframe[rc.acct]==acct] ldf = dframe[dframe[rc.ticker] == symb] ldf = ldf[ldf[rc.acct] == acct] listOfTickers.append(ldf) # This code is too interdependent. gtoOvernightTrade, figureOvernightTrades, askUser # and insertOvernightRow combined with the data return listOfTickers
def _checkUniqueSIMTX(self): ''' Check the SIM transactions for uniqueness within (ticker, time, account). I believe these are always necessarily unique. I need to know if they are not. If found,the program should fail or alert the user and work around ''' rc = ReqCol() dframe = self.df #HACK ALERT #This is guaranteed to cause some future problem # If Cloid has some Sim ids ('AUTO') the column must have some str elements. Without this # it throws a TypeError and a Future Warning about changing code. For DataFrame columns # without any sim trades there are only floats. This is not guaranteed behavior, just # observed from my runs. And there there is some weirdness between numpy types and python # regarding what type to return for this comparison--and it may change in the future. # if len(dframe.Cloid.apply(lambda x: isinstance(x, str))) < 1 : doSomething = False for t in dframe['Cloid']: if isinstance(t, str): doSomething = True break if not doSomething: return df = dframe[dframe['Cloid'] == "AUTO"] tickerlist = list() for dummy, row in df.iterrows(): tickerlist.append((row[rc.time], row[rc.ticker], row[rc.acct])) tickerset = set(tickerlist) if len(tickerset) != len(tickerlist): # print("\nFound a Sim ticket that is not unique. # This should not be possible (but it happens).{}".format(tickerlist[-1])) return
def addDateField(self, trades, theDate): ''' Add the date column if it does not already exist and fill it with the date/time from the given date or today and the time column :params trades: ''' c = ReqCol() if not 'Date' in trades.columns: if theDate: theDate = pd.Timestamp(theDate) else: theDate = pd.Timestamp.today() trades['Date'] = theDate for i, row in trades.iterrows(): dd = row.Date tt = row.Time dadate = pd.Timestamp(dd.year, dd.month, dd.day, tt.hour, tt.minute, tt.second) trades.at[i, 'Date'] = dadate # We need to make up a date for Hold rows. Before holds were assigned an early AM time # and after holds a late PM time. The times were assigned for sorting. Before holds # will be given a date before a second trade date identified because they have been # sorted by [account, ticker, time]. Likewise an an after hold will be given the next # day after the previous trade. There should not be any single hold entries without an # actual trade from this input file but we will assert that fact in order to find # unaccountable weirdnesses. for i, row in trades.iterrows(): if row[c.side].lower().startswith('hold'): # Currently c.time a time string with no date. Compare early and late times datime = row[c.time] d = pd.Timestamp(datime) early = pd.Timestamp(d.year, d.month, d.day, 3, 0, 0) late = pd.Timestamp(d.year, d.month, d.day, 10, 59, 0) delt = pd.Timedelta(days=1) if d < early: assert row[c.side] in ['HOLD+B', 'HOLD-B'] assert len(trades) > i + 1 assert trades.at[i, c.ticker] == trades.at[i + 1, c.ticker] # Create the made up date- the day before the first tx from this input for # this trade. tradeday = trades.at[i + 1, c.date] tradeday = pd.Timestamp(tradeday) holdday = tradeday - delt holdtime = pd.Timestamp(holdday.year, holdday.month, holdday.day, d.hour, d.minute, d.second) trades.at[i, 'Date'] = holdtime elif d > late: assert row[c.side] in ['HOLD+', 'HOLD-'] assert i > 0 assert trades.at[i, c.ticker] == trades.at[i - 1, c.ticker] tradeday = trades.at[i - 1, c.date] tradeday = pd.Timestamp(tradeday) holdtime = tradeday + delt holdtime = pd.Timestamp(holdtime.year, holdtime.month, holdtime.day, d.hour, d.minute, d.second) # holdtime = holdtime.strftime('%Y-%m-%d %H:%M:%S') trades.at[i, c.date] = holdtime return trades
def insertOvernightRow(self, dframe, swTrade): ''' Insert non-transaction rows that show overnight transactions. Set Side to one of: HOLD+, HOLD-, HOLD+B, HOLD_B :params dframe: The trades dataframe. :params swTrade: A data structure holding information about tickers with unbalanced shares. ''' rc = ReqCol() newdf = DataFrameUtil.createDf(dframe, 0) for ldf in self.getListTickerDF(dframe): # print(ldf[rc.ticker].unique()[0], ldf[rc.acct].unique()[0]) for trade in swTrade: if (trade['ticker'] == ldf[rc.ticker].unique()[0] and (trade['acct'] == ldf[rc.acct].unique()[0])): # msg = "Got {0} with the balance {1}, before {2} and after {3} in {4}" # print(msg.format(trade['ticker'], trade['shares'], trade['before'], # trade['after'], trade['acct'])) # insert a non transaction HOLD row before transactions of the same ticker if trade['before'] != 0: newldf = DataFrameUtil.createDf(dframe, 1) for j, dummy in newldf.iterrows(): if j == len(newldf) - 1: newldf.at[j, rc.time] = '00:00:01' newldf.at[j, rc.ticker] = trade['ticker'] if trade['before'] > 0: newldf.at[j, rc.side] = "HOLD-B" else: newldf.at[j, rc.side] = "HOLD+B" newldf.at[j, rc.price] = float(0.0) newldf.at[j, rc.shares] = -trade['before'] # ZeroSubstance' newldf.at[j, rc.acct] = trade['acct'] newldf.at[j, rc.PL] = 0 ldf = newldf.append(ldf, ignore_index=True) break # Insert a non-transaction HOLD row after transactions from the same ticker # Reusing ldf for something different here...bad form ... maybe ... # adding columns then appending and starting over if trade['after'] != 0: # print("Are we good?") ldf = DataFrameUtil.addRows(ldf, 1) for j, dummy in ldf.iterrows(): if j == len(ldf) - 1: ldf.at[j, rc.time] = '23:59:59' ldf.at[j, rc.ticker] = trade['ticker'] if trade['after'] > 0: ldf.at[j, rc.side] = "HOLD+" else: ldf.at[j, rc.side] = "HOLD-" ldf.at[j, rc.price] = float(0.0) # -trade makes the share balance work in excel # for shares held after close ldf.at[j, rc.shares] = 0 # -trade['after'] # 'ZeroSubstance' ldf.at[j, rc.acct] = trade['acct'] ldf.at[j, rc.PL] = 0 newdf = newdf.append(ldf, ignore_index=True, sort=False) return newdf