def openIBStatementHtml(self, infile):
    '''
    Open an IB Statement in html form and collect the tables it contains.

    Every <div> whose id starts with one of the recognised 'tbl...' prefixes and that holds
    a <table> is read into a DataFrame and stored under the prefix with its leading 'tbl'
    removed (e.g. 'tblTrades' -> 'Trades'). The collected tables are handed to
    doctorHtmlTables, the trades are run through figureBAPL and then passed to
    StatementDB.processStatement.

    :params infile: Path of the html statement.
    :return: None if infile does not exist. (dict(), msg) if neither a Transactions nor a
             Trades table was found. Otherwise (tables, tablenames) where tables maps a
             table name to its DataFrame and tablenames maps each of those names to itself.
    :raise AssertionError: If a recognised <table> does not parse into exactly one
             DataFrame.
    '''
    # Local import: read_html wants a file-like object, passing literal html is deprecated
    from io import StringIO

    if not os.path.exists(infile):
        return
    soup = BeautifulSoup(readit(infile), 'html.parser')
    tbldivs = soup.find_all("div", id=lambda x: x and x.startswith('tbl'))
    title = soup.find('title').text
    self.parseTitle(title)

    prefixes = ('tblAccountInformation', 'tblOpenPositions',
                'tblLongOpenPositions', 'tblShortOpenPositions',
                'tblTransactions', 'tblTrades')
    tables = dict()
    for tableTag in tbldivs:
        tagId = tableTag['id']
        # The first prefix that matches, in the order listed, names the table
        prefix = next((p for p in prefixes if tagId.startswith(p)), None)
        if prefix is None:
            continue
        tab = tableTag.find("table")
        if not tab:
            continue
        frames = pd.read_html(StringIO(str(tab)))
        assert len(frames) == 1
        tables[prefix[3:]] = frames[0]

    if 'Transactions' not in tables and 'Trades' not in tables:
        msg = 'The statment lacks a trades table.'
        return dict(), msg

    # doctorHtmlTables is called for its effect on tables; its return value is not used.
    # NOTE(review): when only 'Transactions' was read, the lookup of tables['Trades'] below
    # presumably relies on doctorHtmlTables providing that key -- confirm.
    self.doctorHtmlTables(tables)
    posTab = tables.get('OpenPositions')
    tables['Trades'] = self.figureBAPL(tables['Trades'], posTab)

    ibdb = StatementDB(source='IB', db=self.db)
    ibdb.processStatement(tables['Trades'], self.account, self.beginDate,
                          self.endDate, openPos=posTab)

    # Only names of tables that actually exist are reported. The key of the last <div>
    # visited is no longer added, it could be '' or name a div that held no table.
    tablenames = {key: key for key in tables}
    return tables, tablenames
def openTradeFlexCSV(self, infile):
    '''
    Open a Trade flex statement csv file. This is a single table file. The headers are in
    the top row so just reading it with read_csv will collect them. This table is missing
    the Open/Close data.

    Sets self.inputType to 'T_FLEX' and, once the table is read, self.account,
    self.beginDate and self.endDate (from the first Account value and the min/max of the
    DateTime column). If cheatForBAPL returns a non-empty frame, that frame is sent to
    StatementDB.processStatement and returned as the Trades table.

    :params infile: Path (or anything pd.read_csv accepts) of the csv statement.
    :return: ({'Trades': df}, {'Trades': 'Trades'}) on success. (dict(), msg) when the
             statement has no trades, has execution level rows without OrderID, or has
             dates that cannot be converted.
    :raise ValueError: If LevelOfDetail contains neither 'order' nor 'execution'.
    :raise AssertionError: If the min or max DateTime value is falsy.
    '''
    df = pd.read_csv(infile)
    self.inputType = 'T_FLEX'
    rc = self.rc

    # This one table file has no tableid
    currentcols = list(df.columns)
    ourcols = self.getColsByTabid('FlexTrades')
    ourcols, missingcols = self.verifyAvailableCols(currentcols, ourcols, 'DailyTrades')
    df = df[ourcols].copy()
    df = df.rename(columns={'Date/Time': 'DateTime',
                            'Code': 'Codes',
                            'ClientAccountID': 'Account'})

    # A statement with headers only has no level of detail to inspect. Report it as having
    # no trades instead of falling through to the ValueError below.
    if df.empty:
        msg = 'This statement has no trades.'
        return dict(), msg

    lod = df['LevelOfDetail'].str.lower().unique()
    if 'order' in lod:
        pass
    elif 'execution' in lod:
        if 'OrderID' in missingcols:
            msg = 'This table contains transaction level data but lacks OrderID.'
            return dict(), msg
        df = self.combinePartialsFlexTrade(df)
    else:
        # TODO 2019-07-03 if this never trips, blitz the statmement for just in case
        raise ValueError(
            "If this trips, detemine if the data is savlagable")

    # combinePartialsFlexTrade may have left nothing
    if df.empty:
        msg = 'This statement has no trades.'
        return dict(), msg

    # The Codes col lacks the OpenClose codes so we are done with it.
    df = df.drop(['LevelOfDetail', 'Codes'], axis=1)
    df = self.unifyDateFormat(df)
    self.account = df['Account'].unique()[0]

    beg = df['DateTime'].min()
    end = df['DateTime'].max()
    assert beg
    assert end
    try:
        self.beginDate = pd.Timestamp(beg).date()
        self.endDate = pd.Timestamp(end).date()
    except ValueError:
        # Return the message like every other failure path does; it used to be dropped
        msg = f'Unknown date format error: {beg}, {end}'
        return dict(), msg

    df = df.rename(columns={'Symbol': rc.ticker, 'Quantity': rc.shares})
    x = self.cheatForBAPL(df)
    if not x.empty:
        ibdb = StatementDB(db=self.db, source='IB')
        ibdb.processStatement(x, self.account, self.beginDate, self.endDate)
        df = x.copy()
    return {'Trades': df}, {'Trades': 'Trades'}
def openActivityFlexCSV(self, df):
    '''
    Process a flex activity statement that has header rows and may or may not carry
    metadata rows. Metadata rows are identified by the BOF, BOA and BOS markers.

    The layout anticipates several accounts per file, but the table names are not worked
    out for that case yet, so more than one account raises an error for now.

    :params df: The statement as a DataFrame with integer column labels. Column 0 holds
                the row type (HEADER, DATA, BOS ...) and column 1 the table id.
    :return: (tables, tablenames) keyed by table id, or (tables, msg) as delivered by
             doctorFlexTables when it leaves no tables.
    :raise ValueError: If the file holds more than one account.
    :raise AssertionError: If a BOS/EOS section does not yield exactly one table and one
             metadata entry, or a header row is not as wide as its table.
    '''
    tables = {}
    tablenames = {}
    mcd = {}
    accountsmetadata = []

    frames, filemetadata = self.getFrame(('BOF', 'EOF'), df)
    if frames and isinstance(frames[0], pd.DataFrame):
        accounts, accountsmetadata = self.getFrame(('BOA', 'EOA'), frames[0])
        if len(accounts) > 1:
            raise ValueError(
                'Multiple accounts is not enabled for Ib Statement parsing')
        filemetadata = filemetadata[0]
        accountsmetadata = accountsmetadata[0]
    else:
        # No file level metadata, the frame itself is the single account
        accounts = [df]

    for acctFrame in accounts:
        if filemetadata:
            # filemetadata[1] is not used for the account, see accountsmetadata below
            self.statementname = filemetadata[2]
            self.beginDate = pd.Timestamp(filemetadata[4]).date()
            self.endDate = pd.Timestamp(filemetadata[5]).date()
        if accountsmetadata:
            self.account = accountsmetadata[1]

        for tabid in acctFrame[1].unique():
            section = acctFrame[acctFrame[1] == tabid]
            if 'BOS' in section[0].unique():
                # Strip the section level metadata rows
                tab, tabmetadata = self.getFrame(('BOS', 'EOS'), section)
                assert len(tab) == 1
                assert len(tabmetadata) == 1
                section = tab[0]

            width = len(list(section.columns))
            headers = list(section[section[0] == 'HEADER'].iloc[0])
            section = section[section[0] == 'DATA']
            assert width == len(headers)
            section.columns = headers

            wanted, missing = self.verifyAvailableCols(
                headers, self.getColsByTabid(tabid), tabid)
            if not wanted:
                continue
            selected = section[wanted]
            mcd[tabid] = missing

            # Store under the table id
            tables[tabid] = selected.copy()
            tablenames[tabid] = tabid

        tables, msg = self.doctorFlexTables(tables, mcd)
        if len(tables.keys()) == 0:
            # TODO When enabling multi accounts-- fix this to not return
            return tables, msg

        ibdb = StatementDB(db=self.db, source='IB')
        positions = tables.get('POST')
        tables['TRNT'] = self.figureBAPL(tables['TRNT'], positions)
        ibdb.processStatement(tables['TRNT'], self.account, self.beginDate,
                              self.endDate, openPos=positions)
    return tables, tablenames
def getTablesFromDefaultStatement(self, df):
    '''
    From a default Activity statement csv, retrieve AccountInformation, OpenPositions, and
    Trades.

    Column 0 of df names the section each row belongs to and column 1 marks the row as
    'header' or 'data' (case is ignored). The 'Long Open Positions' and 'Short Open
    Positions' sections are reduced to their summary rows and merged into a single
    'OpenPositions' table. The collected tables go through doctorDefaultCSVStatement and
    the trades are passed to StatementDB.processStatement.

    :params df: The statement as a DataFrame with integer column labels.
    :return: (tables, tablenames) keyed by the section name without spaces. (dict(), msg)
             if a section has more than one header row, if there is no Trades table after
             doctorDefaultCSVStatement, or if self.account is None.
    :raise ValueError: If the Long and Short Open Positions tables end up with different
             column sets.
    :raise AssertionError: If the first row of a section is not its header row or the
             header row is not as wide as the table.
    '''
    # df = pd.read_csv(infile, header=range(0,15))
    wantedSections = ['Statement', 'Account Information', 'Open Positions',
                      'Short Open Positions', 'Long Open Positions', 'Trades']
    tables = dict()
    tablenames = dict()
    mcd = dict()
    for key in df[0].unique():
        if key not in wantedSections:
            continue

        t = df[df[0] == key]
        headers = t[t[1].str.lower() == 'header']
        if len(headers) > 1:
            msg = f'This statement has {len(headers)} {key} tables.'
            msg = msg + '\nMulti account statment not supported.'
            return dict(), msg
        assert t.iloc[0][1].lower() == 'header'
        currentcols = list(t.columns)
        headers = headers.iloc[0]
        t = t[t[1].str.lower() == 'data']
        assert len(currentcols) == len(headers)
        t.columns = headers

        ourcols = self.getColsByTabid(key)
        ourcols, missingcols = self.verifyAvailableCols(headers, ourcols, key)
        if not ourcols:
            continue
        t = t[ourcols]

        if key in ['Long Open Positions', 'Short Open Positions']:
            # Keep the summary rows only and file them under the common OpenPositions key
            t = t[t['DataDiscriminator'].str.lower() == 'summary']
            key = 'OpenPositions'
            ourcols = self.getColsByTabid(key)
            if ourcols:
                ourcols, missingcols = self.verifyAvailableCols(
                    list(t.columns), ourcols, key)
                t = t[ourcols].copy()
            if 'OpenPositions' in tables:
                if set(tables['OpenPositions'].columns) != set(t.columns):
                    msg = 'A Programmer thing-- see it occur before I write code'
                    raise ValueError(msg)
                # DataFrame.append no longer exists in pandas 2, concat keeps the row
                # labels of both frames just as append did
                tables['OpenPositions'] = pd.concat([tables['OpenPositions'], t])
                tablenames['OpenPositions'] = 'OpenPositions'
                continue

        key = key.replace(' ', '')
        mcd[key] = missingcols
        tables[key] = t
        tablenames[key] = key

    tables = self.doctorDefaultCSVStatement(tables, mcd)
    if 'Trades' not in tables.keys():
        # This should maybe be a dialog
        msg = 'The statment lacks a trades table'
        return dict(), msg
    if self.account is None:
        msg = "This statement lacks an account number. \nCan't add it to the database"
        return dict(), msg

    ibdb = StatementDB(self.db, source='IB')
    openpos = None
    if 'OpenPositions' in tables.keys():
        openpos = tables['OpenPositions']
        # figureBAPL is given the open positions table, so it is only run when there is one.
        # Here we need to combine with cheatForBAPL to accommodate statements with no
        # OpenPositions
        tables['Trades'] = self.figureBAPL(tables['Trades'], openpos)
    ibdb.processStatement(tables['Trades'], self.account, self.beginDate,
                          self.endDate, openPos=openpos)
    return tables, tablenames