def account(lines):
    """
    [List] lines => [Tuple] ([String] account code
                            , [Iterable] positions)

    The positions iterable contains the account's holding positions
    followed by its cash positions, chained into one iterable.

    NOTE(review): the original docstring described a 3 element tuple
    (code, holdings, cashEntries); the code actually returns a 2 element
    tuple -- confirm against callers before relying on either shape.

    There are two cases:

    (1) Normal case:
        Account line (account code, name)
        holding section (0 or 1 holding section):
        ...
        cash section (0 or 1 cash section):
        ...

    (2) Special case:
        Account line (account code, name)
        No data for this account (second line)
    """
    # an account without data carries exactly this marker text in the
    # first cell of its second line
    emptyAccount = lambda L: True if len(L) > 0 and L[0] == 'No Data for this Account' \
                    else False

    if emptyAccount(lines[1]):
        return ('', [])

    # a cash section starts with a 'Branch Code' header cell
    cashSection = lambda L: True if len(L) > 0 and L[0] == 'Branch Code' \
                    else False

    # split the lines after the account line into the holding section and
    # the cash section; either may be absent (pop then yields None, which
    # readPosition() treats as an empty section)
    sections = itemGroup(cashSection, lines[1:])

    return ( readAccountCode(lines[0][0])
           , chain(readPosition(pop(sections)), readPosition(pop(sections))))
def getDateFromLines(lines):
    """
    [Iterable] lines => [String] date (yyyy-mm-dd)

    Read the date from the second line: the date is the last whitespace
    separated token of that line's first cell, in 'dd/mm/yy' format.
    """
    pop(lines)  # skip the first line
    rawDate = pop(lines)[0].split()[-1]
    return datetime.strftime( datetime.strptime(rawDate, '%d/%m/%y')
                            , '%Y-%m-%d')
def readSfcTemplate(file):
    """
    [String] file => [Tuple] ([List] headers, [Iterable] asset type lines)

    Each asset type line is a tuple of up to 5 criteria, looking like:

        ('China', 'Equity', 'Listed Equities')

    NOTE(review): the original docstring only mentioned '[List] filter
    string lines'; the function actually returns a (headers, lines)
    tuple -- confirm against callers.
    """
    # the header line starts with 'By asset class / by region'
    notHeaderLine = lambda line: \
        len(line) == 0 or not line[0].startswith('By asset class / by region')

    # keep the lines from the header line (inclusive) down to, but not
    # including, the 'Total' line
    takeBetweenLines = compose(
        partial(takewhile, lambda line: len(line) > 0 and line[0] != 'Total'),
        partial(dropwhile, notHeaderLine))

    # from the header line, take the non-empty cells after the first one,
    # up to (but not including) the 'Total' cell
    getHeaders = compose(list, partial(map, lambda s: s.strip()),
        partial(takewhile, lambda el: el != 'Total'),
        partial(dropwhile, lambda el: el == ''),
        lambda line: line[1:])

    # take the first 5 cells of a line and drop the trailing empty cells
    removeTrailingSpaces = compose(tuple, reversed, list,
        partial(dropwhile, lambda el: el == ''),
        reversed, lambda line: line[:5])

    lineToTuple = compose(
        lambda t: t if len(t) < 2 or t[-1] != t[-2] else t[:-1]  # in case the last 2
                                                                 # criteria are the same
        , removeTrailingSpaces)

    def fillupLeadingSpaces(lines):
        # Replace empty leading cells of a line with the corresponding
        # cells of the previously yielded line, so each tuple carries the
        # full criteria path.
        fillSpace = compose(
            tuple, partial(map, lambda t: t[0] if t[1] == '' else t[1]))

        # NOTE(review): when the previous line is longer, zip() truncates
        # the result to the current line's length -- presumably intended,
        # confirm against the template layout.
        combine = lambda previous, line: \
            zip(previous, line) if len(previous) > len(line) else \
            zip_longest(previous, line)

        previous = tuple(repeat('', 5))
        for line in lines:
            if line[0] == '':
                previous = fillSpace(combine(previous, line))
            else:
                previous = line
            yield previous
    # End of fillupLeadingSpaces()

    getAssetTypes = compose(fillupLeadingSpaces, partial(map, lineToTuple))

    return \
    compose(
        lambda t: (getHeaders(t[0]), getAssetTypes(t[1]))
        , lambda lines: (pop(lines), lines)
        , takeBetweenLines
        , fileToLines
    )(file)
def getRawPositions(lines):
    """
    [Iterable] lines => [Iterable] positions (dictionaries)

    Only the lines up to the first empty line are considered. The first
    of them provides the headers; each following line becomes a position
    dictionary keyed by those headers.
    """
    # Only a few fields (headers) will be useful in the output csv,
    # therefore we map those headers to field names in the output csv;
    # unrecognized headers pass through unchanged.
    headerMap = {
        'Account Name': 'portfolio',
        'Currency': 'currency',
        'Currency(or Equiv.)': 'currency',
        'Ledger Balance': 'balance',
        'Ledger Balance(Total Equiv.)': 'balance'
    }

    def headersFromLine(line):
        headers = []
        for cell in takewhile(lambda s: s != '', line):
            # a multi-line header cell: only its last line matters
            name = cell.split('\n')[-1]
            headers.append(headerMap.get(name, name))
        return headers

    contentLines = takewhile(
        lambda line: len(line) > 0 and line[0] != '', lines)
    headers = headersFromLine(pop(contentLines))

    return map(dict, map(lambda row: zip(headers, row), contentLines))
def readJPM(lines):
    """
    [Iterable] lines => [Tuple] ([String] date
                                , [List] holding positions
                                , [List] cash positions)

    From the lines of the JPM statement file, read out its date and a
    list of accounts. The worksheet consists of multiple accounts. The
    structure of data is like:

        Header lines (consist of date)
        Account 1 lines (consist of holding and cash)
        Account 2 lines
        ...
    """
    # an account section begins with a cell like 'Account: ...'
    accountLine = lambda L: True if len(L) > 0 and str(L[0]).startswith('Account:') \
                    else False

    # sections: the header section first, then one section per account
    sections = itemGroup(accountLine, lines)
    dateString = dateFromHeader(pop(sections))  # consume the first section

    # a Geneva holding position carries a 'name' key; cash entries do not
    isGenevaHolding = lambda x: 'name' in x

    # NOTE(review): reduce() consumes the outer map eagerly, which keeps
    # the section iterators produced by itemGroup() consumed in order --
    # do not replace with a fully lazy chain.from_iterable() without
    # checking itemGroup()'s behavior.
    (holdings, cashEntries) = \
        divide(isGenevaHolding
              , reduce(chain  # concatenate all positions (holding or cash)
                      , map(partial(genevaPosition, dateString)
                           , map(account, sections))
                      , []))

    return (dateString, holdings, cashEntries)
def convertAccumulateExcelToCSV(file):
    """
    [String] file => [String] file

    Read an accumulative trade excel file, write it as csv. We need to
    make sure dates are written as yyyy-mm-dd, so that it's consistent
    with a daily addon from the bloomberg aim trade file.

    The csv file name is the same as the excel file, except that its
    file extension is '.csv' instead of '.xlsx'

    This is an utility function that needs to run only once, to convert
    the excel version accumulate trade file into csv format. After that,
    we just need to add daily trades to that csv file.
    """
    # strip the excel extension ('.xlsx' or '.xls') and append 'csv';
    # any other extension raises via lognRaise()
    getOutputFileName = lambda fn: \
        fn[0:-4] + 'csv' if fn.endswith('.xlsx') else \
        fn[0:-3] + 'csv' if fn.endswith('.xls') else \
        lognRaise('convertAccumulateExcelToCSV(): invalid input file {0}'.format(fn))

    """
    [List] line => [List] headers

    Note the second header is an empty string, but we need to keep it.
    All other empty strings in the list are ignored
    """
    getHeaders = compose(list, partial(map, lambda t: t[1]),
        partial(takewhile, lambda t: t[0] < 2 or t[1] != ''),
        lambda line: zip(count(), line))

    def toDatetimeString(value):
        # excel may deliver a date as a float (excel ordinal) or as a
        # string in either 'mm/dd/yyyy' or 'dd/mm/yyyy' format
        if isinstance(value, float):
            return datetime.strftime(fromExcelOrdinal(value), '%Y-%m-%d')
        else:
            try:
                return datetime.strftime( datetime.strptime(value, '%m/%d/%Y')
                                        , '%Y-%m-%d')
            except ValueError:
                return datetime.strftime( datetime.strptime(value, '%d/%m/%Y')
                                        , '%Y-%m-%d')

    # normalize the 'Trade Date' and 'Settlement Date' columns of a line
    # to yyyy-mm-dd; all other columns pass through unchanged
    getLineItems = lambda headers, line: compose(
        partial( map
               , lambda t: toDatetimeString(t[1]) \
                    if t[0] in ['Trade Date', 'Settlement Date'] else t[1]
               )
        , lambda headers, line: zip(headers, line)
    )(headers, line)

    return compose(
        lambda rows: writeCsv(getOutputFileName(file), rows, delimiter=','),
        lambda t: chain([t[0]], map(partial(getLineItems, t[0]), t[1])),
        lambda lines: (getHeaders(pop(lines)), lines),
        fileToLines)(file)
def loadCountryGroupMappingFromFile(file):
    """
    [String] file => [Dictionary] country code -> country group

    Read the mapping file from the data directory. The first line is a
    header and is skipped; reading stops at the first line that has
    fewer than 3 columns or an empty first cell.
    """
    lines = fileToLines(join(getDataDirectory(), file))
    pop(lines)  # discard the header line

    validLines = takewhile(
        lambda line: len(line) > 2 and line[0] != '', lines)

    return {line[0]: line[2].strip() for line in validLines}
def loadRatingScoreMappingFromFile(file):
    """
    [String] rating score mapping file =>
        [Dictionary] (agency, rating string) -> rating score

    Read the mapping file from the data directory. The first line is a
    header and is skipped; reading stops at the first line that has
    fewer than 3 columns or an empty first cell.
    """
    lines = fileToLines(join(getDataDirectory(), file))
    pop(lines)  # discard the header line

    validLines = takewhile(
        lambda line: len(line) > 2 and line[0] != '', lines)

    return {(line[0], line[1]): line[2] for line in validLines}
def getRawPositions(lines):
    """
    [Iterable] lines => [Iterable] Positions

    lines: rows in a file, where each row is a list of columns.

    The first row supplies the headers (converted to strings and lower
    cased, up to the first blank cell); every following row up to the
    first row with a blank first cell becomes a position dictionary
    keyed by those headers.
    """
    headers = []
    for cell in pop(lines):
        text = str(cell).lower()
        if text.strip() == '':
            break
        headers.append(text)

    def isDataRow(row):
        return len(row) > 0 and row[0].strip() != ''

    return (dict(zip(headers, row)) for row in takewhile(isDataRow, lines))
def dateFromHeader(lines):
    """
    [Iterable] lines => [String] date (yyyy-mm-dd)

    From the header section of the JPM statement file, represented by a
    list of lines, extract the date of the statement. The date sits in
    the first cell of a line starting with 'As Of:', formatted like
    'As Of: 17-Jan-2019'.
    """
    monthNumber = { 'jan': '01', 'feb': '02', 'mar': '03', 'apr': '04'
                  , 'may': '05', 'jun': '06', 'jul': '07', 'aug': '08'
                  , 'sep': '09', 'oct': '10', 'nov': '11', 'dec': '12'}

    def hasDate(line):
        return len(line) > 0 and str(line[0]).startswith('As Of:')

    # first matching line, first cell; parts = [day, monthName, year]
    parts = pop(filter(hasDate, lines))[0].split(':')[1].strip().split('-')

    return parts[2] + '-' + monthNumber[parts[1].lower()] + '-' + parts[0]
def readPosition(lines):
    """
    [List] lines => [Iterable] holdings

    Where a holding is a dictionary object representing a position.
    The lines could be a holding section or a cash section, laid out as:

        header line(s)
        <empty line>
        lines for position 1
        <empty line>
        lines for position 2
        ...

    When lines is None (the section is absent), return an empty list.
    """
    # fix: use the identity test 'is None' instead of '== None' -- the
    # idiomatic check for None, immune to any custom __eq__ on lines
    if lines is None:
        return []

    sections = itemGroup(emptyLine, lines)
    headers = readHeaders(pop(sections))  # first section is the header

    return map(partial(position, headers), sections)
""" cashPosition = lambda date, p: \ { 'portfolio': p['Account ID']\ , 'custodian': ''\ , 'date': date\ , 'currency': p['Currency']\ , 'balance': p['SD Balance Local']\ } """ [Iterable] lines => [String] date, [Iterable] Positions Read the lines from a Nomura position or cash report, return date and positions from that report. """ getPositions = lambda file: \ (lambda lines: (dateFromLine(pop(lines)), getRawPositions(lines)))\ (fileToLines(file)) """ [Iterable] lines => ([List] headers, [Iterable] lines) Take the first line and convert it to headers, then return the headers and the remaining lines. This is NOT a pure function. The first line of lines is consumed. """ getHeadersnLines = lambda lines: \ ( list(takewhile(lambda x: x != '', map(lambda x: x.strip(), pop(lines))))\ , lines\ ) """ [Iterable] lines => [Iterable] Positions
, partial(takewhile, lambda s: s.strip() != '') , partial(map, str) ) """ [Iterable] lines => [List] Raw Positions Assume: each line is an iterator over its columns. """ getRawPositions = compose( list , partial(map, dict) , lambda t: map(partial(zip, t[0]), t[1]) , lambda lines: (getHeadersFromLine(pop(lines)), lines) ) """ [Iterator] lines => [Iterator] positions, [Dictionary] metaData Lines from a Geneva report are divided into sections by blank lines. If the report is about a single portfolio or consolidate = group for a group of portfolios, then the first section is its positions and the second section is its meta data. The function groupby() returns an iterator, according to our observation if we consume the 3rd section (the meta data) first, then first section (positions) we will get nothing. So we must use 'list' to get the positions
def getLqaData(date, mode='production', separator='|'):
    """
    [String] date (yyyymmdd), [String] mode => [Dictionary] id -> lqa data (dictionary)

    Read the LQA response file ('LqaData_<date>.bbg') from the input
    directory of the given mode and build a dictionary mapping security
    id to its lqa data, itself a dictionary of field -> value.
    """
    # NOTE: deliberately shadows any module level fileToLines() -- the
    # lqa file is a plain text file, read line by line.
    def fileToLines(file):
        with open(file, 'r') as lqaFile:
            for line in lqaFile:
                yield line.strip()

    getLqaDataFile = lambda date, mode: \
        join(getInputDirectory(mode), 'LqaData_' + date + '.bbg')

    toPosition = lambda headers, line: dict(zip(headers, line))

    # Take the lines between 'START-OF-DATA' and 'END-OF-DATA'
    takeInBetween = compose(
        lambda t: takewhile(lambda L: L[0] != 'END-OF-DATA', t[1]),
        lambda lines: (pop(lines), lines),
        partial(dropwhile, lambda L: len(L) == 0 or L[0] != 'START-OF-DATA'))

    # The below functions clean the data: string data are sometimes
    # enclosed in a pair of double quotes that must be removed; float
    # numbers may arrive as strings; and security ids may look like
    # "XS1234567890 PerfShs", where only the first part is wanted.
    stripDoubleQuote = lambda s: s.replace('"', '')

    def toNumberOrStripQuote(x):
        # fix: was a bare 'except:', which also swallowed SystemExit,
        # KeyboardInterrupt etc.; float() only raises TypeError (wrong
        # type) or ValueError (unparsable string)
        try:
            return float(x)
        except (TypeError, ValueError):
            return stripDoubleQuote(x)

    def updateSecurityId(p):
        # an ISIN code is 12 characters long; keep only the id part and
        # drop any trailing description
        if len(p['SECURITIES'].split()[0]) == 12:
            return mergeDict(p, {'SECURITIES': p['SECURITIES'].split()[0]})
        else:
            return p

    return \
    compose(
        dict
      , partial(map, lambda p: (p['SECURITIES'], p))
      , partial(map, updateSecurityId)
      , partial(map, partial(valmap, toNumberOrStripQuote))
      , lambda t: map(partial(toPosition, t[0]), t[1])
      , lambda lines: (list(map(stripDoubleQuote, pop(lines))), lines)
      , takeInBetween
      , partial(map, lambda line: line.split(separator))
      , fileToLines
      , lambda file: lognContinue('getLqaData(): from file: {0}'.format(file), file)
      , getLqaDataFile
    )(date, mode)