def getDailyFList(d, listIndexCache=None):
    """
    Return the list of SEC forms filed on the given date (or ISO date string),
    as tuples [(cik, cikName, formType, fileDate, accNo), ... ], based on the
    SEC daily index. Returns [] when no master index exists for the date
    (i.e. a market holiday). listIndexCache, if supplied, is a dict used to
    memoize the set of master index file names per quarter index URL.
    """
    dateStr = utils.toDateStr(d)
    listUrl, listIndexUrl = getSecDailyIndexUrls(dateStr)
    if listIndexCache is not None and listIndexUrl in listIndexCache:
        # Cache hit - reuse the previously downloaded set of index names.
        masterNames = listIndexCache[listIndexUrl]
    else:
        indexJson = utils.downloadSecUrl(listIndexUrl, toFormat='json')
        masterNames = {entry['name']
                       for entry in indexJson['directory']['item']
                       if entry['name'].startswith('master')}
        print(f'### list index {len(masterNames)}', end=' ')
        if listIndexCache is not None:
            listIndexCache[listIndexUrl] = masterNames
    if 'master.' + dateStr + '.idx' not in masterNames:
        # No daily index for this date - SEC holiday.
        print('HOLIDAY', end=' ')
        return []
    res = downloadSecFormList(listUrl)
    print('count for', dateStr + ':', len(res), end=' ')
    return res
def parse13FHoldings(accNo, formType=None):
    """
    Parse a 13F filing with the given accession number.

    Returns:
        { 'period': 'YYYY-MM-DD', 'acceptDate': 'YYYY-MM-DD',
          'acceptTime': 'HH:MM:SS', 'cik': 'DDDDDDDDDD',
          'holdings': [(cusip, name, value, title, count, putCall), ... ] }
    with field values as given in the filing's holdings table, except that
    putCall is 'CALL', 'PUT', or ''.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = [link[-1] for link in info['links']
               if link[0].lower().endswith('xml')]
    if len(xmlUrls) == 1:
        # Only the summary XML is present - no separate holdings table.
        summXml = utils.downloadSecUrl(xmlUrls[0], toFormat='xml')
        totNode = findChildSeries(
            summXml, ['formdata', 'summarypage', 'tableentrytotal'])
        if int(totNode.text.strip()) == 0:
            print('*** zero total, table not present')
        else:
            print('*** nonzero total, but table not present')
        holdings = []
    else:
        # The last XML link is the holdings table.
        tabXml = utils.downloadSecUrl(xmlUrls[-1], toFormat='xml')
        rows = [child for child in tabXml
                if child.tag.lower().endswith('infotable')]
        if len(tabXml) != len(rows):
            print('*** #rows mismatch', len(tabXml), 'all children',
                  len(rows), 'table rows')
        if not rows:
            print('*** no holdings in table')
        holdings = [getRowInfo(row) for row in rows]
    if len(info['ciks']) != 1:
        print('*** unexpected number of CIKs!=1', info['ciks'])
    return {
        'period': info['period'],
        'acceptDate': info['acceptDate'],
        'acceptTime': info['acceptTime'],
        'cik': info['ciks'][0],
        'holdings': holdings,
    }
def getCikToTickersMap():
    """
    Download and parse the SEC-maintained company_tickers.json list.

    Returns a defaultdict mapping cik (as a string) -> list of tickers.
    """
    tickersJSON = utils.downloadSecUrl('/files/company_tickers.json',
                                       toFormat='json')
    cikToTickers = collections.defaultdict(list)
    for entry in tickersJSON.values():
        cikToTickers[str(entry['cik_str'])].append(entry['ticker'])
    return cikToTickers
def get99Texts(info, textLimit=defaultTextLimit):
    """
    Download the text of each EX-99 HTML exhibit linked from a filing's
    info dict. Returns a list parallel to info['links']: for EX-99 .htm/.html
    links, the exhibit text truncated to textLimit (with any leading
    "exhibit" header matched by startExhibitPat stripped); '' for all
    other links.
    """
    texts = []
    for _, _, linkType, url in info.get('links', []):
        isEx99Html = (linkType.lower().startswith('ex-99')
                      and url.lower().endswith(('.htm', '.html')))
        if isEx99Html:
            body = utils.downloadSecUrl(url, toFormat='souptext')
            m = startExhibitPat.match(body)
            if m:
                # Drop the boilerplate exhibit header before truncating.
                body = body[m.end():]
            texts.append(body[:textLimit].strip())
        else:
            texts.append('')
    return texts
def getSecTickerDict():
    """
    Build a dict mapping ticker -> CIK (as a string) from the SEC's ticker
    list download. When the same ticker appears more than once, the entry
    with the numerically larger CIK wins.
    """
    tickerJson = utils.downloadSecUrl(secTickerListUrl, toFormat='json')
    fieldNames = tickerJson['fields']
    tickerPos = fieldNames.index('ticker')
    cikPos = fieldNames.index('cik')
    res = {}
    for row in tickerJson['data']:
        ticker, cik = row[tickerPos], row[cikPos]
        prev = res.get(ticker)
        if prev is None or int(prev) < cik:
            res[ticker] = str(cik)
    return res
def getRecent(cik):
    """
    Download the recent-filings list for a CIK from the SEC submissions
    REST endpoint. Returns [(formType, accNo, fileDate), ... ] with
    fileDate in YYYYMMDD form (dashes removed).
    """
    cik = str(cik).lstrip('0')
    restFilingsUrl = f'/submissions/CIK{cik.zfill(10)}.json'
    filingsJson = utils.downloadSecUrl(restFilingsUrl, restData=True,
                                       toFormat='json')
    recent = filingsJson['filings']['recent']
    accNos = recent['accessionNumber']
    print(len(accNos), 'filings for', filingsJson['name'])
    fDates = [fDate.replace('-', '') for fDate in recent['filingDate']]
    # zip produces the same (formType, accNo, fDate) tuples directly.
    return list(zip(recent['form'], accNos, fDates))
def get13FAmendmentType(accNo, formType=None):
    """
    Return the amendment type declared in a 13F-HR/A filing's cover page
    (may be RESTATEMENT or NEW HOLDINGS), or None if the filing does not
    mark itself as an amendment.

    NOTE: this declared field proved unreliable (often missing or wrong),
    so it isn't used to compute combined investor holdings; instead an
    amendment is treated as a restatement when its holdings count exceeds
    half of the previous filing's count.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = [link[-1] for link in info['links']
               if link[0].lower().endswith('xml')]
    summXml = utils.downloadSecUrl(xmlUrls[0], toFormat='xml')
    coverPage = findChildSeries(summXml, ['formdata', 'coverpage'])
    amendFlag = findChildEndingWith(coverPage, 'isamendment')
    # Not flagged as an amendment -> no amendment type.
    if amendFlag is None:
        return None
    if amendFlag.text.strip().lower() not in ('true', 'yes'):
        return None
    return findChildSeries(coverPage,
                           ['amendmentinfo', 'amendmenttype']).text.strip()
def getXbrlFeed():
    """
    Parse the SEC XBRL RSS feed. Returns a list of tuples
    (fileDate, companyName, accNo, formType, cik), with fileDate converted
    from MM/DD/YYYY to YYYY-MM-DD and cik stripped of leading zeros.
    Entries that fail to parse are reported and skipped.
    """
    feedSoup = utils.downloadSecUrl(secXbrlFeedUrl, toFormat='soup')
    res = []
    for item in feedSoup.find_all('item'):
        try:
            vals = [item.find('edgar:' + tag).string.strip()
                    for tag in ['filingdate', 'companyname',
                                'accessionnumber', 'formtype', 'ciknumber']]
            m = dateStrMMDDPat.match(vals[0])
            if m is None:
                raise Exception("MM/DD/YYYY format expected for filingdate")
            # Reorder MM/DD/YYYY into ISO YYYY-MM-DD.
            vals[0] = m.group(3) + '-' + m.group(1) + '-' + m.group(2)
            vals[4] = vals[4].lstrip('0')
            res.append(tuple(vals))
        except Exception as e:
            # Best-effort feed parse: report the bad item and keep going.
            print('**** ERROR', e)
            print('**** PARSING', item)
    return res
def downloadSecFormList(listUrl):
    """
    Download and parse a pipe-delimited SEC master index file.

    Returns [(cik, cikName, formType, fileDate, accNo), ... ] with fileDate
    normalized to digits only. Non-data lines (header rows before the first
    valid entry) are skipped silently; later malformed lines are reported.
    """
    raw = utils.downloadSecUrl(listUrl)
    res = []
    for fields in csv.reader(raw.splitlines(), delimiter='|'):
        if not (len(fields) == 5 and fields[0].isdigit()):
            # Only warn once real entries have started; earlier lines
            # are the index file header.
            if res:
                print('invalid entry', fields)
            continue
        cik, cikName, formType, fileDate, txtF = fields
        fileDate = fileDate.replace('-', '').replace('/', '')
        m = edgarTxtFPat.match(txtF)
        if not m:
            print('missing accession no in', fields)
            continue
        if m.group(1) != cik:
            # Warn but still keep the entry.
            print('cik mismatch in', fields)
        res.append((cik, cikName, formType, fileDate, m.group(2)))
    return res
def parse8K(accNo, formType=None, textLimit=basicInfo.defaultTextLimit) :
    """
    Parses an 8-K filing, adding to the basic form info dict:
      'itemTexts'       - list parallel to info['items']; each entry is the
                          item's text from the main document truncated to
                          textLimit, or the item title + ' ???' if its
                          header couldn't be located in the text
      'explanatoryNote' - for 8-K/A amendments, the text starting at the
                          explanatory note (truncated to textLimit), when
                          explanPat finds one
    Returns the info dict unchanged (no item parsing) if the filing has no
    links list or no item headers.
    """
    info = basicInfo.getSecFormInfo(accNo, formType=formType, get99=True,
                                    textLimit=textLimit)
    links = info['links']
    if len(links) == 0 :
        utils.printErrInfoOrAccessNo('NO LINKS LIST in',accNo)
        return info
    if formType is None :
        # Infer the form type from the first link's type column.
        formType = links[0][2]
    items = info.get('items',[])
    if len(items) == 0 :
        return info
    # Main document text; links[0][3] is the first link's URL.
    mainText = utils.downloadSecUrl(links[0][3], toFormat='souptext')
    if formType.lower() == '8-k/a' :
        m = explanPat.search(mainText)
        if m is not None :
            info['explanatoryNote'] = mainText[m.start():m.start()+textLimit]
    # itemPosL accumulates the start position of each item header found in
    # mainText, in order; searches are anchored after the previous match so
    # positions are strictly nondecreasing.
    itemPosL = [0]
    info['itemTexts'] = itemTexts = [None for item in items]
    for i,item in enumerate(items) :
        m = itemPat.match(item)
        if m is None :
            utils.printErrInfoOrAccessNo(f"unexpected format for item header {item}",accNo)
            continue
        # m.group(1) is the captured item number (presumably e.g. '5.02' -
        # confirm against itemPat); joining its characters with \s* allows
        # arbitrary whitespace inside the number as it appears in the text,
        # and dots are escaped for the regex.
        m = re.search(r'item[\s\-\.]*' + r'\s*'.join(m.group(1)).replace('.',r'\.'),
                      mainText[itemPosL[-1]:], re.IGNORECASE)
        if m is None :
            utils.printErrInfoOrAccessNo(f"couldn't find {item}",accNo)
            continue
        # Convert the match offset (relative to the tail searched) back to
        # an absolute position in mainText.
        itemPosL.append(itemPosL[-1]+m.start())
        # Mark this item as found; its text is filled in below.
        itemTexts[i] = ''
        # print('pos for',item,itemPosL[-1])
    itemPosL.append(len(mainText))
    # j walks the found-item positions in itemPosL (index 0 is the initial
    # 0 sentinel); items not found keep their title with a ' ???' marker.
    j = 1
    for i in range(len(itemTexts)) :
        if itemTexts[i] is None :
            itemTexts[i] = items[i] + ' ???'
        else :
            # Item i's text runs from its header to the next found header,
            # truncated to textLimit.
            itemTexts[i] = mainText[itemPosL[j] : min(itemPosL[j]+textLimit, itemPosL[j+1])]
            j += 1
    return info
def parse34(accNo, formType=None):
    """
    Parse a form 3/4/5 filing, adding to the basic form info dict:
      'transactions'  - list of tuples of values extracted with
                        form4ValuePats from each transaction element
      'reportingName' - list of reporting-owner names
      'reportingCik'  - list of reporting-owner CIKs (leading zeros removed)
    If the form 4 XML file (second link) is missing or invalid, the lists
    stay empty and the error is reported; empty lists are also flagged.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    links = info['links']
    info['transactions'] = []
    info['reportingName'] = []
    info['reportingCik'] = []
    try:
        soup = utils.downloadSecUrl(links[1][-1], toFormat='soup')
        for transTag in soup.find_all(form4TransactionPat):
            info['transactions'].append(tuple(
                getForm4Value(transTag, vPat) for vPat in form4ValuePats))
        for nameTag in soup.find_all(form4ReportingNamePat):
            info['reportingName'].append(
                utils.getCombSoupText(nameTag).strip())
        for cikTag in soup.find_all(form4ReportingCikPat):
            info['reportingCik'].append(
                utils.getCombSoupText(cikTag).strip().lstrip('0'))
    except Exception as e:
        # Best-effort: keep the empty lists and report the problem.
        print('missing or invalid form 4 XML file:', e)
    for key in ('transactions', 'reportingName', 'reportingCik'):
        if not info[key]:
            print(f'*** NO {key.upper()} ***')
    return info
def getRecentChunk(count=100):
    """
    Parse the SEC's atom-format feed of most recent filings.

    Returns [(fileDate, cikName, accNo, formType, cik), ... ] with the most
    recent filings first; entries with any unparsed field are dropped.
    """
    feedXml = utils.downloadSecUrl(secMostRecentListUrl(count=count),
                                   toFormat='xml')
    res = []
    for entry in feedXml:
        if not entry.tag.lower().endswith("entry"):
            continue
        cik = formType = accNo = fDate = cikName = None
        for child in entry:
            childTag = child.tag.lower()
            if childTag.endswith('title'):
                # Title holds "formType - cikName (cik)".
                m = titlePat.match(child.text)
                if m is None:
                    printXmlParseWarning('unable to parse title element', entry)
                    continue
                formType, cikName, cik = m.groups()
                cik = cik.lstrip('0')
            elif childTag.endswith('summary'):
                # Summary holds the filing date and accession number.
                m = filedPat.search(child.text)
                if m is None:
                    printXmlParseWarning('unable to parse summary element', entry)
                    continue
                fDate, accNo = m.groups()
        parsed = (fDate, cikName, accNo, formType, cik)
        # Keep only fully parsed entries.
        if all(parsed):
            res.append(parsed)
    return res
def getSecFormInfo(accessNo, formType=None, get99=False, textLimit=defaultTextLimit):
    """
    Parses some basic information from the index URL of a form with the
    given accession number. Returns a dict {
        'links' : [(name, description, type, sublink), ... ],
        'complete' : completeTextLink,
        'ciks' : [cik, ... ],
        'cikTypes' : as returned by getSecFormCiks,
    }
    For some forms, additional fields may be parsed:
        'period' : 'YYYY-MM-DD',
        'acceptDate' : 'YYYY-MM-DD',
        'acceptTime' : 'HH:MM:SS',
        'items' : [title, ... ],
        'filedByCik' : cik,
        'text99' : EX-99 exhibit texts (only when get99 is True)
    """
    indexSoup = utils.downloadSecUrl(accessNo, toFormat='soup')
    indexFullUrl = utils.secIndexUrl(accessNo, True)
    links, completeLink = getSecFormLinkList(indexSoup, accessNo)
    res = {
        'links': links,
        'complete': completeLink,
    }
    res['ciks'], filedByCik, res['cikTypes'] = getSecFormCiks(
        indexSoup, accessNo)
    if filedByCik is not None:
        res['filedByCik'] = filedByCik
    if get99:
        # Download EX-99 exhibit texts based on the links just parsed.
        res['text99'] = get99Texts(res, textLimit=textLimit)
    if links:
        if formType is None:
            # Infer the form type from the first link's type column.
            formType = links[0][2]
        elif formType != links[0][2]:
            print('formType mismatch', formType, links[0])
    # Only complain about a missing period for form types expected to
    # have one (noPeriodFormTypes lists the exceptions).
    missingPeriodMessage = None
    if formType is None or not dailyList.noPeriodFormTypes.match(formType):
        missingPeriodMessage = 'missing period in ' + indexFullUrl
    getTextAfterTag(res, 'period', indexSoup, periodPat,
                    missingMessage=missingPeriodMessage)
    if 'period' in res and not periodDatePat.match(res['period']):
        # Drop a period value that isn't in the expected date format.
        print('malformed period', res['period'], 'in', indexFullUrl)
        del res['period']
    getTextAfterTag(res, 'acceptDateTime', indexSoup, acceptedPat,
                    missingMessage='missing accepted in ' + indexFullUrl)
    if 'acceptDateTime' in res:
        # Split the combined accept timestamp into date and time fields;
        # the raw combined value is removed either way.
        m = acceptedDateTimePat.match(res['acceptDateTime'])
        if not m:
            print('malformed accept date/time', res['acceptDateTime'])
            print('in', indexFullUrl)
        else:
            res['acceptDate'] = m.group(1)
            res['acceptTime'] = m.group(2)
        del res['acceptDateTime']
    # Only complain about missing items for form types that have item
    # headers (e.g. 8-K style forms matched by itemFormTypes).
    missingItemsMessage = None
    if formType is None or itemFormTypes.match(formType):
        missingItemsMessage = 'missing items in ' + indexFullUrl
    getTextAfterTag(res, 'items', indexSoup,
                    itemsPat, strListHeader='item',
                    missingMessage=missingItemsMessage)
    return res