Beispiel #1
0
def getDailyFList(d, listIndexCache=None):
    """
    Return a list of SEC filed forms for the given date (or ISO date string):
        [(cik, cikName, formType, fileDate, accNo), ... ]
    retrieved from the SEC daily index. An optional dict cache maps daily
    index URLs to the set of master index file names already fetched.
    """
    dateStr = utils.toDateStr(d)
    listUrl, listIndexUrl = getSecDailyIndexUrls(dateStr)
    haveCached = listIndexCache is not None and listIndexUrl in listIndexCache
    if haveCached:
        listIndex = listIndexCache[listIndexUrl]
    else:
        listIndexJson = utils.downloadSecUrl(listIndexUrl, toFormat='json')
        dirItems = listIndexJson['directory']['item']
        listIndex = {it['name'] for it in dirItems
                     if it['name'].startswith('master')}
        print(f'### list index {len(listIndex)}', end=' ')
        if listIndexCache is not None:
            listIndexCache[listIndexUrl] = listIndex
    # no master index file for the date means the market was closed
    if 'master.' + dateStr + '.idx' not in listIndex:
        print('HOLIDAY', end=' ')
        return []
    res = downloadSecFormList(listUrl)
    print('count for', dateStr + ':', len(res), end=' ')
    return res
Beispiel #2
0
def parse13FHoldings(accNo, formType=None):
    """
    Parses a 13F filing, returning the result in the form:
    {
        'period': 'YYYY-MM-DD',
        'acceptDate': 'YYYY-MM-DD',
        'acceptTime': 'HH:MM:SS',
        'cik' : 'DDDDDDDDDD',
        'holdings': [(cusip, name, value, title, count, putCall), ... ]
    }
    where the field values are as given in the table,
    except putCall is 'CALL', 'PUT', or ''.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = [link[-1] for link in info['links']
               if link[0].lower().endswith('xml')]
    if len(xmlUrls) != 1:
        # the last XML link is the holdings information table
        xmlTab = utils.downloadSecUrl(xmlUrls[-1], toFormat='xml')
        tabRows = [row for row in xmlTab
                   if row.tag.lower().endswith('infotable')]
        if len(xmlTab) != len(tabRows):
            print('*** #rows mismatch', len(xmlTab), 'all children',
                  len(tabRows), 'table rows')
        if not tabRows:
            print('*** no holdings in table')
        holdings = [getRowInfo(row) for row in tabRows]
    else:
        # only the summary XML is present; report the declared total
        xmlSummTab = utils.downloadSecUrl(xmlUrls[0], toFormat='xml')
        totNode = findChildSeries(
            xmlSummTab, ['formdata', 'summarypage', 'tableentrytotal'])
        tot = int(totNode.text.strip())
        if tot == 0:
            print('*** zero total, table not present')
        else:
            print('*** nonzero total, but table not present')
        holdings = []
    if len(info['ciks']) != 1:
        print('*** unexpected number of CIKs!=1', info['ciks'])
    return {
        'period': info['period'],
        'acceptDate': info['acceptDate'],
        'acceptTime': info['acceptTime'],
        'cik': info['ciks'][0],
        'holdings': holdings,
    }
Beispiel #3
0
def getCikToTickersMap():
    """
    Retrieves and parses the SEC-maintained company_tickers.json list.

    Returns a defaultdict mapping cik (as str) -> list of ticker symbols.
    """
    tickersJSON = utils.downloadSecUrl('/files/company_tickers.json',
                                       toFormat='json')
    cikToTickers = collections.defaultdict(list)
    for rec in tickersJSON.values():
        cikToTickers[str(rec['cik_str'])].append(rec['ticker'])
    return cikToTickers
Beispiel #4
0
def get99Texts(info, textLimit=defaultTextLimit):
    """
    For each entry in info['links'], download the text of EX-99 HTML
    exhibits (truncated to textLimit and stripped); every other link
    contributes '' so the result aligns positionally with the links list.
    """
    res = []
    for _, _, linkType, url in info.get('links', []):
        lowUrl = url.lower()
        isEx99Html = (linkType.lower().startswith('ex-99')
                      and (lowUrl.endswith('.htm') or lowUrl.endswith('.html')))
        if not isEx99Html:
            res.append('')
            continue
        urlText = utils.downloadSecUrl(url, toFormat='souptext')
        m = startExhibitPat.match(urlText)
        if m:
            # drop the leading exhibit boilerplate
            urlText = urlText[m.end():]
        res.append(urlText[:textLimit].strip())
    return res
Beispiel #5
0
def getSecTickerDict():
    """
    Creates a dict mapping ticker -> CIK (as str) from the SEC ticker list.
    If a ticker appears more than once, the entry with the largest CIK wins.
    """
    secTickerJson = utils.downloadSecUrl(secTickerListUrl, toFormat='json')
    fields = secTickerJson['fields']
    tickerPos, cikPos = fields.index('ticker'), fields.index('cik')
    res = {}
    for row in secTickerJson['data']:
        ticker, cik = row[tickerPos], row[cikPos]
        if ticker not in res or int(res[ticker]) < cik:
            res[ticker] = str(cik)
    return res
Beispiel #6
0
def getRecent(cik):
    """
    Fetch the SEC REST 'recent filings' list for a CIK.

    Returns [(formType, accNo, fDate), ...] with fDate as 'YYYYMMDD'.
    """
    cik = str(cik).lstrip('0')
    restFilingsUrl = f'/submissions/CIK{cik.zfill(10)}.json'
    filingsJson = utils.downloadSecUrl(restFilingsUrl,
                                       restData=True,
                                       toFormat='json')
    recentList = filingsJson['filings']['recent']
    accNos = recentList['accessionNumber']
    print(len(accNos), 'filings for', filingsJson['name'])
    # normalize filing dates from 'YYYY-MM-DD' to 'YYYYMMDD'
    fDates = (d.replace('-', '') for d in recentList['filingDate'])
    return list(zip(recentList['form'], accNos, fDates))
Beispiel #7
0
def get13FAmendmentType(accNo, formType=None):
    """
    Gets the amendment type for a 13F-HR/A filing - may be RESTATEMENT or NEW HOLDINGS.
    This turned out to be unreliable (often missing or wrong), so I don't use it to get
    the combined holdings for an investor. Instead I just look at the number of holdings
    in an amendment compared to the previous filing, and treat it as a restatement
    if the new number of holdings is more than half the old number.

    Returns None when the filing's cover page doesn't mark it as an amendment.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    xmlUrls = [link[-1] for link in info['links']
               if link[0].lower().endswith('xml')]
    # the first XML link is the summary/cover-page document
    xmlSummTab = utils.downloadSecUrl(xmlUrls[0], toFormat='xml')
    coverPage = findChildSeries(xmlSummTab, ['formdata', 'coverpage'])
    isAmendment = findChildEndingWith(coverPage, 'isamendment')
    if isAmendment is None:
        return None
    if isAmendment.text.strip().lower() not in ('true', 'yes'):
        return None
    amendType = findChildSeries(coverPage, ['amendmentinfo', 'amendmenttype'])
    return amendType.text.strip()
Beispiel #8
0
def getXbrlFeed():
    """
    Parse the SEC XBRL RSS feed.

    Returns a list of tuples (fileDate, companyName, accNo, formType, cik)
    with fileDate converted from MM/DD/YYYY to YYYY-MM-DD and cik stripped
    of leading zeros. Items that fail to parse are reported and skipped.
    """
    feedSoup = utils.downloadSecUrl(secXbrlFeedUrl, toFormat='soup')
    tagNames = ['filingdate', 'companyname', 'accessionnumber',
                'formtype', 'ciknumber']
    res = []
    for item in feedSoup.find_all('item'):
        try:
            vals = [item.find('edgar:' + tag).string.strip()
                    for tag in tagNames]
            m = dateStrMMDDPat.match(vals[0])
            if m is None:
                raise Exception("MM/DD/YYYY format expected for filingdate")
            vals[0] = '-'.join((m.group(3), m.group(1), m.group(2)))
            vals[4] = vals[4].lstrip('0')
            res.append(tuple(vals))
        except Exception as e:
            # best-effort feed parse: report the bad item and keep going
            print('**** ERROR', e)
            print('**** PARSING', item)
    return res
Beispiel #9
0
def downloadSecFormList(listUrl):
    """
    Download and parse a pipe-delimited SEC master index file.

    Returns [(cik, cikName, formType, fileDate, accNo), ...] with
    fileDate normalized to digits only.
    """
    fListRes = utils.downloadSecUrl(listUrl)
    reader = csv.reader(fListRes.splitlines(), delimiter='|')
    res = []
    for entry in reader:
        if not (len(entry) == 5 and entry[0].isdigit()):
            # header/separator rows precede the data, so only warn
            # about malformed rows once real entries have been seen
            if res:
                print('invalid entry', entry)
            continue
        cik, cikName, formType, fileDate, txtF = entry
        fileDate = fileDate.replace('-', '').replace('/', '')
        m = edgarTxtFPat.match(txtF)
        if not m:
            print('missing accession no in', entry)
            continue
        if m.group(1) != cik:
            print('cik mismatch in', entry)
        res.append((cik, cikName, formType, fileDate, m.group(2)))
    return res
Beispiel #10
0
def parse8K(accNo, formType=None, textLimit=basicInfo.defaultTextLimit) :
    """
    Parse an 8-K filing: fetch the basic form info (including EX-99 texts),
    then locate each listed item header inside the main document text and
    attach the corresponding snippets.

    Adds to the returned info dict (when available):
        'explanatoryNote' - for 8-K/A forms, text starting at the
            explanatory-note marker, truncated to textLimit
        'itemTexts' - one entry per element of info['items']: the item's
            text (truncated to textLimit) when its header was found in the
            main document, or '<item title> ???' when it wasn't
    """
    info = basicInfo.getSecFormInfo(accNo, formType=formType, get99=True, textLimit=textLimit)
    links = info['links']
    if len(links) == 0 :
        utils.printErrInfoOrAccessNo('NO LINKS LIST in',accNo)
        return info
    if formType is None :
        # infer the form type from the first link entry
        formType = links[0][2]
    items = info.get('items',[])
    if len(items) == 0 :
        return info
    # the first link is taken to be the filing's main document
    mainText = utils.downloadSecUrl(links[0][3], toFormat='souptext')
    if formType.lower() == '8-k/a' :
        m = explanPat.search(mainText)
        if m is not None :
            info['explanatoryNote'] = mainText[m.start():m.start()+textLimit]
    # itemPosL accumulates start positions of the item headers found so far;
    # each search is restricted to text after the previous match, so headers
    # are located in document order
    itemPosL = [0]
    info['itemTexts'] = itemTexts = [None for item in items]
    for i,item in enumerate(items) :
        m = itemPat.match(item)
        if m is None :
            utils.printErrInfoOrAccessNo(f"unexpected format for item header {item}",accNo)
            continue
        # build a regex matching the item number with arbitrary whitespace
        # between its characters, and dots escaped
        m = re.search(r'item[\s\-\.]*' + r'\s*'.join(m.group(1)).replace('.',r'\.'),
                      mainText[itemPosL[-1]:], re.IGNORECASE)
        if m is None :
            utils.printErrInfoOrAccessNo(f"couldn't find {item}",accNo)
            continue
        # convert the match offset back to an absolute position in mainText
        itemPosL.append(itemPosL[-1]+m.start())
        # '' marks items whose position was found; texts are filled in below
        itemTexts[i] = ''
        # print('pos for',item,itemPosL[-1])
    itemPosL.append(len(mainText))
    # j walks the found positions in itemPosL (index 0 is the leading 0,
    # the final entry is len(mainText) so the last slice is bounded)
    j = 1
    for i in range(len(itemTexts)) :
        if itemTexts[i] is None :
            # header not found - keep the item title with a ??? marker
            itemTexts[i] = items[i] + ' ???'
        else :
            itemTexts[i] = mainText[itemPosL[j] : min(itemPosL[j]+textLimit, itemPosL[j+1])]
            j += 1
    return info
Beispiel #11
0
def parse34(accNo, formType=None):
    """
    Parse a section 16 (form 3/4/5) filing.

    Extends the basic form info dict with three lists:
        'transactions'  - tuples of values extracted per transaction tag
        'reportingName' - reporting person names
        'reportingCik'  - reporting person CIKs (leading zeros stripped)
    Any of these may stay empty (with a warning) if the XML is missing
    or malformed.
    """
    info = basicInfo.getSecFormInfo(accNo, formType)
    links = info['links']
    for key in ('transactions', 'reportingName', 'reportingCik'):
        info[key] = []
    try:
        # the second link is the form 4 XML document
        form4Soup = utils.downloadSecUrl(links[1][-1], toFormat='soup')
        for trans in form4Soup.find_all(form4TransactionPat):
            rowVals = tuple(getForm4Value(trans, vPat)
                            for vPat in form4ValuePats)
            info['transactions'].append(rowVals)
        for nameTag in form4Soup.find_all(form4ReportingNamePat):
            info['reportingName'].append(
                utils.getCombSoupText(nameTag).strip())
        for cikTag in form4Soup.find_all(form4ReportingCikPat):
            info['reportingCik'].append(
                utils.getCombSoupText(cikTag).strip().lstrip('0'))
    except Exception as e:
        # best-effort: leave the lists empty and report the problem
        print('missing or invalid form 4 XML file:', e)
    for k in ('transactions', 'reportingName', 'reportingCik'):
        if not info[k]:
            print(f'*** NO {k.upper()} ***')
    return info
Beispiel #12
0
def getRecentChunk(count=100):
    """
    Parses the SEC's atom-format feed of most recent filings and returns a list of tuples:
        [(fileDate, cikName, accNo, formType, cik),
         ... ]
    with the most recent filings first
    """
    def parseEntry(entry):
        # extract (fDate, cikName, accNo, formType, cik) from one feed
        # entry; any field that can't be parsed stays None
        fDate = cikName = accNo = formType = cik = None
        for child in entry:
            tag = child.tag.lower()
            if tag.endswith('title'):
                m = titlePat.match(child.text)
                if m is None:
                    printXmlParseWarning('unable to parse title element',
                                         entry)
                    continue
                formType, cikName, cik = m.groups()
                cik = cik.lstrip('0')
            elif tag.endswith('summary'):
                m = filedPat.search(child.text)
                if m is None:
                    printXmlParseWarning('unable to parse summary element',
                                         entry)
                    continue
                fDate, accNo = m.groups()
        return (fDate, cikName, accNo, formType, cik)

    mrListXml = utils.downloadSecUrl(secMostRecentListUrl(count=count),
                                     toFormat='xml')
    res = []
    for listEntry in mrListXml:
        if not listEntry.tag.lower().endswith("entry"):
            continue
        fTup = parseEntry(listEntry)
        # keep only fully-parsed entries
        if all(fTup):
            res.append(fTup)
    return res
Beispiel #13
0
def getSecFormInfo(accessNo,
                   formType=None,
                   get99=False,
                   textLimit=defaultTextLimit):
    """
    Parses some basic information from the index URL of a form with the given accession number.
    Returns a dict {
        'links' : [(name, description, type, sublink), ... ],
        'complete' : completeTextLink,
        'ciks' : [cik, ... ],
    }
    For some forms, additional fields may be parsed:
        'period' : 'YYYY-MM-DD',
        'acceptDate' : 'YYYY-MM-DD',
        'acceptTime' : 'HH:MM:SS',
        'items' : [title, ... ],
        'filedByCik' : cik,
    Also sets 'cikTypes', and 'text99' when get99 is True.
    """
    indexSoup = utils.downloadSecUrl(accessNo, toFormat='soup')
    indexFullUrl = utils.secIndexUrl(accessNo, True)
    links, completeLink = getSecFormLinkList(indexSoup, accessNo)
    res = {
        'links': links,
        'complete': completeLink,
    }
    res['ciks'], filedByCik, res['cikTypes'] = getSecFormCiks(
        indexSoup, accessNo)
    if filedByCik is not None:
        res['filedByCik'] = filedByCik
    if get99:
        # optionally fetch the text of EX-99 exhibit links up front
        res['text99'] = get99Texts(res, textLimit=textLimit)
    if links:
        if formType is None:
            # infer the form type from the first link entry
            formType = links[0][2]
        elif formType != links[0][2]:
            print('formType mismatch', formType, links[0])
    # only warn about a missing period for form types expected to have one
    missingPeriodMessage = None
    if formType is None or not dailyList.noPeriodFormTypes.match(formType):
        missingPeriodMessage = 'missing period in ' + indexFullUrl
    getTextAfterTag(res,
                    'period',
                    indexSoup,
                    periodPat,
                    missingMessage=missingPeriodMessage)
    if 'period' in res and not periodDatePat.match(res['period']):
        # discard a period value that isn't a well-formed date
        print('malformed period', res['period'], 'in', indexFullUrl)
        del res['period']
    getTextAfterTag(res,
                    'acceptDateTime',
                    indexSoup,
                    acceptedPat,
                    missingMessage='missing accepted in ' + indexFullUrl)
    if 'acceptDateTime' in res:
        # split the combined accepted timestamp into date and time; the
        # intermediate key is removed either way
        m = acceptedDateTimePat.match(res['acceptDateTime'])
        if not m:
            print('malformed accept date/time', res['acceptDateTime'])
            print('in', indexFullUrl)
        else:
            res['acceptDate'] = m.group(1)
            res['acceptTime'] = m.group(2)
        del res['acceptDateTime']
    # only warn about missing items for form types expected to list them
    missingItemsMessage = None
    if formType is None or itemFormTypes.match(formType):
        missingItemsMessage = 'missing items in ' + indexFullUrl
    getTextAfterTag(res,
                    'items',
                    indexSoup,
                    itemsPat,
                    strListHeader='item',
                    missingMessage=missingItemsMessage)
    return res