コード例 #1
0
    def test_parse_quarter_or_annual_page(self):
        '''
        Check which links are followed on a quarterly/annual filing page.

        The fixture holds eight anchors.  Only two are expected to come back
        as requests: the ones under /Archives/edgar/data/ whose file name
        ends in an eight-digit date followed by ".xml".  The rest (an
        external URL, paths outside /Archives/edgar/data/, a nine-digit
        date and an ".html" document) are expected to be skipped.

        Example of a real page of this kind:
        http://www.sec.gov/Archives/edgar/data/1288776/000128877613000055/0001288776-13-000055-index.htm

        '''
        # Relative hrefs are expected to be resolved against the response URL.
        expected_urls = [
            'http://sec.gov/Archives/edgar/data/123/abc-20130630.xml',
            'http://sec.gov/Archives/edgar/data/456/789/hello-20130630.xml'
        ]

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        body = '''
            <html><body>
            <a href="http://example.com">Useless Link</a>
            <a href="/Archives/edgar/data/123/abc-20130630.xml">Link</a>
            <a href="/Archives/edgar/123/456/abc123-20130630.xml">Useless Link</a>
            <a href="/Archives/edgar/data/456/789/hello-20130630.xml">Link</a>
            <a href="/Archives/edgar/123/456/hello-20130630.xml">Useless Link</a>
            <a href="/Archives/data/123/456/hello-20130630.xml">Useless Link</a>
            <a href="/Archives/edgar/data/123/456/hello-201306300.xml">Useless Link</a>
            <a href="/Archives/edgar/data/123/456/xyz-20130630.html">Link</a>
            </body></html>
        '''

        page = HtmlResponse('http://sec.gov/mock', body=body)
        followed_urls = [request.url for request in spider.parse(page)]

        self.assertEqual(followed_urls, expected_urls)
コード例 #2
0
    def test_parse_quarter_or_annual_page(self):
        '''
        Check which links are followed on a quarterly/annual filing page.

        The fixture holds eight anchors.  Only two are expected to come back
        as requests: the ones under /Archives/edgar/data/ whose file name
        ends in an eight-digit date followed by ".xml".  The rest (an
        external URL, paths outside /Archives/edgar/data/, a nine-digit
        date and an ".html" document) are expected to be skipped.

        Example of a real page of this kind:
        http://www.sec.gov/Archives/edgar/data/1288776/000128877613000055/0001288776-13-000055-index.htm

        '''
        # Relative hrefs are expected to be resolved against the response URL.
        expected_urls = [
            'http://sec.gov/Archives/edgar/data/123/abc-20130630.xml',
            'http://sec.gov/Archives/edgar/data/456/789/hello-20130630.xml'
        ]

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        body = '''
            <html><body>
            <a href="http://example.com">Useless Link</a>
            <a href="/Archives/edgar/data/123/abc-20130630.xml">Link</a>
            <a href="/Archives/edgar/123/456/abc123-20130630.xml">Useless Link</a>
            <a href="/Archives/edgar/data/456/789/hello-20130630.xml">Link</a>
            <a href="/Archives/edgar/123/456/hello-20130630.xml">Useless Link</a>
            <a href="/Archives/data/123/456/hello-20130630.xml">Useless Link</a>
            <a href="/Archives/edgar/data/123/456/hello-201306300.xml">Useless Link</a>
            <a href="/Archives/edgar/data/123/456/xyz-20130630.html">Link</a>
            </body></html>
        '''

        page = HtmlResponse('http://sec.gov/mock', body=body)
        followed_urls = [request.url for request in spider.parse(page)]

        self.assertEqual(followed_urls, expected_urls)
コード例 #3
0
    def test_parse_xml_report(self):
        '''
        Parse an XML 10-Q or 10-K report into an item.

        Feeds a small XBRL document to ``parse_10qk`` and checks every field
        of the resulting item through ``assert_item``.
        '''
        # Values the parsed item is expected to carry.  The symbol is
        # upper-case 'ABC' while the response URL's file name starts with
        # lower-case 'abc'.
        expected_fields = {
            'symbol': 'ABC',
            'amend': False,
            'doc_type': '10-Q',
            'period_focus': 'Q2',
            'fiscal_year': 2013,
            'end_date': '2013-06-28',
            'revenues': 100.0,
            'net_income': 200.0,
            'eps_basic': 0.2,
            'eps_diluted': 0.19,
            'dividend': 0.07,
            'assets': 1600.0,
            'equity': 300.0,
            'cash': 150.0
        }

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        # NOTE(review): the XML declaration below has no closing "?" and
        # DocumentFiscalYearFocus is closed with "</dei>".  The test still
        # expects fiscal_year == 2013, so it may depend on a lenient
        # parser -- confirm before "repairing" the fixture.
        body = '''
            <?xml version="1.0">
            <xbrl xmlns="http://www.xbrl.org/2003/instance"
                  xmlns:xbrli="http://www.xbrl.org/2003/instance"
                  xmlns:dei="http://xbrl.sec.gov/dei/2011-01-31"
                  xmlns:us-gaap="http://fasb.org/us-gaap/2011-01-31">

              <context id="c1">
                <startDate>2013-03-31</startDate>
                <endDate>2013-06-28</endDate>
              </context>

              <dei:AmendmentFlag contextRef="c1">false</dei:AmendmentFlag>
              <dei:DocumentType contextRef="c1">10-Q</dei:DocumentType>
              <dei:DocumentFiscalPeriodFocus contextRef="c1">Q2</dei:DocumentFiscalPeriodFocus>
              <dei:DocumentPeriodEndDate contextRef="c1">2013-06-28</dei:DocumentPeriodEndDate>
              <dei:DocumentFiscalYearFocus>2013</dei>

              <us-gaap:Revenues contextRef="c1">100</us-gaap:Revenues>
              <us-gaap:NetIncomeLoss contextRef="c1">200</us-gaap:NetIncomeLoss>
              <us-gaap:EarningsPerShareBasic contextRef="c1">0.2</us-gaap:EarningsPerShareBasic>
              <us-gaap:EarningsPerShareDiluted contextRef="c1">0.19</us-gaap:EarningsPerShareDiluted>
              <us-gaap:CommonStockDividendsPerShareDeclared contextRef="c1">0.07</us-gaap:CommonStockDividendsPerShareDeclared>

              <us-gaap:Assets contextRef="c1">1600</us-gaap:Assets>
              <us-gaap:StockholdersEquity contextRef="c1">300</us-gaap:StockholdersEquity>
              <us-gaap:CashAndCashEquivalentsAtCarryingValue contextRef="c1">150</us-gaap:CashAndCashEquivalentsAtCarryingValue>
            </xbrl>
        '''

        report_url = 'http://sec.gov/Archives/edgar/data/123/abc-20130720.xml'
        report = XmlResponse(report_url, body=body)

        self.assert_item(spider.parse_10qk(report), expected_fields)
コード例 #4
0
    def test_parse_xml_report(self):
        '''
        Parse an XML 10-Q or 10-K report into an item.

        Feeds a small XBRL document to ``parse_10qk`` and checks every field
        of the resulting item through ``assert_item``.
        '''
        # Values the parsed item is expected to carry.  The symbol is
        # upper-case 'ABC' while the response URL's file name starts with
        # lower-case 'abc'.
        expected_fields = {
            'symbol': 'ABC',
            'amend': False,
            'doc_type': '10-Q',
            'period_focus': 'Q2',
            'fiscal_year': 2013,
            'end_date': '2013-06-28',
            'revenues': 100.0,
            'net_income': 200.0,
            'eps_basic': 0.2,
            'eps_diluted': 0.19,
            'dividend': 0.07,
            'assets': 1600.0,
            'equity': 300.0,
            'cash': 150.0
        }

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        # NOTE(review): the XML declaration below has no closing "?" and
        # DocumentFiscalYearFocus is closed with "</dei>".  The test still
        # expects fiscal_year == 2013, so it may depend on a lenient
        # parser -- confirm before "repairing" the fixture.
        body = '''
            <?xml version="1.0">
            <xbrl xmlns="http://www.xbrl.org/2003/instance"
                  xmlns:xbrli="http://www.xbrl.org/2003/instance"
                  xmlns:dei="http://xbrl.sec.gov/dei/2011-01-31"
                  xmlns:us-gaap="http://fasb.org/us-gaap/2011-01-31">

              <context id="c1">
                <startDate>2013-03-31</startDate>
                <endDate>2013-06-28</endDate>
              </context>

              <dei:AmendmentFlag contextRef="c1">false</dei:AmendmentFlag>
              <dei:DocumentType contextRef="c1">10-Q</dei:DocumentType>
              <dei:DocumentFiscalPeriodFocus contextRef="c1">Q2</dei:DocumentFiscalPeriodFocus>
              <dei:DocumentPeriodEndDate contextRef="c1">2013-06-28</dei:DocumentPeriodEndDate>
              <dei:DocumentFiscalYearFocus>2013</dei>

              <us-gaap:Revenues contextRef="c1">100</us-gaap:Revenues>
              <us-gaap:NetIncomeLoss contextRef="c1">200</us-gaap:NetIncomeLoss>
              <us-gaap:EarningsPerShareBasic contextRef="c1">0.2</us-gaap:EarningsPerShareBasic>
              <us-gaap:EarningsPerShareDiluted contextRef="c1">0.19</us-gaap:EarningsPerShareDiluted>
              <us-gaap:CommonStockDividendsPerShareDeclared contextRef="c1">0.07</us-gaap:CommonStockDividendsPerShareDeclared>

              <us-gaap:Assets contextRef="c1">1600</us-gaap:Assets>
              <us-gaap:StockholdersEquity contextRef="c1">300</us-gaap:StockholdersEquity>
              <us-gaap:CashAndCashEquivalentsAtCarryingValue contextRef="c1">150</us-gaap:CashAndCashEquivalentsAtCarryingValue>
            </xbrl>
        '''

        report_url = 'http://sec.gov/Archives/edgar/data/123/abc-20130720.xml'
        report = XmlResponse(report_url, body=body)

        self.assert_item(spider.parse_10qk(report), expected_fields)
コード例 #5
0
    def test_parse_company_filing_page(self):
        '''
        Check which links are followed on a company's filing list page.

        The fixture holds nine anchors.  The five under /Archives/edgar/data/
        whose file name ends in "-index.htm" are expected to come back as
        requests, in document order.  The rest (an external URL, a file
        without the "-index" suffix, a path outside /Archives/edgar/data/
        and an ".html" file directly under /Archives/) are expected to be
        skipped.

        Example of a real page of this kind:
        http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001288776&type=10-&dateb=&owner=exclude&count=40

        '''
        # Relative hrefs are expected to be resolved against the response URL.
        expected_urls = [
            'http://sec.gov/Archives/edgar/data/abc-index.htm',
            'http://sec.gov/Archives/edgar/data/123-index.htm',
            'http://sec.gov/Archives/edgar/data/123/abc-index.htm',
            'http://sec.gov/Archives/edgar/data/123/456/abc123-index.htm',
            'http://sec.gov/Archives/edgar/data/123/456/789/HELLO-index.htm'
        ]

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        body = '''
            <html><body>
            <a href="http://example.com/">Useless Link</a>
            <a href="/Archives/edgar/data/abc-index.htm">Link</a>
            <a href="/Archives/edgar/data/123-index.htm">Link</a>
            <a href="/Archives/edgar/data/123.htm">Useless Link</a>
            <a href="/Archives/edgar/data/123/abc-index.htm">Link</a>
            <a href="/Archives/edgar/data/123/456/abc123-index.htm">Link</a>
            <a href="/Archives/edgar/123/abc-index.htm">Uselss Link</a>
            <a href="/Archives/edgar/data/123/456/789/HELLO-index.htm">Link</a>
            <a href="/Archives/hello-index.html">Useless Link</a>
            </body></html>
        '''

        page = HtmlResponse('http://sec.gov/mock', body=body)
        followed_urls = [request.url for request in spider.parse(page)]

        self.assertEqual(followed_urls, expected_urls)
コード例 #6
0
    def test_parse_company_filing_page(self):
        '''
        Check which links are followed on a company's filing list page.

        The fixture holds nine anchors.  The five under /Archives/edgar/data/
        whose file name ends in "-index.htm" are expected to come back as
        requests, in document order.  The rest (an external URL, a file
        without the "-index" suffix, a path outside /Archives/edgar/data/
        and an ".html" file directly under /Archives/) are expected to be
        skipped.

        Example of a real page of this kind:
        http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001288776&type=10-&dateb=&owner=exclude&count=40

        '''
        # Relative hrefs are expected to be resolved against the response URL.
        expected_urls = [
            'http://sec.gov/Archives/edgar/data/abc-index.htm',
            'http://sec.gov/Archives/edgar/data/123-index.htm',
            'http://sec.gov/Archives/edgar/data/123/abc-index.htm',
            'http://sec.gov/Archives/edgar/data/123/456/abc123-index.htm',
            'http://sec.gov/Archives/edgar/data/123/456/789/HELLO-index.htm'
        ]

        spider = EdgarSpider()
        # HACK: pokes a private attribute; presumably switches link
        # following on -- confirm against EdgarSpider.
        spider._follow_links = True

        body = '''
            <html><body>
            <a href="http://example.com/">Useless Link</a>
            <a href="/Archives/edgar/data/abc-index.htm">Link</a>
            <a href="/Archives/edgar/data/123-index.htm">Link</a>
            <a href="/Archives/edgar/data/123.htm">Useless Link</a>
            <a href="/Archives/edgar/data/123/abc-index.htm">Link</a>
            <a href="/Archives/edgar/data/123/456/abc123-index.htm">Link</a>
            <a href="/Archives/edgar/123/abc-index.htm">Uselss Link</a>
            <a href="/Archives/edgar/data/123/456/789/HELLO-index.htm">Link</a>
            <a href="/Archives/hello-index.html">Useless Link</a>
            </body></html>
        '''

        page = HtmlResponse('http://sec.gov/mock', body=body)
        followed_urls = [request.url for request in spider.parse(page)]

        self.assertEqual(followed_urls, expected_urls)
コード例 #7
0
    def test_symbol_file(self):
        '''
        Build the start URLs from a file listing one symbol per line.

        The mock file contains two lines starting with "#"; the expected
        URL list has no entry for them, only one URL per symbol in file
        order.
        '''
        # delete=False keeps the file on disk after it is closed so that it
        # can be handed to the spider by name; it is removed explicitly in
        # the ``finally`` clause below.
        symbol_file = tempfile.NamedTemporaryFile('w', delete=False)
        try:
            # Leaving the ``with`` block closes (and flushes) the file.
            with symbol_file:
                symbol_file.write('# Comment\nGOOG\nADBE\nLNKD\n#comment\nJPM\n')

            spider = EdgarSpider(symbols=symbol_file.name)
            urls = list(spider.start_urls)

            self.assertEqual(urls, [
                make_url('GOOG'),
                make_url('ADBE'),
                make_url('LNKD'),
                make_url('JPM')
            ])
        finally:
            # Remove the temp file even when the spider raises or the
            # assertion fails, so failed runs do not leave files behind.
            os.remove(symbol_file.name)
コード例 #8
0
    def test_symbol_file_and_dates(self):
        '''
        Build the start URLs from a symbol file plus a start and end date.

        Each symbol in the mock file is expected to yield one URL built by
        ``make_url`` with the same ``startdate`` and ``enddate`` that were
        passed to the spider; the "#" line yields none.
        '''
        # delete=False keeps the file on disk after it is closed so that it
        # can be handed to the spider by name; it is removed explicitly in
        # the ``finally`` clause below.
        symbol_file = tempfile.NamedTemporaryFile('w', delete=False)
        try:
            # Leaving the ``with`` block closes (and flushes) the file.
            with symbol_file:
                symbol_file.write('# Comment\nT\nCBS\nWMT\n')

            spider = EdgarSpider(symbols=symbol_file.name,
                                 startdate='20110101',
                                 enddate='20130630')
            urls = list(spider.start_urls)

            self.assertEqual(urls, [
                make_url('T', '20110101', '20130630'),
                make_url('CBS', '20110101', '20130630'),
                make_url('WMT', '20110101', '20130630')
            ])
        finally:
            # Remove the temp file even when the spider raises or the
            # assertion fails, so failed runs do not leave files behind.
            os.remove(symbol_file.name)
コード例 #9
0
    def test_invalid_dates(self):
        '''
        Reject the date string '12345678' with ValueError.

        The same value is tried first as ``startdate`` and then as
        ``enddate``; each construction must raise on its own.
        '''
        for date_keyword in ('startdate', 'enddate'):
            with self.assertRaises(ValueError):
                EdgarSpider(**{date_keyword: '12345678'})
コード例 #10
0
 def test_empty_creation(self):
     '''A spider built without arguments has start_urls equal to [].'''
     self.assertEqual(EdgarSpider().start_urls, [])