def test_DealsWith404PdfAddressesCorrectly(self):
        # Attachment PDFs do get removed from the legislation site (nobody
        # knows why), so extracting text from an address that is gone has to
        # be handled; this test expects an empty string in that case.
        # Presumably needs network access, since an http:// address is used.
        #
        # NOTE(review): this chunk defines a second method with this same
        # name further down (it builds the wrapper without root_url).  If
        # both live in one class the later definition replaces this one and
        # this body never runs -- confirm which variant is intended.
        missing_pdf_url = 'http://legislation.phila.gov/attachments/115954.pdf'
        site = PhillyLegistarSiteWrapper(root_url='')

        extracted = site.extract_pdf_text(missing_pdf_url)

        self.assertEqual(extracted, '')
    def test_ResolutionPdfParsesCorrectly(self):
        # Feeds resolution PDF 11530 to extract_pdf_text three ways -- as the
        # raw file contents, as a file:// URL and as an http:// URL -- and
        # expects the same extracted text each time.
        #
        # NOTE(review): this chunk defines a second method with this same
        # name further down (it builds the wrapper without root_url).  If
        # both live in one class the later definition replaces this one --
        # confirm which variant is intended.
        wrapper = PhillyLegistarSiteWrapper(root_url='')
        expected_text = """\n\n\n\n\n\n\n\n\nCity of Philadelphia \n \n \n \n \nCity of Philadelphia \n- 1 - \n \n \n \nCity Council \nChief Clerk's Office \n402 City Hall \nPhiladelphia, PA 19107 \nRESOLUTION NO. 110406 \n \n \nIntroduced May 12, 2011 \n \n \nCouncilmember DiCicco \n \n \nReferred to the \nCommittee of the Whole   \n \n \nRESOLUTION \n \nAppointing David Campoli to the Board of Directors of the Center City District. \n \n \n \nRESOLVED, BY THE COUNCIL OF THE CITY OF PHILADELPHIA, \nTHAT David Campoli is hereby appointed as a member of the Board of Directors of the \nCenter City District, to serve in a term ending December 31, 2012. \n \n \n\n\n\nCity of Philadelphia \n \nRESOLUTION NO. 110406 continued \n \n \n \n \n \nCity of Philadelphia \n- 2 - \n \n \n \n \n\n"""
        pdf_path = os.path.join(self.pdfs_dir, '11530.pdf')

        # Raw stream.  A PDF is binary data, so read it in 'rb' mode, and use
        # a with-block so the file handle is closed instead of leaked.
        with open(pdf_path, 'rb') as pdf_file:
            resolution_pdf = pdf_file.read()
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)

        # File URL
        resolution_pdf = 'file://' + pdf_path
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)

        # Web URL -- This will only work if you're online.
        resolution_pdf = 'http://legislation.phila.gov/attachments/11530.pdf'
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)
    def test_DealsWith404PdfAddressesCorrectly(self):
        # Attachment PDFs do get removed from the legislation site (nobody
        # knows why), so extracting text from an address that is gone has to
        # be handled; this test expects an empty string in that case.
        # Presumably needs network access, since an http:// address is used.
        #
        # NOTE(review): an earlier method in this chunk has this same name
        # (it passes root_url='' to the wrapper).  If both live in one class
        # this later definition is the one that actually runs -- confirm
        # which variant is intended.
        missing_pdf_url = 'http://legislation.phila.gov/attachments/115954.pdf'
        site = PhillyLegistarSiteWrapper()

        extracted = site.extract_pdf_text(missing_pdf_url)

        self.assertEqual(extracted, '')
    def test_ResolutionPdfParsesCorrectly(self):
        # Feeds resolution PDF 11530 to extract_pdf_text three ways -- as the
        # raw file contents, as a file:// URL and as an http:// URL -- and
        # expects the same extracted text each time.
        #
        # NOTE(review): an earlier method in this chunk has this same name
        # (it passes root_url='' to the wrapper).  If both live in one class
        # this later definition is the one that actually runs -- confirm
        # which variant is intended.
        wrapper = PhillyLegistarSiteWrapper()
        expected_text = """\n\n\n\n\n\n\n\n\nCity of Philadelphia \n \n \n \n \nCity of Philadelphia \n- 1 - \n \n \n \nCity Council \nChief Clerk's Office \n402 City Hall \nPhiladelphia, PA 19107 \nRESOLUTION NO. 110406 \n \n \nIntroduced May 12, 2011 \n \n \nCouncilmember DiCicco \n \n \nReferred to the \nCommittee of the Whole   \n \n \nRESOLUTION \n \nAppointing David Campoli to the Board of Directors of the Center City District. \n \n \n \nRESOLVED, BY THE COUNCIL OF THE CITY OF PHILADELPHIA, \nTHAT David Campoli is hereby appointed as a member of the Board of Directors of the \nCenter City District, to serve in a term ending December 31, 2012. \n \n \n\n\n\nCity of Philadelphia \n \nRESOLUTION NO. 110406 continued \n \n \n \n \n \nCity of Philadelphia \n- 2 - \n \n \n \n \n\n"""
        pdf_path = os.path.join(self.pdfs_dir, '11530.pdf')

        # Raw stream.  A PDF is binary data, so read it in 'rb' mode, and use
        # a with-block so the file handle is closed instead of leaked.
        with open(pdf_path, 'rb') as pdf_file:
            resolution_pdf = pdf_file.read()
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)

        # File URL
        resolution_pdf = 'file://' + pdf_path
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)

        # Web URL -- This will only work if you're online.
        resolution_pdf = 'http://legislation.phila.gov/attachments/11530.pdf'
        resolution_text = wrapper.extract_pdf_text(resolution_pdf)
        self.assertEqual(resolution_text, expected_text)
    def test_MinutesDocumentConstructedCorrectly(self):
        # get_minutes_doc should return a dict holding the document URL, its
        # full text and the date the minutes were taken.  The date lookup and
        # the PDF text extraction are replaced with mocks on the instance so
        # the test controls both values.
        #
        # NOTE(review): this chunk defines a second method with this same
        # name further down (it passes root_url='' to the wrapper).  If both
        # live in one class the later definition replaces this one --
        # confirm which variant is intended.
        doc_url = 'http://www.example.com/doc.pdf'
        stub_text = 'This is the text'
        stub_date = dt.date(2083, 12, 6)

        site = PhillyLegistarSiteWrapper()
        site.get_minutes_date = mock.Mock(return_value=stub_date)
        site.extract_pdf_text = mock.Mock(return_value=stub_text)

        wanted = {}
        wanted['url'] = doc_url
        wanted['fulltext'] = stub_text
        wanted['date_taken'] = stub_date

        built = site.get_minutes_doc(doc_url)

        self.assertEqual(built, wanted)
    def test_MinutesDocumentConstructedCorrectly(self):
        # get_minutes_doc should return a dict holding the document URL, its
        # full text and the date the minutes were taken.  The date lookup and
        # the PDF text extraction are replaced with mocks on the instance so
        # the test controls both values.
        #
        # NOTE(review): an earlier method in this chunk has this same name
        # (it builds the wrapper without root_url).  If both live in one
        # class this later definition is the one that actually runs --
        # confirm which variant is intended.
        doc_url = 'http://www.example.com/doc.pdf'
        stub_text = 'This is the text'
        stub_date = dt.date(2083, 12, 6)

        site = PhillyLegistarSiteWrapper(root_url='')
        site.get_minutes_date = mock.Mock(return_value=stub_date)
        site.extract_pdf_text = mock.Mock(return_value=stub_text)

        wanted = dict(url=doc_url, fulltext=stub_text, date_taken=stub_date)

        built = site.get_minutes_doc(doc_url)

        self.assertEqual(built, wanted)