Exemple #1
0
    def test_converting_pdf_to_html(self):
        """Test that the pdf becomes the html that we expect.

        Converts the fixture at ``self.sample_pdf`` with
        ``KenyaParser.convert_pdf_to_html`` and compares the result with the
        contents of the file at ``self.sample_html``.
        """
        # PDFs are binary data, so open the fixture in binary mode. The
        # ``with`` blocks guarantee both fixture files are closed even when
        # the conversion or the assertion fails.
        with open(self.sample_pdf, 'rb') as pdf_file:
            html = KenyaParser.convert_pdf_to_html(pdf_file)

        with open(self.sample_html, 'r') as html_file:
            expected_html = html_file.read()

        self.assertEqual(html, expected_html)
    def test_converting_pdf_to_html(self):
        """Test that the pdf becomes the html that we expect.

        Runs ``KenyaParser.convert_pdf_to_html`` on the fixture at
        ``self.sample_pdf`` and checks the output equals the contents of the
        file at ``self.sample_html``.
        """
        # Binary mode because a PDF is not text; context managers so neither
        # file handle is leaked if the conversion or comparison raises.
        with open(self.sample_pdf, 'rb') as pdf_file:
            html = KenyaParser.convert_pdf_to_html(pdf_file)

        with open(self.sample_html, 'r') as html_file:
            expected_html = html_file.read()

        self.assertEqual(html, expected_html)
Exemple #3
0
    def handle_noargs(self, **options):
        """Parse every source that still requires processing.

        For each source returned by
        ``Source.objects.all().requires_processing()`` the processing attempt
        time is stamped and saved, then the source's file is converted to
        HTML, the HTML to data, and entries are created from that data.

        Options:
            verbosity: when 2 or higher, each source is printed before it is
                processed. A missing or None value is treated as 1.
        """
        # Evaluate once instead of on every iteration; ``or 1`` stops a
        # missing/None verbosity from raising TypeError inside int().
        verbose = int(options.get('verbosity') or 1) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                # Parenthesised single argument: prints identically whether
                # ``print`` is a statement or a function.
                print("Looking at %s" % source)

            # The attempt is stamped and saved before parsing starts.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
    def handle_noargs(self, **options):
        """Process all sources that are flagged as requiring processing.

        Iterates over ``Source.objects.all().requires_processing()``. Each
        source has ``last_processing_attempt`` set to the current time and is
        saved, after which its file is run through the ``KenyaParser``
        pipeline: PDF to HTML, HTML to data, data to entries.

        Options:
            verbosity: when 2 or higher, each source is printed before it is
                processed. A missing or None value is treated as 1.
        """
        # Loop-invariant, so compute it up front. Falling back to 1 avoids
        # int(None) raising TypeError when no verbosity was supplied.
        verbose = int(options.get('verbosity') or 1) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                # Single parenthesised argument, so the output is the same
                # under both the print statement and the print function.
                print("Looking at %s" % source)

            # Stamp and save the attempt time before any parsing happens.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
Exemple #5
0
    def handle_noargs(self, **options):
        """Parse every source that still requires processing.

        For each source returned by
        ``Source.objects.all().requires_processing()`` the processing attempt
        time is stamped and saved, then the source's file is converted to
        HTML, the HTML to data, and entries are created from that data.

        Options:
            verbosity: when 2 or higher, each source (with its list page) is
                printed before it is processed. A missing or None value is
                treated as 1.

        Raises:
            Any exception raised by the ``KenyaParser`` steps is re-raised
            unchanged after a message naming the offending file is printed.
        """
        # ``or 1`` stops a missing/None verbosity from raising TypeError
        # inside int().
        verbose = int(options.get('verbosity') or 1) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                message = "{0}: Looking at {1}"
                # Parenthesised single argument: prints identically whether
                # ``print`` is a statement or a function.
                print(message.format(source.list_page, source))

            # The attempt is stamped and saved before parsing starts.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            try:
                html = KenyaParser.convert_pdf_to_html(pdf)
                data = KenyaParser.convert_html_to_data(html)
                KenyaParser.create_entries_from_data_and_source(data, source)
            except Exception:
                # Report which file failed, then let the original exception
                # (and its traceback) propagate to the caller.
                print("There was an exception when parsing {0}".format(pdf))
                raise