コード例 #1
0
    def handle_noargs(self, **options):
        """Parse every source that still requires processing.

        For each source returned by
        ``Source.objects.all().requires_processing()`` the attempt time is
        saved first; then the source's file is converted to HTML, the HTML
        to data, and entries are created from that data and the source.

        Args:
            **options: Command options. Only ``verbosity`` is read; a value
                of 2 or more prints one line per source.
        """
        # Parse the flag once instead of on every iteration. A missing or
        # None verbosity is treated as level 1 rather than failing in int().
        verbose = int(options.get('verbosity') or 1) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                # The parenthesised single-argument form prints the same
                # text whether print is a statement or a function.
                print("Looking at %s" % source)

            # The attempt is recorded before any parsing starts.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
コード例 #2
0
    def handle_noargs(self, **options):
        """Run the parser over each source that requires processing.

        Iterates ``Source.objects.all().requires_processing()``. Each source
        has ``last_processing_attempt`` set to the current time and is saved
        before its file is passed through the three ``KenyaParser`` steps
        (file to HTML, HTML to data, data to entries).

        Args:
            **options: Command options. Only ``verbosity`` is read; at 2 or
                above a "Looking at ..." line is printed for every source.
        """
        # Loop-invariant, so evaluated once up front. ``or 1`` keeps a
        # missing/None verbosity from raising TypeError inside int().
        verbosity = int(options.get('verbosity') or 1)
        verbose = verbosity >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                # Single-argument parenthesised print gives identical output
                # as a print statement and as a print function call.
                print("Looking at %s" % source)

            # Stamp and save the attempt time ahead of the parsing steps.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
コード例 #3
0
    def handle_noargs(self, **options):
        """Parse every source that requires processing, reporting failures.

        For each source returned by
        ``Source.objects.all().requires_processing()`` the attempt time is
        saved, then the source's file is converted to HTML, the HTML to
        data, and entries are created from the data. If any of those three
        steps raises, the file is named on standard output and the
        exception is re-raised, which stops the run.

        Args:
            **options: Command options. Only ``verbosity`` is read; at 2 or
                above a line naming the list page and source is printed.

        Raises:
            Exception: Whatever the ``KenyaParser`` steps raise is
                propagated unchanged after the diagnostic line is printed.
        """
        # A missing or None verbosity is treated as level 1 instead of
        # failing in int().
        verbose = int(options.get('verbosity') or 1) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                message = "{0}: Looking at {1}"
                # Parenthesised single-argument print behaves the same as a
                # statement and as a function.
                print(message.format(source.list_page, source))

            # The attempt is recorded before any parsing starts.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            try:
                html = KenyaParser.convert_pdf_to_html(pdf)
                data = KenyaParser.convert_html_to_data(html)
                KenyaParser.create_entries_from_data_and_source(data, source)
            except Exception:
                # Say which file failed, then let the original exception
                # (and its traceback) continue to the caller.
                print("There was an exception when parsing {0}".format(pdf))
                raise
コード例 #4
0
ファイル: test_kenya_parser.py プロジェクト: Code4SA/pombola
    def test_converting_html_to_data(self):
        """Test the convert_html_to_data function.

        Reads the sample HTML fixture, converts it, and compares the
        ``transcript`` and ``meta`` parts of the result with the expected
        JSON fixture.
        """

        # Read the fixtures under ``with`` so the file handles are closed
        # instead of being left open for the rest of the test run.
        with open(self.sample_html, 'r') as html_file:
            html = html_file.read()

        data = KenyaParser.convert_html_to_data(html=html)

        # Whilst developing the code this proved useful
        # out = open( self.expected_data_json, 'w')
        # json_string = json.dumps( data, sort_keys=True, indent=4 )
        # json_string = re.sub(r" +\n", "\n", json_string) # trim trailing whitespace
        # json_string += "\n"
        # out.write( json_string )
        # out.close()

        with open(self.expected_data_json, 'r') as expected_file:
            expected = json.loads(expected_file.read())

        self.assertEqual(data['transcript'], expected['transcript'])

        # FIXME
        self.assertEqual(data['meta'], expected['meta'])
コード例 #5
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
    def test_converting_html_to_data(self):
        """Test the convert_html_to_data function.

        The sample HTML fixture is converted and the ``transcript`` and
        ``meta`` entries of the result are checked against the expected
        JSON fixture.
        """

        # ``with`` guarantees each fixture file is closed once it has been
        # read; the previous bare open() calls never closed them.
        with open(self.sample_html, 'r') as sample:
            html = sample.read()

        data = KenyaParser.convert_html_to_data(html=html)

        # Whilst developing the code this proved useful
        # out = open( self.expected_data_json, 'w')
        # json_string = json.dumps( data, sort_keys=True, indent=4 )
        # json_string = re.sub(r" +\n", "\n", json_string) # trim trailing whitespace
        # json_string += "\n"
        # out.write( json_string )
        # out.close()

        with open(self.expected_data_json, 'r') as expected_json:
            expected = json.loads(expected_json.read())

        self.assertEqual(data['transcript'], expected['transcript'])

        # FIXME
        self.assertEqual(data['meta'], expected['meta'])