Exemple #1
0
    def handle_noargs(self, **options):
        """Run the parser over every source that requires processing.

        For each source the attempt time is recorded and saved, then the
        source's file is converted to HTML, the HTML to data, and entries
        are created from that data.

        Options:
            verbosity: when 2 or higher, each source is printed before it
                is processed. A missing or ``None`` value is treated as
                quiet instead of raising ``TypeError``.
        """
        # Resolve verbosity once, outside the loop; ``int(None)`` would
        # otherwise crash when the option is absent.
        raw_verbosity = options.get('verbosity')
        verbose = raw_verbosity is not None and int(raw_verbosity) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                # Single-argument call form prints the same on Python 2 and 3.
                print("Looking at %s" % source)

            # The attempt time is saved before any parsing is attempted.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
    def handle_noargs(self, **options):
        """Process each source returned by ``requires_processing()``.

        Every source has its ``last_processing_attempt`` set to the current
        time and saved; its file is then passed through the KenyaParser
        pipeline (PDF -> HTML -> data -> entries).

        Options:
            verbosity: a value of 2 or more prints a line per source. An
                absent or ``None`` verbosity no longer raises ``TypeError``;
                it simply disables the extra output.
        """
        # Loop-invariant, so compute it a single time. Guard against a
        # missing option because ``int(None)`` raises TypeError.
        verbosity_option = options.get('verbosity')
        show_progress = (
            verbosity_option is not None and int(verbosity_option) >= 2
        )

        for source in Source.objects.all().requires_processing():

            if show_progress:
                # Parenthesised single argument: valid and identical output
                # on both Python 2 and Python 3.
                print("Looking at %s" % source)

            # Record and save the attempt time ahead of the parsing steps.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            html = KenyaParser.convert_pdf_to_html(pdf)
            data = KenyaParser.convert_html_to_data(html)
            KenyaParser.create_entries_from_data_and_source(data, source)
    def test_converting_html_to_data(self):
        """Test the convert_html_to_data function.

        Reads the sample HTML at ``self.sample_html``, converts it, and
        compares the 'transcript' and 'meta' sections with the expected
        JSON stored at ``self.expected_data_json``.
        """

        # Use a context manager so the fixture file is always closed.
        with open(self.sample_html, 'r') as html_file:
            html = html_file.read()

        data = KenyaParser.convert_html_to_data(html=html)

        # Whilst developing the code this proved useful (on a mac at least)
        # tmp = tempfile.NamedTemporaryFile( delete=False, suffix=".json" )
        # tmp = open( '/tmp/mzalend_hansard_parse.json', 'w')
        # tmp.write( json.dumps( data, sort_keys=True, indent=4 ) )
        # tmp.close()
        # subprocess.call(['open', tmp.name ])

        with open(self.expected_data_json, 'r') as expected_file:
            expected = json.load(expected_file)

        self.assertEqual(data['transcript'], expected['transcript'])

        # FIXME
        self.assertEqual(data['meta'], expected['meta'])
Exemple #4
0
    def test_converting_html_to_data(self):
        """Check convert_html_to_data output against the expected JSON.

        The HTML fixture is read from ``self.sample_html`` and the expected
        result from ``self.expected_data_json``; the 'transcript' and 'meta'
        keys of both are compared.
        """

        # ``with`` closes each fixture file instead of leaking the handle.
        with open(self.sample_html, 'r') as sample:
            html = sample.read()

        data = KenyaParser.convert_html_to_data(html=html)

        # Whilst developing the code this proved useful (on a mac at least)
        # tmp = tempfile.NamedTemporaryFile( delete=False, suffix=".json" )
        # tmp = open( '/tmp/mzalend_hansard_parse.json', 'w')
        # tmp.write( json.dumps( data, sort_keys=True, indent=4 ) )
        # tmp.close()
        # subprocess.call(['open', tmp.name ])

        with open(self.expected_data_json, 'r') as expected_json:
            expected = json.load(expected_json)

        self.assertEqual(data['transcript'], expected['transcript'])

        # FIXME
        self.assertEqual(data['meta'], expected['meta'])