コード例 #1
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
 def _create_source_and_load_test_json_to_entries(self):
     """Create a test Source and load the expected JSON data into entries.

     The JSON file at ``self.expected_data_json`` is decoded and handed,
     together with the newly created source, to
     ``KenyaParser.create_entries_from_data_and_source``.

     Returns the created source.
     """
     source = Source.objects.create(
         name='Test source',
         url='http://example.com/foo/bar/testing',
         date=datetime.date(2011, 9, 1))
     # A context manager closes the fixture file deterministically instead
     # of leaving the handle for the garbage collector.
     with open(self.expected_data_json, 'r') as json_file:
         data = json.load(json_file)
     KenyaParser.create_entries_from_data_and_source(data, source)
     return source
コード例 #2
0
ファイル: test_kenya_parser.py プロジェクト: Code4SA/pombola
 def _create_source_and_load_test_json_to_entries(self):
     """Create a test Source and load the expected JSON data into entries.

     Decodes the JSON file at ``self.expected_data_json`` and passes the
     result, with the new source, to
     ``KenyaParser.create_entries_from_data_and_source``.

     Returns the created source.
     """
     source   = Source.objects.create(
         name = 'Test source',
         url  = 'http://example.com/foo/bar/testing',
         date = datetime.date( 2011, 9, 1 )
     )
     # Close the fixture file as soon as it has been decoded rather than
     # leaking the open handle.
     with open( self.expected_data_json, 'r' ) as json_file:
         data = json.load( json_file )
     KenyaParser.create_entries_from_data_and_source( data, source )
     return source
コード例 #3
0
    def handle_noargs(self, **options):
        """Process every source returned by ``requires_processing()``.

        For each source the last processing attempt is set to the current
        time and saved, then the source's file is converted PDF -> HTML ->
        data and entries are created from that data.
        """
        pending_sources = Source.objects.all().requires_processing()

        for source in pending_sources:

            # Only report progress at verbosity 2 or above
            verbosity = int(options.get('verbosity'))
            if verbosity >= 2:
                print("Looking at %s" % source)

            # Record the attempt before parsing starts
            attempted_at = datetime.datetime.now()
            source.last_processing_attempt = attempted_at
            source.save()

            source_pdf = source.file()
            source_html = KenyaParser.convert_pdf_to_html(source_pdf)
            parsed = KenyaParser.convert_html_to_data(source_html)
            KenyaParser.create_entries_from_data_and_source(parsed, source)
コード例 #4
0
    def handle_noargs(self, **options):
        """Run the Kenya parser over each source that requires processing.

        Every source gets its ``last_processing_attempt`` set to now and is
        saved before its file is converted to HTML, then to data, and the
        entries are created.
        """
        for source in Source.objects.all().requires_processing():

            # Stay quiet unless the verbosity option is at least 2
            if not int( options.get('verbosity') ) < 2:
                print( "Looking at %s" % source )

            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            # PDF -> HTML -> data, then store the entries against the source
            html = KenyaParser.convert_pdf_to_html( source.file() )
            KenyaParser.create_entries_from_data_and_source(
                KenyaParser.convert_html_to_data( html ),
                source
            )
コード例 #5
0
ファイル: test_kenya_parser.py プロジェクト: Code4SA/pombola
    def test_converting_pdf_to_html(self):
        """Test that the pdf becomes the html that we expect"""
        # Context managers close both fixture files even when the conversion
        # raises; previously the handles were never closed.
        with open( self.sample_pdf, 'r' ) as pdf_file:
            html = KenyaParser.convert_pdf_to_html( pdf_file )

        with open( self.sample_html, 'r' ) as html_file:
            expected_html = html_file.read()

        self.assertEqual( html, expected_html )
コード例 #6
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
    def test_converting_pdf_to_html(self):
        """Test that the pdf becomes the html that we expect"""
        # The PDF stays open only for the duration of the conversion and is
        # closed afterwards, instead of leaking the handle.
        with open(self.sample_pdf, 'r') as pdf_file:
            html = KenyaParser.convert_pdf_to_html(pdf_file)

        # Likewise for the expected HTML fixture.
        with open(self.sample_html, 'r') as html_file:
            expected_html = html_file.read()

        self.assertEqual(html, expected_html)
コード例 #7
0
    def handle_noargs(self, **options):
        """Process every source returned by ``requires_processing()``.

        Each source has its ``last_processing_attempt`` set to the current
        time and is saved before parsing. Its file is then converted
        PDF -> HTML -> data and entries are created from the data.

        When the ``verbosity`` option is 2 or more, a progress line is
        printed for each source.

        Raises:
            Any exception raised while parsing is re-raised unchanged after
            a message identifying the offending file has been printed.
        """

        verbose = int(options.get('verbosity')) >= 2

        for source in Source.objects.all().requires_processing():

            if verbose:
                message = "{0}: Looking at {1}"
                print(message.format(source.list_page, source))

            # Record the attempt before parsing, so it is stored even if
            # parsing fails below.
            source.last_processing_attempt = datetime.datetime.now()
            source.save()

            pdf = source.file()
            try:
                html = KenyaParser.convert_pdf_to_html( pdf )
                data = KenyaParser.convert_html_to_data( html )
                KenyaParser.create_entries_from_data_and_source( data, source )
            except Exception:
                # The exception object itself is not needed: say which file
                # failed, then let the original error propagate.
                print("There was an exception when parsing {0}".format(pdf))
                raise
コード例 #8
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
    def test_parse_time_string(self):
        """Check parse_time_string against known inputs and a bad string."""

        # am and pm make no sense at noon or midnight - but define what we
        # want to happen (see the '12.00 p.m.' entry)
        expected_by_input = {
            '1.00 p.m.': '13:00:00',
            '1.00 a.m.': '01:00:00',
            '12.00 p.m.': '12:00:00',
            '12.30 p.m.': '12:30:00',
        }

        for raw_time, wanted in expected_by_input.items():
            parsed = KenyaParser.parse_time_string(raw_time)
            self.assertEqual(parsed, wanted)

        # A string that is not a time must raise the parser's own error
        unparseable = 'foo.bar'
        self.assertRaises(
            KenyaParserCouldNotParseTimeString,
            KenyaParser.parse_time_string,
            unparseable)
コード例 #9
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
    def test_parse_time_string(self):
        """Test that time strings are parsed to the expected HH:MM:SS values"""

        # Input string -> expected output.
        # am and pm make no sense at noon or midnight - but define what we
        # want to happen ('12.00 p.m.' below).
        cases = {
            '1.00 p.m.': '13:00:00',
            '1.00 a.m.': '01:00:00',
            '12.00 p.m.': '12:00:00',
            '12.30 p.m.': '12:30:00',
        }

        parse = KenyaParser.parse_time_string
        for time_string, expected in cases.items():
            self.assertEqual( parse( time_string ), expected )

        # Unparseable input must raise rather than return a value
        self.assertRaises( KenyaParserCouldNotParseTimeString, parse, 'foo.bar' )
コード例 #10
0
    def test_parse_time_string(self):
        """Check parse_time_string on numeric and spelled-out time strings."""

        # am and pm make no sense at noon or midnight - but define what we
        # want to happen (see the '12.00 p.m.' entry)
        expected_by_input = {
            '1.00 p.m.': '13:00:00',
            '1.00 a.m.': '01:00:00',
            '12.00 p.m.': '12:00:00',
            '12.30 p.m.': '12:30:00',
            "twenty-four minutes past Six o'clock": '18:24:00',
            "Fifteen minutes to two o'clock": '13:45:00',
            "Fifty five minutes past Nine o'clock": '09:55:00',
            "One minute past eight o'clock": '08:01:00',
        }

        for raw_time, wanted in expected_by_input.items():
            parsed = KenyaParser.parse_time_string(raw_time)
            self.assertEqual(parsed, wanted)

        # A string that is not a time must raise the parser's own error
        unparseable = 'foo.bar'
        self.assertRaises(
            KenyaParserCouldNotParseTimeString,
            KenyaParser.parse_time_string,
            unparseable)
コード例 #11
0
ファイル: test_kenya_parser.py プロジェクト: Code4SA/pombola
    def test_parse_time_string(self):
        """Test that time strings are parsed to the expected HH:MM:SS values"""

        # Input string -> expected output.
        # am and pm make no sense at noon or midnight - but define what we
        # want to happen ('12.00 p.m.' below).
        cases = {
            '1.00 p.m.': '13:00:00',
            '1.00 a.m.': '01:00:00',
            '12.00 p.m.': '12:00:00',
            '12.30 p.m.': '12:30:00',
            "twenty-four minutes past Six o'clock": '18:24:00',
            "Fifteen minutes to two o'clock": '13:45:00',
            "Fifty five minutes past Nine o'clock": '09:55:00',
            "One minute past eight o'clock": '08:01:00',
        }

        parse = KenyaParser.parse_time_string
        for time_string, expected in cases.items():
            self.assertEqual( parse( time_string ), expected )

        # Unparseable input must raise rather than return a value
        self.assertRaises( KenyaParserCouldNotParseTimeString, parse, 'foo.bar' )
コード例 #12
0
ファイル: test_kenya_parser.py プロジェクト: Code4SA/pombola
    def test_converting_html_to_data(self):
        """Test the convert_html_to_data function"""

        # Read the sample HTML and close the file straight away; the handle
        # used to be left open.
        with open( self.sample_html, 'r' ) as html_file:
            html = html_file.read()

        data = KenyaParser.convert_html_to_data( html=html )

        # Whilst developing the code this proved useful (it rewrites the
        # expected JSON file from the current parser output):
        # out = open( self.expected_data_json, 'w')
        # json_string = json.dumps( data, sort_keys=True, indent=4 )
        # json_string = re.sub(r" +\n", "\n", json_string) # trim trailing whitespace
        # json_string += "\n"
        # out.write( json_string )
        # out.close()

        with open( self.expected_data_json, 'r' ) as expected_file:
            expected = json.load( expected_file )

        self.assertEqual( data['transcript'], expected['transcript'] )

        # FIXME
        self.assertEqual( data['meta'], expected['meta'] )
コード例 #13
0
ファイル: test_kenya_parser.py プロジェクト: Ufadhili/Ajibika
    def test_converting_html_to_data(self):
        """Test the convert_html_to_data function"""

        # Context managers make sure both fixture files are closed; the
        # handles were previously leaked.
        with open(self.sample_html, 'r') as html_file:
            html = html_file.read()

        data = KenyaParser.convert_html_to_data(html=html)

        # Whilst developing the code this proved useful (it rewrites the
        # expected JSON file from the current parser output):
        # out = open( self.expected_data_json, 'w')
        # json_string = json.dumps( data, sort_keys=True, indent=4 )
        # json_string = re.sub(r" +\n", "\n", json_string) # trim trailing whitespace
        # json_string += "\n"
        # out.write( json_string )
        # out.close()

        with open(self.expected_data_json, 'r') as expected_file:
            expected = json.load(expected_file)

        self.assertEqual(data['transcript'], expected['transcript'])

        # FIXME
        self.assertEqual(data['meta'], expected['meta'])