コード例 #1
0
ファイル: tests.py プロジェクト: Code4SA/pombola
 def setUpClass(cls):
     """Parse the sample transcript once and cache the result on the class.

     Reads the file named by ``cls.sample`` and stores the two values
     returned by ``parse`` as ``cls.head`` and ``cls.entries``.
     """
     # Read through a context manager so the file handle is closed
     # deterministically instead of being left to the garbage collector.
     with open(cls.sample, "r") as sample_file:
         sample_content = sample_file.read()

     cls.head, cls.entries = parse(sample_content)
コード例 #2
0
 def setUpClass(cls):
     """Run the parent class setup, then parse and cache the sample.

     Reads the file named by ``cls.sample`` and stores the two values
     returned by ``parse`` as ``cls.head`` and ``cls.entries``.
     """
     super(GhanaParserTest, cls).setUpClass()

     # Read through a context manager so the file handle is closed
     # deterministically instead of being left to the garbage collector.
     with open(cls.sample, 'r') as sample_file:
         sample_content = sample_file.read()

     cls.head, cls.entries = parse(sample_content)
コード例 #3
0
ファイル: tests.py プロジェクト: Code4SA/pombola
    def test_entire_output(self):
        """
        For the sample files that we have, parse them and then compare the
        results to those stored in JSON, and to the normalised line breaks. This
        will allow us to quickly spot changes that are not individually tested.

        Note that there is a flag that can be used to write the new output to
        disk. This can be used to update the test data, and also to make it
        possible to use a diff tool to see the changes more clearly than is
        possible in the failing test output.
        """

        # change to True to update the test json files.
        overwrite_known_good_files = False

        # list of all the files that we should parse and compare (path should
        # be relative to this test file).
        transcript_files = [
            "data/hansard-sample.txt",
            "data/hansards/hansard_0014.txt",
            "data/hansards/hansard_0025.txt",
            "data/hansards/hansard_0026.txt",
            "data/hansards/hansard_0028.txt",
            "data/hansards/hansard_0029.txt",
            "data/hansards/hansard_0030.txt",
            "data/hansards/hansard_0031.txt",
            "data/hansards/hansard_0032.txt",
            "data/hansards/hansard_0034.txt",
            "data/hansards/hansard_0038.txt",
            "data/hansards/hansard_0051.txt",
            "data/hansards/hansard_0075.txt",
            "data/hansards/hansard_0077.txt",
            "data/hansards/hansard_0078.txt",
        ]

        # The directory of this test file does not change between iterations.
        test_dir = os.path.dirname(__file__)

        for transcript_file in transcript_files:
            transcript_abs_path = os.path.join(test_dir, transcript_file)

            # Both known-good files sit next to the transcript and share its
            # name without the extension.
            path_stem = os.path.splitext(transcript_abs_path)[0]
            normalised_abs_path = path_stem + "-normalised.txt"
            data_abs_path = path_stem + ".json"

            # Read the sample content
            with open(transcript_abs_path, "r") as transcript:
                sample_content = transcript.read()

            # normalise the line breaks
            normalised_sample_content = normalise_line_breaks(sample_content)

            # parse sample content, store in data structure
            head, entries = parse(sample_content)
            parsed_data = {"head": head, "entries": entries}
            parsed_data_as_json = self.convert_parsed_data_to_json(parsed_data)

            # Write this parsed data out to disk if desired - this should
            # normally not happen, but is convenient to do during development
            if overwrite_known_good_files:
                # Single-argument call form prints the same text whether
                # ``print`` is a statement or a function.
                print("** WARNING - overwriting known good files for '%s' ***" % transcript_abs_path)
                # The ``with`` blocks guarantee the data is flushed and the
                # handles closed before the files are read back below.
                with open(data_abs_path, "w") as data_file:
                    data_file.write(parsed_data_as_json)
                with open(normalised_abs_path, "w") as normalised_file:
                    normalised_file.write(normalised_sample_content)

            # check that the line splitting works as expected
            with open(normalised_abs_path, "r") as normalised_file:
                expected_normalised_content = normalised_file.read().strip()
            self.assertEqual(expected_normalised_content, normalised_sample_content)

            # Read in the expected data and compare to what we got from parsing
            with open(data_abs_path, "r") as data_file:
                expected_data = json.loads(data_file.read())
            self.assertEqual(
                json.loads(parsed_data_as_json),  # so datetimes are iso formatted
                expected_data
                # "Correctly parsed %s" % transcript_file
            )
コード例 #4
0
    def test_entire_output(self):
        """
        For the sample files that we have, parse them and then compare the
        results to those stored in JSON, and to the normalised line breaks. This
        will allow us to quickly spot changes that are not individually tested.

        Note that there is a flag that can be used to write the new output to
        disk. This can be used to update the test data, and also to make it
        possible to use a diff tool to see the changes more clearly than is
        possible in the failing test output.
        """

        # change to True to update the test json files.
        overwrite_known_good_files = False

        # list of all the files that we should parse and compare (path should
        # be relative to this test file).
        transcript_files = [
            'data/hansard-sample.txt',
            'data/hansards/hansard_0014.txt',
            'data/hansards/hansard_0025.txt',
            'data/hansards/hansard_0026.txt',
            'data/hansards/hansard_0028.txt',
            'data/hansards/hansard_0029.txt',
            'data/hansards/hansard_0030.txt',
            'data/hansards/hansard_0031.txt',
            'data/hansards/hansard_0032.txt',
            'data/hansards/hansard_0034.txt',
            'data/hansards/hansard_0038.txt',
            'data/hansards/hansard_0051.txt',
            'data/hansards/hansard_0075.txt',
            'data/hansards/hansard_0077.txt',
            'data/hansards/hansard_0078.txt',
        ]

        # The directory of this test file does not change between iterations.
        here = os.path.dirname(__file__)

        for transcript_file in transcript_files:
            transcript_abs_path = os.path.join(here, transcript_file)

            # Both known-good files sit next to the transcript and share its
            # name without the extension.
            stem = os.path.splitext(transcript_abs_path)[0]
            normalised_abs_path = stem + '-normalised.txt'
            data_abs_path = stem + '.json'

            # Read the sample content
            with open(transcript_abs_path, 'r') as source:
                sample_content = source.read()

            # normalise the line breaks
            normalised_sample_content = normalise_line_breaks(sample_content)

            # parse sample content, store in data structure
            head, entries = parse(sample_content)
            parsed_data = {'head': head, 'entries': entries}
            parsed_data_as_json = self.convert_parsed_data_to_json(parsed_data)

            # Write this parsed data out to disk if desired - this should
            # normally not happen, but is convenient to do during development
            if overwrite_known_good_files:
                # Single-argument call form prints the same text whether
                # ``print`` is a statement or a function.
                print("** WARNING - overwriting known good files for '%s' ***" % transcript_abs_path)
                # The ``with`` blocks guarantee the data is flushed and the
                # handles closed before the files are read back below.
                with open(data_abs_path, 'w') as json_out:
                    json_out.write(parsed_data_as_json)
                with open(normalised_abs_path, 'w') as normalised_out:
                    normalised_out.write(normalised_sample_content)

            # check that the line splitting works as expected
            with open(normalised_abs_path, 'r') as normalised_in:
                known_good_normalised = normalised_in.read().strip()
            self.assertEqual(
                known_good_normalised,
                normalised_sample_content
            )

            # Read in the expected data and compare to what we got from parsing
            with open(data_abs_path, 'r') as json_in:
                expected_data = json.loads(json_in.read())
            self.assertEqual(
                json.loads(parsed_data_as_json),  # so datetimes are iso formatted
                expected_data
                # "Correctly parsed %s" % transcript_file
            )