def setUpClass(cls):
    """Parse the sample transcript once and cache the result on the class.

    Reads the file named by ``cls.sample`` and stores the parsed header and
    entry list as ``cls.head`` / ``cls.entries`` for all tests to share.

    NOTE(review): presumably decorated with @classmethod above this line —
    confirm. Unlike the sibling setUpClass variant, this one does not chain
    to super().setUpClass(); confirm that is intentional.
    """
    # Use a context manager so the file handle is closed even if parse()
    # raises (the original leaked the handle).
    with open(cls.sample, "r") as sample_file:
        cls.head, cls.entries = parse(sample_file.read())
def setUpClass(cls):
    """Parse the sample transcript once and cache the result on the class.

    Chains to the parent class's setUpClass, then reads the file named by
    ``cls.sample`` and stores the parsed header and entry list as
    ``cls.head`` / ``cls.entries`` for all tests to share.

    NOTE(review): presumably decorated with @classmethod above this line —
    confirm.
    """
    super(GhanaParserTest, cls).setUpClass()
    # Use a context manager so the file handle is closed even if parse()
    # raises (the original leaked the handle).
    with open(cls.sample, 'r') as sample_file:
        cls.head, cls.entries = parse(sample_file.read())
def test_entire_output(self):
    """
    For the sample files that we have, parse them and then compare the
    results to those stored in JSON, and to the normalised line breaks.
    This will allow us to quickly spot changes that are not individually
    tested.

    Note that there is a flag that can be used to write the new output to
    disk. This can be used to update the test data, and also to make it
    possible to use a diff tool to see the changes more clearly than is
    possible in the failing test output.
    """
    # change to True to update the test json files.
    overwrite_known_good_files = False

    # list of all the files that we should parse and compare (path should
    # be relative to this test file).
    transcript_files = [
        "data/hansard-sample.txt",
        "data/hansards/hansard_0014.txt",
        "data/hansards/hansard_0025.txt",
        "data/hansards/hansard_0026.txt",
        "data/hansards/hansard_0028.txt",
        "data/hansards/hansard_0029.txt",
        "data/hansards/hansard_0030.txt",
        "data/hansards/hansard_0031.txt",
        "data/hansards/hansard_0032.txt",
        "data/hansards/hansard_0034.txt",
        "data/hansards/hansard_0038.txt",
        "data/hansards/hansard_0051.txt",
        "data/hansards/hansard_0075.txt",
        "data/hansards/hansard_0077.txt",
        "data/hansards/hansard_0078.txt",
    ]

    for transcript_file in transcript_files:
        transcript_abs_path = os.path.join(
            os.path.dirname(__file__), transcript_file)
        normalised_abs_path = (
            os.path.splitext(transcript_abs_path)[0] + "-normalised.txt")
        data_abs_path = os.path.splitext(transcript_abs_path)[0] + ".json"

        # Read the sample content (with-block closes the handle; the
        # original leaked one handle per file).
        with open(transcript_abs_path, "r") as f:
            sample_content = f.read()

        # normalise the line breaks
        normalised_sample_content = normalise_line_breaks(sample_content)

        # parse sample content, store in data structure
        head, entries = parse(sample_content)
        parsed_data = {"head": head, "entries": entries}
        parsed_data_as_json = self.convert_parsed_data_to_json(parsed_data)

        # Write this parsed data out to disk if desired - this should
        # normally not happen, but is convenient to do during development
        if overwrite_known_good_files:
            # Parenthesized form works as a statement on Python 2 and a
            # call on Python 3.
            print("** WARNING - overwriting known good files for '%s' ***"
                  % transcript_abs_path)
            with open(data_abs_path, "w") as f:
                f.write(parsed_data_as_json)
            with open(normalised_abs_path, "w") as f:
                f.write(normalised_sample_content)

        # check that the line splitting works as expected
        with open(normalised_abs_path, "r") as f:
            self.assertEqual(f.read().strip(), normalised_sample_content)

        # Read in the expected data and compare to what we got from parsing
        with open(data_abs_path, "r") as f:
            expected_data = json.loads(f.read())
        self.assertEqual(
            json.loads(parsed_data_as_json),  # so datetimes are iso formatted
            expected_data,
            # "Correctly parsed %s" % transcript_file
        )
def test_entire_output(self):
    """
    For the sample files that we have, parse them and then compare the
    results to those stored in JSON, and to the normalised line breaks.
    This will allow us to quickly spot changes that are not individually
    tested.

    Note that there is a flag that can be used to write the new output to
    disk. This can be used to update the test data, and also to make it
    possible to use a diff tool to see the changes more clearly than is
    possible in the failing test output.
    """
    # change to True to update the test json files.
    overwrite_known_good_files = False

    # list of all the files that we should parse and compare (path should
    # be relative to this test file).
    transcript_files = [
        'data/hansard-sample.txt',
        'data/hansards/hansard_0014.txt',
        'data/hansards/hansard_0025.txt',
        'data/hansards/hansard_0026.txt',
        'data/hansards/hansard_0028.txt',
        'data/hansards/hansard_0029.txt',
        'data/hansards/hansard_0030.txt',
        'data/hansards/hansard_0031.txt',
        'data/hansards/hansard_0032.txt',
        'data/hansards/hansard_0034.txt',
        'data/hansards/hansard_0038.txt',
        'data/hansards/hansard_0051.txt',
        'data/hansards/hansard_0075.txt',
        'data/hansards/hansard_0077.txt',
        'data/hansards/hansard_0078.txt',
    ]

    for transcript_file in transcript_files:
        transcript_abs_path = os.path.join(
            os.path.dirname(__file__), transcript_file)
        normalised_abs_path = (
            os.path.splitext(transcript_abs_path)[0] + '-normalised.txt')
        data_abs_path = os.path.splitext(transcript_abs_path)[0] + '.json'

        # Read the sample content (with-block closes the handle; the
        # original leaked one handle per file).
        with open(transcript_abs_path, 'r') as f:
            sample_content = f.read()

        # normalise the line breaks
        normalised_sample_content = normalise_line_breaks(sample_content)

        # parse sample content, store in data structure
        head, entries = parse(sample_content)
        parsed_data = {'head': head, 'entries': entries}
        parsed_data_as_json = self.convert_parsed_data_to_json(parsed_data)

        # Write this parsed data out to disk if desired - this should
        # normally not happen, but is convenient to do during development
        if overwrite_known_good_files:
            # Parenthesized form works as a statement on Python 2 and a
            # call on Python 3.
            print("** WARNING - overwriting known good files for '%s' ***"
                  % transcript_abs_path)
            with open(data_abs_path, 'w') as f:
                f.write(parsed_data_as_json)
            with open(normalised_abs_path, 'w') as f:
                f.write(normalised_sample_content)

        # check that the line splitting works as expected
        with open(normalised_abs_path, 'r') as f:
            self.assertEqual(f.read().strip(), normalised_sample_content)

        # Read in the expected data and compare to what we got from parsing
        with open(data_abs_path, 'r') as f:
            expected_data = json.loads(f.read())
        self.assertEqual(
            json.loads(parsed_data_as_json),  # so datetimes are iso formatted
            expected_data,
            # "Correctly parsed %s" % transcript_file
        )