Exemple #1
0
    def test_parse(self,
                   name: str,
                   value_parsers: List[ValueParser],
                   parser_options: ParserOptions,
                   skip_incorrect_lines: bool,
                   line: str,
                   expected_result: List[str],
                   expected_exception_type: Optional[Type] = None) -> None:
        """Check ``LineParser.parse`` against a single test case.

        When ``expected_exception_type`` is given, parsing MUST raise an
        exception of that type; the test fails if nothing is raised.
        Otherwise the parsed line must keep the ``index``, ``header`` and
        ``line`` of the input line and expose ``expected_result`` as its
        ``parsed_values``.

        :param name: not used by the body (presumably the case label
            supplied by the test parameterization)
        :param value_parsers: single-value parsers handed to ``LineParser``
        :param parser_options: options handed to ``LineParser``
        :param skip_incorrect_lines: flag handed to ``LineParser``
        :param line: raw text of the line to parse
        :param expected_result: values expected in ``parsed_values``
        :param expected_exception_type: exception type parsing must raise,
            or ``None`` when parsing is expected to succeed
        """
        # Arrange
        line_parser = LineParser(value_parsers, parser_options,
                                 skip_incorrect_lines)
        file = create_autospec(TextReader)
        input_line = Line(file=file, index=0, header=False, line=line)

        if expected_exception_type is not None:
            # Act / Assert: a missing exception is a failure, and the
            # success-path assertions can no longer be mistaken for the
            # expected error.
            with self.assertRaises(expected_exception_type):
                line_parser.parse(input_line)
            return

        # Act
        parsed_line = line_parser.parse(input_line)

        # Assert
        self.assertEqual(input_line.index, parsed_line.index)
        self.assertEqual(input_line.header, parsed_line.header)
        self.assertEqual(input_line.line, parsed_line.line)
        self.assertEqual(expected_result, parsed_line.parsed_values)
class FileProcessorFactoryTest(TestCase):
    """Tests for ``FileProcessorFactory.create``."""

    @parameterized.expand([
        [
            'parser without single-value parsers',
            FileParser(LineParser([], ParserOptions()), ParserOptions()),
        ],
        [
            'parser with single-value parsers',
            FileParser(
                LineParser(
                    [StringParser(), NumberParser(), NumberParser()],
                    ParserOptions()),
                ParserOptions()),
        ],
    ])
    def test_create(self, name: str, file_parser: FileParser) -> None:
        """Check the processor built on top of ``file_parser``.

        The created object must be a ``FileProcessor`` whose line processor
        is a ``LineProcessor`` holding as many value processors as the
        line parser of ``file_parser`` has value parsers.
        """
        # Arrange: the parser factory always hands back the given parser
        parser_factory = create_autospec(FileParserFactory)
        parser_factory.create = MagicMock(return_value=file_parser)
        processor_factory = FileProcessorFactory(parser_factory)

        # Act
        processor = processor_factory.create('', ParserOptions())

        # Assert
        self.assertIsInstance(processor, FileProcessor)

        line_processor = processor.line_processor
        self.assertIsInstance(line_processor, LineProcessor)

        value_processors = line_processor.value_processors
        self.assertIsNotNone(value_processors)

        expected_count = len(file_parser.line_parser.value_parsers)
        self.assertEqual(expected_count, len(value_processors))
Exemple #3
0
    def test_split(self, name: str, string: str, parser_options: ParserOptions,
                   expected_result: List[str]) -> None:
        """Check that ``LineParser.split`` turns ``string`` into
        ``expected_result`` under the given ``parser_options``."""
        # Act + Assert
        self.assertEqual(expected_result,
                         LineParser.split(string, parser_options))
    def _parse(self, line: Line, values: List[str]) -> List[str]:
        """Parse a record that is broken after the first letter of the name.

        ``values`` must hold exactly two fields: the record ID and the
        first letter of the name. The following line is then read through
        ``line.file.read_line()`` and must split into exactly three fields:
        the rest of the name, the age and the salary.

        :param line: the line being parsed; its ``file`` is used to read
            the continuation line
        :param values: the fields of ``line``, already split
        :return: ``[record_id, name, age, salary]`` as produced by the
            value parsers at positions 0-3
        :raises ParsingError: if ``values`` does not hold 2 fields or the
            continuation line does not hold 3 fields
        """
        # First let's check whether this is our case and we can handle it
        if len(values) != 2:
            raise ParsingError(f'Line # {line.index}: {line.line}. '
                               f'Expected 2 fields, got {len(values)}')

        # We got the first field
        record_id = self._value_parsers[0].parse(values[0])

        # And the first letter of the name
        name_first_letter = values[1]

        # Let's scan the next line and get its fields
        next_line = line.file.read_line()
        next_line_fields = LineParser.split(next_line, self._options)

        # We need to check whether the next line contains all the remaining
        # fields. The message quotes the continuation line, so it reports the
        # reader's current line index rather than the index of the first line.
        if len(next_line_fields) != 3:
            raise ParsingError(
                f'Line # {line.file.current_line_index}: {next_line}. '
                f'Expected 3 fields, got {len(next_line_fields)}')

        # Let's parse all the remaining fields; the name is glued back
        # together from its first letter and the rest found on the next line
        name = self._value_parsers[1].parse(name_first_letter +
                                            next_line_fields[0])
        age = self._value_parsers[2].parse(next_line_fields[1])
        salary = self._value_parsers[3].parse(next_line_fields[2])

        return [record_id, name, age, salary]
    def create(self, input_file_path: str,
               options: ParserOptions) -> FileParser:
        """Build a ``FileParser`` backed by a chain of three line parsers.

        The chain starts with a plain ``LineParser``, continues with
        ``LineWithIDAndNameFirstLetterParser`` and ends with
        ``IDAndNameLineParser``. All of them share the same list of
        single-value parsers and the same ``options``, and none of them
        skips incorrect lines.

        :param input_file_path: not used by this method
        :param options: parser options passed to every parser
        :return: the file parser wrapping the head of the chain
        """
        # Single-value parsers, one per column, shared by the whole chain
        value_parsers = [
            NumberParser(),  # Record ID
            StringParser(),  # Name
            NumberParser(),  # Age
            NumberParser()  # Salary
        ]

        # Assemble the chain starting from its tail
        id_and_name_parser = IDAndNameLineParser(
            value_parsers, options, skip_incorrect_lines=False)

        first_letter_parser = LineWithIDAndNameFirstLetterParser(
            value_parsers,
            options,
            skip_incorrect_lines=False,
            next_line_parser=id_and_name_parser)

        head_parser = LineParser(
            value_parsers,
            options,
            skip_incorrect_lines=False,
            next_line_parser=first_letter_parser)

        # The file parser only needs to know the head of the chain
        return FileParser(head_parser, options)
    def _parse(self, line: Line, values: List[str]) -> List[str]:
        """Parse a record whose age and salary sit on a later line.

        ``values`` must hold exactly two fields: the record ID and the
        name. Further lines are read through ``line.file.read_line()``
        until one is not blank; that line must split into exactly two
        fields: the age and the salary.

        :param line: the line being parsed; its ``file`` is used to read
            the following lines
        :param values: the fields of ``line``, already split
        :return: ``[record_id, name, age, salary]`` as produced by the
            value parsers at positions 0-3
        :raises ParsingError: if ``values`` does not hold 2 fields or the
            next non-blank line does not hold 2 fields
        """
        # First let's check whether this is our case and we can handle it
        if len(values) != 2:
            raise ParsingError(f'Line # {line.index}: {line.line}. '
                               f'Expected 2 fields, got {len(values)}')

        # We got the record ID
        record_id = self._value_parsers[0].parse(values[0])

        # And the name too
        name = self._value_parsers[1].parse(values[1])

        # Let's skip empty lines
        # NOTE(review): if read_line() reports end of file as an empty
        # string this loop never terminates - confirm the reader's
        # end-of-file behaviour.
        next_line = line.file.read_line()

        while next_line.strip() == '':
            next_line = line.file.read_line()

        # We got the next line with data, let's split it
        next_line_fields = LineParser.split(next_line, self._options)

        # We need to check whether the next line contains all the remaining
        # fields. A malformed continuation line is reported with the same
        # exception type as a malformed first line.
        if len(next_line_fields) != 2:
            raise ParsingError(
                f'Line # {line.file.current_line_index}: {next_line}. '
                f'Expected 2 columns, got {len(next_line_fields)}')

        # Let's parse all the remaining fields
        age = self._value_parsers[2].parse(next_line_fields[0])
        salary = self._value_parsers[3].parse(next_line_fields[1])

        return [record_id, name, age, salary]
Exemple #7
0
    def test_parse_uses_next_line_parser(self) -> None:
        """Check that ``LineParser.parse`` hands a line over to the next
        parser in the chain.

        The first parser only has a ``NumberParser`` and is given the text
        ``'abc'``; the test expects the next parser to be called exactly
        once and its single parsed value to be returned.
        """
        # Arrange
        text = 'abc'
        reader = create_autospec(TextReader)
        input_line = Line(file=reader, index=0, header=False, line=text)

        # A real string-based parser does the work behind the mock so that
        # the call can be both observed and answered
        fallback_parser = LineParser([StringParser()], ParserOptions(), False)
        fallback_mock = mock.create_autospec(LineParser)
        fallback_mock.parse = MagicMock(side_effect=fallback_parser.parse)

        first_parser = LineParser([NumberParser()], ParserOptions(), False,
                                  fallback_mock)

        # Act
        parsed_line = first_parser.parse(input_line)

        # Assert
        values = parsed_line.parsed_values
        self.assertIsNotNone(values)
        self.assertEqual(1, len(values))
        self.assertEqual(text, values[0])
        fallback_mock.parse.assert_called_once()
Exemple #8
0
class FileParserTest(TestCase):
    """Tests for ``FileParser.parse`` fed from a mocked ``open``."""

    @parameterized.expand(
        [[
            'empty file',
            LineParser([], ParserOptions()),
            ParserOptions(), '', []
        ],
         [
             'file with one header',
             LineParser([], ParserOptions()),
             ParserOptions(), 'name\tage\tsalary', [None]
         ],
         [
             'file with one header and one data line',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions()),
             ParserOptions(), 'name\tage\tsalary\nJohn Doe\t23\t10,000',
             [None, ['John Doe', '23', '10,000']]
         ],
         [
             'file with two header lines and one data line',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions(header_lines=2)),
             ParserOptions(header_lines=2),
             'Personnel\nname\tage\tsalary\nJohn Doe\t23\t10,000',
             [None, None, ['John Doe', '23', '10,000']]
         ],
         [
             'file with one header and two data lines',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions()),
             ParserOptions(),
             'name\tage\tsalary\nJohn Doe\t23\t10,000\nBob Doe\t30\t15,000',
             [None, ['John Doe', '23', '10,000'], ['Bob Doe', '30', '15,000']]
         ]])
    def test_parse(self,
                   name: str,
                   line_parser: LineParser,
                   parser_options: ParserOptions,
                   data: str,
                   expected_result: List[Optional[List[str]]],
                   expected_exception_type: Optional[Type] = None) -> None:
        """Check the lines produced by ``FileParser.parse`` for ``data``.

        When ``expected_exception_type`` is given, consuming the parse
        result MUST raise an exception of that type; the test fails if
        nothing is raised. Otherwise every produced item must be a
        ``ParsedLine`` whose ``parsed_values`` equals the matching entry
        of ``expected_result`` (the cases above expect ``None`` for header
        lines).

        :param name: label of the parameterized case; not used by the body
        :param line_parser: line parser handed to ``FileParser``
        :param parser_options: options handed to ``FileParser``
        :param data: file content served by the mocked ``open``
        :param expected_result: expected ``parsed_values``, one per line
        :param expected_exception_type: exception type parsing must raise,
            or ``None`` when parsing is expected to succeed
        """
        file_parser = FileParser(line_parser, parser_options)

        with mock_builtin_open(data=data):
            if expected_exception_type is not None:
                # The result is consumed inside the check because parse()
                # hands back an iterator and may only fail while iterating.
                with self.assertRaises(expected_exception_type):
                    list(file_parser.parse(''))
                return

            result = list(file_parser.parse(''))

            # Compare lengths first: zip() below would silently drop any
            # surplus items on either side.
            self.assertEqual(len(expected_result), len(result))

            for expected_line, line in zip(expected_result, result):
                self.assertIsInstance(line, ParsedLine)
                self.assertEqual(expected_line, line.parsed_values)