def test_parse(self,
               name: str,
               value_parsers: List[ValueParser],
               parser_options: ParserOptions,
               skip_incorrect_lines: bool,
               line: str,
               expected_result: List[str],
               expected_exception_type: Optional[Type] = None) -> None:
    # Checks that LineParser.parse hands back a line whose index, header flag
    # and raw text equal those of the input, and whose parsed values equal
    # `expected_result`. When `expected_exception_type` is supplied, any
    # exception leaving the try block (assertion failures included) must be
    # an instance of it; otherwise the exception is re-raised untouched.

    # Arrange
    parser = LineParser(value_parsers, parser_options, skip_incorrect_lines)
    reader = create_autospec(TextReader)
    source_line = Line(file=reader, index=0, header=False, line=line)

    try:
        # Act
        outcome = parser.parse(source_line)

        # Assert: the identifying attributes are carried over unchanged
        for attribute in ('index', 'header', 'line'):
            self.assertEqual(getattr(source_line, attribute),
                             getattr(outcome, attribute))
        self.assertEqual(expected_result, outcome.parsed_values)
    except Exception as error:
        # No exception was announced for this case: let it surface as is
        if not expected_exception_type:
            raise
        self.assertIsInstance(error, expected_exception_type)
class FileProcessorFactoryTest(TestCase):
    """Tests for ``FileProcessorFactory.create``."""

    @parameterized.expand([
        (
            'parser without single-value parsers',
            FileParser(LineParser([], ParserOptions()), ParserOptions()),
        ),
        (
            'parser with single-value parsers',
            FileParser(
                LineParser(
                    [StringParser(), NumberParser(), NumberParser()],
                    ParserOptions()),
                ParserOptions()),
        ),
    ])
    def test_create(self, name: str, file_parser: FileParser) -> None:
        # The factory under test receives an autospecced FileParserFactory
        # whose `create` returns the parameterized `file_parser`. The created
        # processor must expose as many value processors as that parser has
        # value parsers.

        # Arrange
        parser_factory_stub = create_autospec(FileParserFactory)
        parser_factory_stub.create = MagicMock(return_value=file_parser)
        processor_factory = FileProcessorFactory(parser_factory_stub)

        # Act
        processor = processor_factory.create('', ParserOptions())

        # Assert
        self.assertIsInstance(processor, FileProcessor)
        line_processor = processor.line_processor
        self.assertIsInstance(line_processor, LineProcessor)
        value_processors = line_processor.value_processors
        self.assertIsNotNone(value_processors)
        self.assertEqual(
            len(file_parser.line_parser.value_parsers),
            len(value_processors))
def test_split(self,
               name: str,
               string: str,
               parser_options: ParserOptions,
               expected_result: List[str]) -> None:
    # LineParser.split is called on the class itself, so there is nothing to
    # arrange: the fields it returns for `string` under `parser_options`
    # must equal `expected_result`.

    # Act & Assert
    self.assertEqual(
        expected_result,
        LineParser.split(string, parser_options))
def _parse(self, line: Line, values: List[str]) -> List[str]:
    """Parse a record whose fields are spread over two physical lines.

    The current line must carry exactly two fields: the record ID and the
    first letter of the name. The following line, read from ``line.file``,
    must carry exactly three fields: the rest of the name, the age and the
    salary.

    :param line: Line being parsed; its ``file`` is read to fetch the
        continuation line
    :param values: Fields already split out of ``line``
    :return: ``[record_id, name, age, salary]`` as produced by the value
        parsers at positions 0 to 3
    :raises ParsingError: If either line has an unexpected number of fields
    """
    # Guard: this parser only handles the "ID + first letter" layout
    if len(values) != 2:
        raise ParsingError(f'Line # {line.index}: {line.line}. '
                           f'Expected 2 fields, got {len(values)}')

    raw_id, name_head = values
    record_id = self._value_parsers[0].parse(raw_id)

    # Everything else lives on the next physical line
    continuation = line.file.read_line()
    rest = LineParser.split(continuation, self._options)

    if len(rest) != 3:
        # NOTE(review): the message shows the continuation text but the
        # index of the first line (line.index) - confirm this is intended
        raise ParsingError(
            f'Line # {line.index}: {continuation}. '
            f'Expected 3 fields, got {len(rest)}')

    name_tail, raw_age, raw_salary = rest

    # The name is stitched together from both lines before being parsed
    name = self._value_parsers[1].parse(name_head + name_tail)
    age = self._value_parsers[2].parse(raw_age)
    salary = self._value_parsers[3].parse(raw_salary)

    return [record_id, name, age, salary]
def create(self, input_file_path: str, options: ParserOptions) -> FileParser:
    """Build a ``FileParser`` backed by a chain of three line parsers.

    The chain is ``LineParser`` ->
    ``LineWithIDAndNameFirstLetterParser`` -> ``IDAndNameLineParser``,
    linked through ``next_line_parser``. All three share the same list of
    single-value parsers and are created with
    ``skip_incorrect_lines=False``.

    :param input_file_path: Not used by this method
    :param options: Parser options handed to every line parser and to the
        resulting file parser
    :return: File parser wrapping the head of the chain
    """
    # One single-value parser per column, in column order
    value_parsers = [
        NumberParser(),  # Record ID
        StringParser(),  # Name
        NumberParser(),  # Age
        NumberParser(),  # Salary
    ]

    # Assemble the chain from its tail towards its head
    id_and_name_parser = IDAndNameLineParser(
        value_parsers,
        options,
        skip_incorrect_lines=False)
    first_letter_parser = LineWithIDAndNameFirstLetterParser(
        value_parsers,
        options,
        skip_incorrect_lines=False,
        next_line_parser=id_and_name_parser)
    head_parser = LineParser(
        value_parsers,
        options,
        skip_incorrect_lines=False,
        next_line_parser=first_letter_parser)

    return FileParser(head_parser, options)
def _parse(self, line: Line, values: List[str]) -> List[str]:
    """Parse a record whose ID and name are separated from age and salary.

    The current line must carry exactly two fields: the record ID and the
    name. Blank lines following it are skipped; the first non-blank line
    read from ``line.file`` must carry exactly two fields: the age and the
    salary.

    :param line: Line being parsed; its ``file`` is read to fetch the
        following lines
    :param values: Fields already split out of ``line``
    :return: ``[record_id, name, age, salary]`` as produced by the value
        parsers at positions 0 to 3
    :raises ParsingError: If the current line does not have 2 fields
    :raises ValueError: If the next non-blank line does not have 2 fields
    """
    # Guard: this parser only handles the "ID + name" layout
    if len(values) != 2:
        raise ParsingError(f'Line # {line.index}: {line.line}. '
                           f'Expected 2 fields, got {len(values)}')

    raw_id, raw_name = values
    record_id = self._value_parsers[0].parse(raw_id)
    name = self._value_parsers[1].parse(raw_name)

    # Advance past blank lines until a line with content shows up.
    # NOTE(review): termination at end of input depends on what read_line
    # returns there - confirm it cannot keep returning blank strings
    reader = line.file
    following = reader.read_line()
    while following.strip() == '':
        following = reader.read_line()

    columns = LineParser.split(following, self._options)

    if len(columns) != 2:
        # NOTE(review): ValueError here vs ParsingError above - confirm
        # whether callers rely on the distinction
        raise ValueError(
            f'Line # {reader.current_line_index}: {following}. '
            f'Expected 2 columns, got {len(columns)}')

    raw_age, raw_salary = columns
    age = self._value_parsers[2].parse(raw_age)
    salary = self._value_parsers[3].parse(raw_salary)

    return [record_id, name, age, salary]
def test_parse_uses_next_line_parser(self) -> None:
    # The head parser is built with a NumberParser only, while the fallback
    # (a real LineParser with a StringParser, reached through an autospecced
    # mock) is passed as its next line parser. The raw text must come back
    # as the single parsed value and the fallback's `parse` must have been
    # called exactly once.

    # Arrange
    text = 'abc'
    fallback_parser = LineParser([StringParser()], ParserOptions(), False)
    reader = create_autospec(TextReader)
    source_line = Line(file=reader, index=0, header=False, line=text)

    # The mock forwards every `parse` call to the real fallback parser
    fallback_mock = mock.create_autospec(LineParser)
    fallback_mock.parse = MagicMock(side_effect=fallback_parser.parse)

    parser = LineParser(
        [NumberParser()],
        ParserOptions(),
        False,
        fallback_mock)

    # Act
    outcome = parser.parse(source_line)

    # Assert
    self.assertIsNotNone(outcome.parsed_values)
    self.assertEqual(1, len(outcome.parsed_values))
    self.assertEqual(text, outcome.parsed_values[0])
    fallback_mock.parse.assert_called_once()
class FileParserTest(TestCase):
    """Tests for ``FileParser.parse``."""

    @parameterized.expand([
        (
            'empty file',
            LineParser([], ParserOptions()),
            ParserOptions(),
            '',
            [],
        ),
        (
            'file with one header',
            LineParser([], ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary',
            [None],
        ),
        (
            'file with one header and one data line',
            LineParser(
                [StringParser(), NumberParser(), NumberParser()],
                ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary\nJohn Doe\t23\t10,000',
            [None, ['John Doe', '23', '10,000']],
        ),
        (
            'file with two header lines and one data line',
            LineParser(
                [StringParser(), NumberParser(), NumberParser()],
                ParserOptions(header_lines=2)),
            ParserOptions(header_lines=2),
            'Personnel\nname\tage\tsalary\nJohn Doe\t23\t10,000',
            [None, None, ['John Doe', '23', '10,000']],
        ),
        (
            'file with one header and two data lines',
            LineParser(
                [StringParser(), NumberParser(), NumberParser()],
                ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary\nJohn Doe\t23\t10,000\nBob Doe\t30\t15,000',
            [
                None,
                ['John Doe', '23', '10,000'],
                ['Bob Doe', '30', '15,000'],
            ],
        ),
    ])
    def test_parse(self,
                   name: str,
                   line_parser: LineParser,
                   parser_options: ParserOptions,
                   data: str,
                   expected_result: List[Optional[List[str]]],
                   expected_exception_type: Optional[Type] = None) -> None:
        # Parses `data` (served through mock_builtin_open) and compares the
        # parsed values of every produced line with `expected_result`; the
        # header rows in the cases above are expected to carry None. When
        # `expected_exception_type` is supplied, any exception leaving the
        # try block (assertion failures included) must be an instance of it.
        parser = FileParser(line_parser, parser_options)

        with mock_builtin_open(data=data):
            try:
                # Drain the iterator so every line is actually parsed
                parsed_lines = list(parser.parse(''))

                self.assertEqual(len(expected_result), len(parsed_lines))

                for expected_values, parsed_line in zip(expected_result,
                                                        parsed_lines):
                    self.assertIsInstance(parsed_line, ParsedLine)
                    self.assertEqual(expected_values,
                                     parsed_line.parsed_values)
            except Exception as error:
                # No exception was announced for this case: let it surface
                if not expected_exception_type:
                    raise
                self.assertIsInstance(error, expected_exception_type)