class FileProcessorFactoryTest(TestCase):
    """Tests that FileProcessorFactory.create builds a FileProcessor whose
    line processor has one value processor per value parser."""

    @parameterized.expand([
        (
            'parser without single-value parsers',
            FileParser(LineParser([], ParserOptions()), ParserOptions()),
        ),
        (
            'parser with single-value parsers',
            FileParser(
                LineParser(
                    [StringParser(), NumberParser(), NumberParser()],
                    ParserOptions()),
                ParserOptions()),
        ),
    ])
    def test_create(self, name: str, file_parser: FileParser) -> None:
        # The parser factory is stubbed so that the processor factory is
        # handed exactly the FileParser supplied by the test case.

        # Arrange
        parser_factory_stub = create_autospec(FileParserFactory)
        parser_factory_stub.create = MagicMock(return_value=file_parser)
        processor_factory = FileProcessorFactory(parser_factory_stub)

        # Act
        file_processor = processor_factory.create('', ParserOptions())

        # Assert
        self.assertIsInstance(file_processor, FileProcessor)
        line_processor = file_processor.line_processor
        self.assertIsInstance(line_processor, LineProcessor)
        value_processors = line_processor.value_processors
        self.assertIsNotNone(value_processors)
        expected_count = len(file_parser.line_parser.value_parsers)
        self.assertEqual(expected_count, len(value_processors))
class FileParserFactoryTest(TestCase):
    """Tests for FileParserFactory.create, with file input supplied through
    a mocked builtin ``open``."""

    @parameterized.expand([
        ['empty string', ParserOptions(), '', [], ParsingError],
        [
            'string with numbers',
            ParserOptions(header_lines=0),
            '123\t456\t789\n',
            [NumberParser, NumberParser, NumberParser],
        ],
        [
            'string with non-numbers and numbers',
            ParserOptions(header_lines=0),
            'abc\tdef\tghi\t123\n',
            [StringParser, StringParser, StringParser, NumberParser],
        ],
        # NOTE(review): this case uses the same options and data as the
        # previous one; presumably it was meant to exercise header_lines > 0
        # with an actual header row -- confirm and adjust the fixture.
        [
            'text with headers',
            ParserOptions(header_lines=0),
            'abc\tdef\tghi\t123\n',
            [StringParser, StringParser, StringParser, NumberParser],
        ],
    ])
    def test_create(
            self,
            name: str,
            parser_options: ParserOptions,
            data: str,
            expected_value_parser_types: List[Type],
            expected_exception_type: Optional[Type[Exception]] = None
    ) -> None:
        # When a case declares an expected exception, the call MUST raise it;
        # otherwise the created parser's value parsers are compared by type,
        # position by position, with the expected list.
        factory = FileParserFactory()

        with mock_builtin_open(data=data):
            if expected_exception_type is not None:
                # assertRaises fails the test when nothing is raised, which a
                # plain try/except around the whole body would silently allow.
                with self.assertRaises(expected_exception_type):
                    factory.create('', parser_options)
                return

            file_parser = factory.create('', parser_options)
            value_parsers = file_parser.line_parser.value_parsers

            self.assertEqual(len(expected_value_parser_types),
                             len(value_parsers))
            for expected_type, value_parser in zip(
                    expected_value_parser_types, value_parsers):
                self.assertIsInstance(value_parser, expected_type)
def test_parse_uses_next_line_parser(self) -> None:
    # A LineParser holding a single NumberParser is given the non-numeric
    # line 'abc' together with a chained "next" parser; the test expects the
    # parsed value to be the one produced by that chained parser and the
    # chained parser to be invoked exactly once.

    # Arrange
    text = 'abc'
    source_line = Line(file=create_autospec(TextReader),
                       index=0,
                       header=False,
                       line=text)
    # Real parser that does the work behind the mock, so the call can be
    # both observed and answered with a genuine parse result.
    fallback_parser = LineParser([StringParser()], ParserOptions(), False)
    fallback_spy = create_autospec(LineParser)
    fallback_spy.parse = MagicMock(side_effect=fallback_parser.parse)
    primary_parser = LineParser([NumberParser()], ParserOptions(), False,
                                fallback_spy)

    # Act
    outcome = primary_parser.parse(source_line)

    # Assert
    values = outcome.parsed_values
    self.assertIsNotNone(values)
    self.assertEqual(1, len(values))
    self.assertEqual(text, values[0])
    fallback_spy.parse.assert_called_once()
class LineProcessorTest(TestCase):
    """Tests LineProcessor.process against a header line and a data line."""

    @parameterized.expand([
        (
            'header string',
            [],
            ParserOptions(),
            ParsedLine(
                Line(file=create_autospec(TextReader),
                     index=1,
                     header=True,
                     line='Name\tAge\tSalary')),
            'Name\tAge\tSalary',
        ),
        (
            'data string',
            # Three independent echo processors, one per field.
            [EchoValueProcessor() for _ in range(3)],
            ParserOptions(),
            ParsedLine(
                Line(file=create_autospec(TextReader),
                     index=1,
                     header=False,
                     line='John Doe\t23\t10,000'),
                parsed_values=['John Doe', '23', '10,000']),
            'John Doe\t23\t10,000',
        ),
    ])
    def test_process(self, name: str,
                     value_processors: Sequence[ValueProcessor],
                     options: ParserOptions, line: ParsedLine,
                     expected_result: str) -> None:
        # Builds a processor from the case's value processors and options and
        # checks the string it produces for the given parsed line.

        # Arrange
        processor_under_test = LineProcessor(value_processors, options)

        # Act
        actual = processor_under_test.process(line)

        # Assert
        self.assertEqual(expected_result, actual)
def test_create(self, name: str, file_parser: FileParser) -> None:
    # The parser factory is stubbed so that the processor factory is handed
    # exactly the FileParser supplied by the caller; the resulting processor
    # must expose one value processor per value parser.

    # Arrange
    parser_factory_stub = create_autospec(FileParserFactory)
    parser_factory_stub.create = MagicMock(return_value=file_parser)
    processor_factory = FileProcessorFactory(parser_factory_stub)

    # Act
    file_processor = processor_factory.create('', ParserOptions())

    # Assert
    self.assertIsInstance(file_processor, FileProcessor)
    line_processor = file_processor.line_processor
    self.assertIsInstance(line_processor, LineProcessor)
    value_processors = line_processor.value_processors
    self.assertIsNotNone(value_processors)
    expected_count = len(file_parser.line_parser.value_parsers)
    self.assertEqual(expected_count, len(value_processors))
def _load_file_parser_factory(
        parser_factory_file: str) -> 'Optional[FileParserFactory]':
    """Import *parser_factory_file* and instantiate a factory class from it.

    The file is executed as a module and its classes are scanned (in the
    name-sorted order produced by ``inspect.getmembers``) for subclasses of
    ``FileParserFactory``. A strict subclass is preferred; the base class
    itself is used only when the module exposes no subclass, so a module that
    merely imports ``FileParserFactory`` next to its own subclass no longer
    risks having the base class picked first.

    Returns:
        An instance of the selected class, or ``None`` when the module
        contains no ``FileParserFactory`` subclass at all.

    Raises:
        ValueError: if no import spec (or no loader) can be built for the
            given file.
    """
    module_name = os.path.basename(parser_factory_file)
    spec = importlib.util.spec_from_file_location(module_name,
                                                  parser_factory_file)
    # spec_from_file_location may return None, and a spec may lack a loader;
    # either way the module cannot be executed.
    if spec is None or spec.loader is None:
        raise ValueError(
            f'Cannot load FileParserFactory from {parser_factory_file}')

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    candidates = [
        member
        for _, member in inspect.getmembers(module, inspect.isclass)
        if issubclass(member, FileParserFactory)
    ]
    for candidate in candidates:
        if candidate is not FileParserFactory:
            return candidate()
    # Only the base class (or nothing) was found.
    return candidates[0]() if candidates else None


def create_import_file(input_file: str,
                       output_file: str,
                       header_lines: int = 1,
                       line_terminator: str = '\n',
                       field_terminator: str = ',',
                       field_enclosing_value: str = '"',
                       parser_factory_file: Optional[str] = None) -> None:
    """Create an import file by processing *input_file* into *output_file*.

    Args:
        input_file: path passed to the file processor as its input.
        output_file: path passed to the file processor as its output.
        header_lines: forwarded to ``ParserOptions(header_lines=...)``.
        line_terminator: forwarded to ``ParserOptions``.
        field_terminator: forwarded to ``ParserOptions``.
        field_enclosing_value: forwarded to ``ParserOptions``.
        parser_factory_file: optional path of a Python file providing a
            ``FileParserFactory`` subclass to use instead of the default
            ``FileParserFactory``. A falsy value selects the default.

    Raises:
        ValueError: if *parser_factory_file* is given but cannot be loaded
            as a module.
    """
    parser_options = ParserOptions(
        header_lines=header_lines,
        line_terminator=line_terminator,
        field_terminator=field_terminator,
        field_enclosing_value=field_enclosing_value)

    file_parser_factory: Optional[FileParserFactory]
    if parser_factory_file:
        file_parser_factory = _load_file_parser_factory(parser_factory_file)
    else:
        file_parser_factory = FileParserFactory()

    file_processor_factory = FileProcessorFactory(file_parser_factory)
    file_processor = file_processor_factory.create(input_file, parser_options)
    file_processor.process(input_file, output_file)
class LineParserTest(TestCase):
    """Tests for LineParser: field splitting, line parsing, and delegation
    to a chained "next" line parser."""

    @parameterized.expand([
        ['empty string', '', ParserOptions(), []],
        [
            'string with a wrong field terminator',
            '1\t2\t3',
            ParserOptions(field_terminator=','),
            ['1\t2\t3'],
        ],
        [
            'string with a right field terminator',
            '1\t2\t3',
            ParserOptions(),
            ['1', '2', '3'],
        ],
        [
            'string with an overridden field terminator',
            '1,2,3',
            ParserOptions(field_terminator=','),
            ['1', '2', '3'],
        ],
        [
            'string with enclosed values',
            '"John Doe"\t"23"\t"10,000"',
            ParserOptions(field_enclosing_value='"'),
            ['John Doe', '23', '10,000'],
        ],
        [
            'string with several enclosed values',
            'John Doe\t23\t"10,000"',
            ParserOptions(field_enclosing_value='"'),
            ['John Doe', '23', '10,000'],
        ],
    ])
    def test_split(self, name: str, string: str,
                   parser_options: ParserOptions,
                   expected_result: List[str]) -> None:
        # Act
        result = LineParser.split(string, parser_options)

        # Assert
        self.assertEqual(expected_result, result)

    @parameterized.expand([
        [
            'empty string and empty value parsers list (skip errors = True)',
            [],
            ParserOptions(),
            True,
            '',
            [],
        ],
        [
            'empty string and empty value parsers list (skip errors = False)',
            [],
            ParserOptions(),
            False,
            '',
            [],
        ],
        [
            'empty string and non-empty value parsers list (skip errors = True)',
            [NumberParser()],
            ParserOptions(),
            True,
            '',
            None,
        ],
        [
            'empty string and non-empty value parsers list (skip errors = False)',
            [NumberParser()],
            ParserOptions(),
            False,
            '',
            [],
            ParsingError,
        ],
        [
            'string with missing fields (skip errors = True)',
            [NumberParser(), NumberParser()],
            ParserOptions(),
            True,
            '123\t',
            None,
        ],
        [
            'string with missing fields (skip errors = False)',
            [NumberParser(), NumberParser()],
            ParserOptions(),
            False,
            '123\t',
            [],
            ParsingError,
        ],
        [
            'string with numbers and strings',
            [NumberParser(), StringParser(), NumberParser()],
            ParserOptions(field_terminator=','),
            False,
            '123,abc,456',
            ['123', 'abc', '456'],
        ],
    ])
    def test_parse(
            self,
            name: str,
            value_parsers: List[ValueParser],
            parser_options: ParserOptions,
            skip_incorrect_lines: bool,
            line: str,
            expected_result: Optional[List[str]],
            expected_exception_type: Optional[Type[Exception]] = None
    ) -> None:
        # When a case declares an expected exception, parse() MUST raise it;
        # otherwise the parsed line must echo the input line's metadata and
        # carry the expected parsed values (None in the skip-errors cases).

        # Arrange
        line_parser = LineParser(value_parsers, parser_options,
                                 skip_incorrect_lines)
        file = create_autospec(TextReader)
        input_line = Line(file=file, index=0, header=False, line=line)

        if expected_exception_type is not None:
            # Act / Assert: assertRaises fails the test when nothing is
            # raised, which a broad try/except would silently allow.
            with self.assertRaises(expected_exception_type):
                line_parser.parse(input_line)
            return

        # Act
        parsed_line = line_parser.parse(input_line)

        # Assert
        self.assertEqual(input_line.index, parsed_line.index)
        self.assertEqual(input_line.header, parsed_line.header)
        self.assertEqual(input_line.line, parsed_line.line)
        self.assertEqual(expected_result, parsed_line.parsed_values)

    def test_parse_uses_next_line_parser(self) -> None:
        # A LineParser holding a single NumberParser is given the non-numeric
        # line 'abc' together with a chained "next" parser; the result is
        # expected to come from that chained parser, called exactly once.

        # Arrange
        line = 'abc'
        next_line_parser = LineParser([StringParser()], ParserOptions(),
                                      False)
        file = create_autospec(TextReader)
        input_line = Line(file=file, index=0, header=False, line=line)
        # The mock forwards to a real parser so that the call is observable
        # and still yields a genuine parse result.
        next_line_parser_mock = mock.create_autospec(LineParser)
        next_line_parser_mock.parse = MagicMock(
            side_effect=next_line_parser.parse)
        line_parser = LineParser([NumberParser()], ParserOptions(), False,
                                 next_line_parser_mock)

        # Act
        parsed_line = line_parser.parse(input_line)

        # Assert
        self.assertIsNotNone(parsed_line.parsed_values)
        self.assertEqual(1, len(parsed_line.parsed_values))
        self.assertEqual(line, parsed_line.parsed_values[0])
        next_line_parser_mock.parse.assert_called_once()
class FileParserTest(TestCase):
    """Tests for FileParser.parse, with file input supplied through a mocked
    builtin ``open``."""

    @parameterized.expand([
        [
            'empty file',
            LineParser([], ParserOptions()),
            ParserOptions(),
            '',
            [],
        ],
        [
            'file with one header',
            LineParser([], ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary',
            [None],
        ],
        [
            'file with one header and one data line',
            LineParser([StringParser(), NumberParser(), NumberParser()],
                       ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary\nJohn Doe\t23\t10,000',
            [None, ['John Doe', '23', '10,000']],
        ],
        [
            'file with two header lines and one data line',
            LineParser([StringParser(), NumberParser(), NumberParser()],
                       ParserOptions(header_lines=2)),
            ParserOptions(header_lines=2),
            'Personnel\nname\tage\tsalary\nJohn Doe\t23\t10,000',
            [None, None, ['John Doe', '23', '10,000']],
        ],
        [
            'file with one header and two data lines',
            LineParser([StringParser(), NumberParser(), NumberParser()],
                       ParserOptions()),
            ParserOptions(),
            'name\tage\tsalary\nJohn Doe\t23\t10,000\nBob Doe\t30\t15,000',
            [None, ['John Doe', '23', '10,000'], ['Bob Doe', '30', '15,000']],
        ],
    ])
    def test_parse(
            self,
            name: str,
            line_parser: LineParser,
            parser_options: ParserOptions,
            data: str,
            expected_result: List[Optional[List[str]]],
            expected_exception_type: Optional[Type[Exception]] = None
    ) -> None:
        # Each expected entry is the parsed_values of the corresponding
        # ParsedLine; the header cases above expect None there.
        file_parser = FileParser(line_parser, parser_options)

        with mock_builtin_open(data=data):
            if expected_exception_type is not None:
                # The result is consumed inside assertRaises because parse()
                # returns an iterable that may only fail while being read.
                with self.assertRaises(expected_exception_type):
                    list(file_parser.parse(''))
                return

            result = list(file_parser.parse(''))

            self.assertEqual(len(expected_result), len(result))
            for expected_values, parsed_line in zip(expected_result, result):
                self.assertIsInstance(parsed_line, ParsedLine)
                self.assertEqual(expected_values, parsed_line.parsed_values)
class FileProcessorTest(TestCase):
    """Tests FileProcessor.process with a stubbed FileParser and a mocked
    TextWriter, checking what is written for each parsed line."""

    @parameterized.expand([
        ['empty file', LineProcessor([], ParserOptions()), []],
        [
            'file with only header',
            LineProcessor([], ParserOptions()),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name\tAge\tSalary')),
            ],
        ],
        [
            'file with one header and one data line',
            LineProcessor([], ParserOptions()),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name\tAge\tSalary')),
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=1,
                         header=True,
                         line='John Doe\t23\t10,000'),
                    parsed_values=['John Doe', '23', '10,000']),
            ],
        ],
        [
            'file with one header and two data lines',
            LineProcessor([], ParserOptions()),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name\tAge\tSalary')),
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=1,
                         header=True,
                         line='John Doe\t23\t10,000'),
                    parsed_values=['John Doe', '23', '10,000']),
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=2,
                         header=True,
                         line='Bob Doe\t30\t50,000'),
                    parsed_values=['Bob Doe', '30', '50,000']),
            ],
        ],
    ])
    def test_process(self, name: str, line_processor: LineProcessor,
                     lines: List[ParsedLine]) -> None:
        # Arrange
        # The parser stub returns the prepared lines for any single argument.
        file_parser = create_autospec(FileParser)
        file_parser.parse = MagicMock(side_effect=lambda _: lines)
        file_processor = FileProcessor(file_parser, line_processor)
        # TextWriter.create() is patched to return a context manager whose
        # __enter__ yields the instance on which write_line is observed.
        text_writer_instance_mock = create_autospec(TextWriter)
        text_writer_mock = create_autospec(TextWriter)
        text_writer_mock.__enter__ = MagicMock(
            return_value=text_writer_instance_mock)

        # Act
        with patch('csv_import.csv.text.TextWriter.create',
                   MagicMock(return_value=text_writer_mock)):
            with mock_builtin_open():
                file_processor.process('', '')

        # Assert
        # assert_called_once() is a real assertion; the former
        # `called_once()` was just an auto-created mock attribute that
        # checked nothing (and is rejected outright by newer Pythons).
        file_parser.parse.assert_called_once()
        text_writer_instance_mock.write_line.assert_has_calls(
            [call(line.line) for line in lines])
class FileProcessorIntegrationTest(TestCase):
    """Runs the real parser/processor factories against fixture files found
    in the ``fixtures`` directory next to this test module, with only
    ``TextWriter.create`` patched out."""

    @parameterized.expand([
        [
            '01_correct_file_with_commas.csv',
            ParserOptions(field_terminator=',', field_enclosing_value='"'),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name,Age,Salary\n')),
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=1,
                         header=False,
                         line='John Doe,23,"10,000"'),
                    parsed_values=['John Doe', '23', '10,000']),
            ],
        ],
        [
            '02_correct_file_with_tabulations.csv',
            ParserOptions(field_terminator='\t', field_enclosing_value='"'),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name\tAge\tSalary\n')),
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=1,
                         header=False,
                         line='John Doe\t23\t10,000'),
                    parsed_values=['John Doe', '23', '10,000']),
            ],
        ],
    ])
    def test_process(self, input_file: str, options: ParserOptions,
                     expected_parsed_lines: List[ParsedLine]) -> None:
        # Arrange
        current_dir = os.path.dirname(os.path.realpath(__file__))
        input_file_path = os.path.join(current_dir, 'fixtures', input_file)
        output_file_path = input_file_path + '.out'

        file_parser_factory = FileParserFactory()
        file_processor_factory = FileProcessorFactory(file_parser_factory)
        file_processor = file_processor_factory.create(input_file_path,
                                                       options)

        # Wrap the real line processing so that every ParsedLine handed to it
        # is recorded while the original behaviour is preserved.
        parsed_lines: List[ParsedLine] = []
        original_process = file_processor.line_processor.process

        def process(parsed_line: ParsedLine) -> str:
            parsed_lines.append(parsed_line)
            return original_process(parsed_line)

        file_processor.line_processor.process = MagicMock(side_effect=process)

        # Act
        with patch('csv_import.csv.text.TextWriter.create'):
            file_processor.process(input_file_path, output_file_path)

        # Assert
        # zip() stops at the shorter sequence, so without this check the test
        # would pass vacuously if no (or too few) lines were processed.
        self.assertGreaterEqual(len(parsed_lines), len(expected_parsed_lines))
        for expected_parsed_line, parsed_line in zip(expected_parsed_lines,
                                                     parsed_lines):
            self.assertEqual(expected_parsed_line.index, parsed_line.index)
            self.assertEqual(expected_parsed_line.header, parsed_line.header)
            self.assertEqual(expected_parsed_line.line, parsed_line.line)
            self.assertEqual(expected_parsed_line.parsed_values,
                             parsed_line.parsed_values)