class FileProcessorFactoryTest(TestCase):
    """Tests for FileProcessorFactory.create with a stubbed parser factory."""

    @parameterized.expand([
        [
            'parser without single-value parsers',
            FileParser(LineParser([], ParserOptions()), ParserOptions()),
        ],
        [
            'parser with single-value parsers',
            FileParser(
                LineParser(
                    [StringParser(), NumberParser(), NumberParser()],
                    ParserOptions(),
                ),
                ParserOptions(),
            ),
        ],
    ])
    def test_create(self, name: str, file_parser: FileParser) -> None:
        """Build a processor from ``file_parser`` and inspect its shape.

        The FileParserFactory handed to the factory under test is a mock
        whose ``create`` always returns ``file_parser``. The created
        processor must hold as many value processors as the parser holds
        value parsers.
        """
        # Arrange
        parser_factory_stub = create_autospec(FileParserFactory)
        parser_factory_stub.create = MagicMock(return_value=file_parser)
        processor_factory = FileProcessorFactory(parser_factory_stub)

        # Act
        file_processor = processor_factory.create('', ParserOptions())

        # Assert
        self.assertIsInstance(file_processor, FileProcessor)
        self.assertIsInstance(file_processor.line_processor, LineProcessor)
        self.assertIsNotNone(file_processor.line_processor.value_processors)

        expected_count = len(file_parser.line_parser.value_parsers)
        actual_count = len(file_processor.line_processor.value_processors)
        self.assertEqual(expected_count, actual_count)
Exemple #2
0
class FileParserFactoryTest(TestCase):
    """Tests for FileParserFactory.create against mocked file contents."""

    @parameterized.expand(
        [['empty string',
          ParserOptions(), '', [], ParsingError],
         [
             'string with numbers',
             ParserOptions(header_lines=0), '123\t456\t789\n',
             [NumberParser, NumberParser, NumberParser]
         ],
         [
             'string with non-numbers and numbers',
             ParserOptions(header_lines=0), 'abc\tdef\tghi\t123\n',
             [StringParser, StringParser, StringParser, NumberParser]
         ],
         # NOTE(review): despite its name this case uses header_lines=0 and
         # the same data as the previous one - confirm whether a header
         # line was intended here.
         [
             'text with headers',
             ParserOptions(header_lines=0), 'abc\tdef\tghi\t123\n',
             [StringParser, StringParser, StringParser, NumberParser]
         ]])
    def test_create(self,
                    name: str,
                    parser_options: ParserOptions,
                    data: str,
                    expected_value_parser_types: List[Type],
                    expected_exception_type: Optional[Type] = None) -> None:
        """Create a parser for ``data`` and check its value parser types.

        :param name: case label consumed by ``parameterized``.
        :param parser_options: options passed to ``FileParserFactory.create``.
        :param data: content served by the mocked built-in ``open``.
        :param expected_value_parser_types: expected type of each value
            parser, in order; ignored when an exception is expected.
        :param expected_exception_type: when given, ``create`` must raise
            an exception of this type.
        """
        factory = FileParserFactory()

        with mock_builtin_open(data=data):
            if expected_exception_type is not None:
                # assertRaises fails the test when nothing is raised; the
                # former try/except let such a case pass silently.
                with self.assertRaises(expected_exception_type):
                    factory.create('', parser_options)
                return

            file_parser = factory.create('', parser_options)
            line_parser = file_parser.line_parser

            self.assertEqual(len(expected_value_parser_types),
                             len(line_parser.value_parsers))

            for expected_value_parser_type, value_parser in zip(
                    expected_value_parser_types,
                    line_parser.value_parsers):
                self.assertIsInstance(value_parser,
                                      expected_value_parser_type)
Exemple #3
0
    def test_parse_uses_next_line_parser(self) -> None:
        """Parse 'abc' with a number parser that has a next line parser.

        The next parser is an autospec'd LineParser whose ``parse`` forwards
        to a real LineParser holding a StringParser. The test expects the
        single parsed value to equal the input and the next parser's
        ``parse`` to have been called exactly once.
        """
        # Arrange
        text = 'abc'
        string_line_parser = LineParser([StringParser()], ParserOptions(),
                                        False)
        chained_parser_mock = mock.create_autospec(LineParser)
        # Forward every call to the real string-based parser.
        chained_parser_mock.parse = MagicMock(
            side_effect=string_line_parser.parse)
        reader = create_autospec(TextReader)
        input_line = Line(file=reader, index=0, header=False, line=text)
        number_line_parser = LineParser([NumberParser()], ParserOptions(),
                                        False, chained_parser_mock)

        # Act
        outcome = number_line_parser.parse(input_line)

        # Assert
        self.assertIsNotNone(outcome.parsed_values)
        self.assertEqual(1, len(outcome.parsed_values))
        self.assertEqual(text, outcome.parsed_values[0])
        chained_parser_mock.parse.assert_called_once()
class LineProcessorTest(TestCase):
    """Tests for LineProcessor.process on header and data lines."""

    @parameterized.expand([
        [
            'header string',
            [],
            ParserOptions(),
            ParsedLine(
                Line(
                    file=create_autospec(TextReader),
                    index=1,
                    header=True,
                    line='Name\tAge\tSalary',
                )),
            'Name\tAge\tSalary',
        ],
        [
            'data string',
            [EchoValueProcessor(), EchoValueProcessor(), EchoValueProcessor()],
            ParserOptions(),
            ParsedLine(
                Line(
                    file=create_autospec(TextReader),
                    index=1,
                    header=False,
                    line='John Doe\t23\t10,000',
                ),
                parsed_values=['John Doe', '23', '10,000'],
            ),
            'John Doe\t23\t10,000',
        ],
    ])
    def test_process(self, name: str,
                     value_processors: Sequence[ValueProcessor],
                     options: ParserOptions, line: ParsedLine,
                     expected_result: str) -> None:
        """Process ``line`` and compare the output with ``expected_result``."""
        # Arrange
        processor_under_test = LineProcessor(value_processors, options)

        # Act
        produced = processor_under_test.process(line)

        # Assert
        self.assertEqual(expected_result, produced)

    # NOTE(review): this method has no @parameterized.expand decorator, so
    # nothing supplies `name` and `file_parser`; it looks like it belongs to
    # a FileProcessorFactory test class - confirm before relying on it.
    def test_create(self, name: str, file_parser: FileParser) -> None:
        """Build a FileProcessor from ``file_parser`` and inspect its shape."""
        # Arrange
        parser_factory_stub = create_autospec(FileParserFactory)
        parser_factory_stub.create = MagicMock(return_value=file_parser)
        processor_factory = FileProcessorFactory(parser_factory_stub)

        # Act
        file_processor = processor_factory.create('', ParserOptions())

        # Assert
        self.assertIsInstance(file_processor, FileProcessor)
        self.assertIsInstance(file_processor.line_processor, LineProcessor)
        self.assertIsNotNone(file_processor.line_processor.value_processors)

        expected_count = len(file_parser.line_parser.value_parsers)
        actual_count = len(file_processor.line_processor.value_processors)
        self.assertEqual(expected_count, actual_count)
Exemple #6
0
def create_import_file(input_file: str,
                       output_file: str,
                       header_lines: int = 1,
                       line_terminator: str = '\n',
                       field_terminator: str = ',',
                       field_enclosing_value: str = '"',
                       parser_factory_file: Optional[str] = None) -> None:
    """
    Creates an import file

    Builds ParserOptions from the keyword arguments, obtains a file
    processor from FileProcessorFactory and lets it process ``input_file``
    into ``output_file``.

    :param input_file: path passed to the processor factory and processor.
    :param output_file: path passed to the processor as its second argument.
    :param header_lines: forwarded to ParserOptions.
    :param line_terminator: forwarded to ParserOptions.
    :param field_terminator: forwarded to ParserOptions.
    :param field_enclosing_value: forwarded to ParserOptions.
    :param parser_factory_file: optional path of a Python file providing a
        FileParserFactory class; when empty or omitted the stock
        FileParserFactory is used.
    :raises ValueError: if ``parser_factory_file`` cannot be loaded as a
        module or contains no FileParserFactory class.
    """

    parser_options = ParserOptions(header_lines=header_lines,
                                   line_terminator=line_terminator,
                                   field_terminator=field_terminator,
                                   field_enclosing_value=field_enclosing_value)

    if parser_factory_file:
        module_name = os.path.basename(parser_factory_file)
        module_spec = importlib.util.spec_from_file_location(
            module_name, parser_factory_file)

        # Without a spec or a loader the file cannot be executed as a module.
        if module_spec is None or module_spec.loader is None:
            raise ValueError(
                f'Cannot load FileParserFactory from {parser_factory_file}')

        factory_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(factory_module)

        factory_types = [
            member for _, member in inspect.getmembers(
                factory_module, inspect.isclass)
            if issubclass(member, FileParserFactory)
        ]
        # getmembers() returns members sorted by name, so a FileParserFactory
        # base class imported by the module could come before the custom
        # subclass. Prefer real subclasses and only fall back to the base.
        custom_factory_types = [
            factory_type for factory_type in factory_types
            if factory_type is not FileParserFactory
        ]
        candidates = custom_factory_types or factory_types

        if not candidates:
            # Fail loudly instead of handing None to FileProcessorFactory.
            raise ValueError(
                f'No FileParserFactory class found in {parser_factory_file}')

        file_parser_factory = candidates[0]()
    else:
        file_parser_factory = FileParserFactory()

    file_processor_factory = FileProcessorFactory(file_parser_factory)
    file_processor = file_processor_factory.create(input_file, parser_options)

    file_processor.process(input_file, output_file)
Exemple #7
0
class LineParserTest(TestCase):
    """Tests for LineParser.split, LineParser.parse and parser chaining."""

    @parameterized.expand([['empty string', '',
                            ParserOptions(), []],
                           [
                               'string with a wrong field terminator',
                               '1\t2\t3',
                               ParserOptions(field_terminator=','),
                               ['1\t2\t3']
                           ],
                           [
                               'string with a right field terminator',
                               '1\t2\t3',
                               ParserOptions(), ['1', '2', '3']
                           ],
                           [
                               'string with an overridden field terminator',
                               '1,2,3',
                               ParserOptions(field_terminator=','),
                               ['1', '2', '3']
                           ],
                           [
                               'string with enclosed values',
                               '"John Doe"\t"23"\t"10,000"',
                               ParserOptions(field_enclosing_value='"'),
                               ['John Doe', '23', '10,000']
                           ],
                           [
                               'string with several enclosed values',
                               'John Doe\t23\t"10,000"',
                               ParserOptions(field_enclosing_value='"'),
                               ['John Doe', '23', '10,000']
                           ]])
    def test_split(self, name: str, string: str, parser_options: ParserOptions,
                   expected_result: List[str]) -> None:
        """Split ``string`` with ``parser_options`` and compare the fields."""
        # Act
        result = LineParser.split(string, parser_options)

        # Assert
        self.assertEqual(expected_result, result)

    @parameterized.expand([
        [
            'empty string and empty value parsers list (skip errors = True)',
            [],
            ParserOptions(), True, '', []
        ],
        [
            'empty string and empty value parsers list (skip errors = False)',
            [],
            ParserOptions(), False, '', []
        ],
        [
            'empty string and non-empty value parsers list (skip errors = True)',
            [NumberParser()],
            ParserOptions(), True, '', None
        ],
        [
            'empty string and non-empty value parsers list (skip errors = False)',
            [NumberParser()],
            ParserOptions(), False, '', [], ParsingError
        ],
        [
            'string with missing fields (skip errors = True)',
            [NumberParser(), NumberParser()],
            ParserOptions(), True, '123\t', None
        ],
        [
            'string with missing fields (skip errors = False)',
            [NumberParser(), NumberParser()],
            ParserOptions(), False, '123\t', [], ParsingError
        ],
        [
            'string with numbers and strings',
            [NumberParser(), StringParser(),
             NumberParser()],
            ParserOptions(field_terminator=','), False, '123,abc,456',
            ['123', 'abc', '456']
        ]
    ])
    def test_parse(self,
                   name: str,
                   value_parsers: List[ValueParser],
                   parser_options: ParserOptions,
                   skip_incorrect_lines: bool,
                   line: str,
                   expected_result: Optional[List[str]],
                   expected_exception_type: Optional[Type] = None) -> None:
        """Parse one line and check the parsed values or the raised error.

        :param name: case label consumed by ``parameterized``.
        :param value_parsers: value parsers given to the LineParser.
        :param parser_options: options given to the LineParser.
        :param skip_incorrect_lines: third LineParser constructor argument.
        :param line: raw text wrapped into the input Line.
        :param expected_result: expected ``parsed_values`` (``None`` in the
            skip-errors cases); ignored when an exception is expected.
        :param expected_exception_type: when given, ``parse`` must raise an
            exception of this type.
        """
        # Arrange
        line_parser = LineParser(value_parsers, parser_options,
                                 skip_incorrect_lines)
        file = create_autospec(TextReader)
        input_line = Line(file=file, index=0, header=False, line=line)

        if expected_exception_type is not None:
            # Act / Assert: assertRaises fails the test when nothing is
            # raised; the former try/except let such a case pass silently.
            with self.assertRaises(expected_exception_type):
                line_parser.parse(input_line)
            return

        # Act
        parsed_line = line_parser.parse(input_line)

        # Assert
        self.assertEqual(input_line.index, parsed_line.index)
        self.assertEqual(input_line.header, parsed_line.header)
        self.assertEqual(input_line.line, parsed_line.line)
        self.assertEqual(expected_result, parsed_line.parsed_values)

    def test_parse_uses_next_line_parser(self) -> None:
        """Parse 'abc' with a number parser that has a next line parser.

        The next parser is an autospec'd LineParser whose ``parse`` forwards
        to a real LineParser holding a StringParser. The test expects the
        single parsed value to equal the input and the next parser's
        ``parse`` to have been called exactly once.
        """
        # Arrange
        line = 'abc'
        next_line_parser = LineParser([StringParser()], ParserOptions(), False)
        file = create_autospec(TextReader)
        input_line = Line(file=file, index=0, header=False, line=line)
        next_line_parser_mock = mock.create_autospec(LineParser)
        # Forward every call to the real string-based parser.
        next_line_parser_mock.parse = MagicMock(
            side_effect=next_line_parser.parse)
        line_parser = LineParser([NumberParser()], ParserOptions(), False,
                                 next_line_parser_mock)

        # Act
        parsed_line = line_parser.parse(input_line)

        # Assert
        self.assertIsNotNone(parsed_line.parsed_values)
        self.assertEqual(1, len(parsed_line.parsed_values))
        self.assertEqual(line, parsed_line.parsed_values[0])
        next_line_parser_mock.parse.assert_called_once()
Exemple #8
0
class FileParserTest(TestCase):
    """Tests for FileParser.parse against mocked file contents."""

    @parameterized.expand(
        [[
            'empty file',
            LineParser([], ParserOptions()),
            ParserOptions(), '', []
        ],
         [
             'file with one header',
             LineParser([], ParserOptions()),
             ParserOptions(), 'name\tage\tsalary', [None]
         ],
         [
             'file with one header and one data line',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions()),
             ParserOptions(), 'name\tage\tsalary\nJohn Doe\t23\t10,000',
             [None, ['John Doe', '23', '10,000']]
         ],
         [
             'file with two header lines and one data line',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions(header_lines=2)),
             ParserOptions(header_lines=2),
             'Personnel\nname\tage\tsalary\nJohn Doe\t23\t10,000',
             [None, None, ['John Doe', '23', '10,000']]
         ],
         [
             'file with one header and two data lines',
             LineParser([StringParser(),
                         NumberParser(),
                         NumberParser()], ParserOptions()),
             ParserOptions(),
             'name\tage\tsalary\nJohn Doe\t23\t10,000\nBob Doe\t30\t15,000',
             [None, ['John Doe', '23', '10,000'], ['Bob Doe', '30', '15,000']]
         ]])
    def test_parse(self,
                   name: str,
                   line_parser: LineParser,
                   parser_options: ParserOptions,
                   data: str,
                   expected_result: List[Optional[List[str]]],
                   expected_exception_type: Optional[Type] = None) -> None:
        """Parse mocked file content and compare each line's parsed values.

        :param name: case label consumed by ``parameterized``.
        :param line_parser: line parser given to the FileParser.
        :param parser_options: options given to the FileParser.
        :param data: content served by the mocked built-in ``open``.
        :param expected_result: expected ``parsed_values`` per yielded line
            (``None`` entries are used for the header lines in the cases
            above); ignored when an exception is expected.
        :param expected_exception_type: when given, parsing must raise an
            exception of this type.
        """
        file_parser = FileParser(line_parser, parser_options)

        with mock_builtin_open(data=data):
            if expected_exception_type is not None:
                # The result is consumed inside assertRaises so an error
                # raised during iteration is caught too. assertRaises also
                # fails the test when nothing is raised, which the former
                # try/except did not.
                with self.assertRaises(expected_exception_type):
                    list(file_parser.parse(''))
                return

            result = list(file_parser.parse(''))

            self.assertEqual(len(expected_result), len(result))

            for expected_line, line in zip(expected_result, result):
                self.assertIsInstance(line, ParsedLine)
                self.assertEqual(expected_line, line.parsed_values)
class FileProcessorTest(TestCase):
    """Tests for FileProcessor.process with a mocked parser and writer."""

    @parameterized.expand(
        [['empty file', LineProcessor([], ParserOptions()), []],
         [
             'file with only header',
             LineProcessor([], ParserOptions()),
             [
                 ParsedLine(
                     Line(file=create_autospec(TextReader),
                          index=0,
                          header=True,
                          line='Name\tAge\tSalary'))
             ]
         ],
         [
             'file with one header and one data line',
             LineProcessor([], ParserOptions()),
             [
                 ParsedLine(
                     Line(file=create_autospec(TextReader),
                          index=0,
                          header=True,
                          line='Name\tAge\tSalary')),
                 ParsedLine(Line(file=create_autospec(TextReader),
                                 index=1,
                                 header=True,
                                 line='John Doe\t23\t10,000'),
                            parsed_values=['John Doe', '23', '10,000'])
             ]
         ],
         [
             'file with one header and two data lines',
             LineProcessor([], ParserOptions()),
             [
                 ParsedLine(
                     Line(file=create_autospec(TextReader),
                          index=0,
                          header=True,
                          line='Name\tAge\tSalary')),
                 ParsedLine(Line(file=create_autospec(TextReader),
                                 index=1,
                                 header=True,
                                 line='John Doe\t23\t10,000'),
                            parsed_values=['John Doe', '23', '10,000']),
                 ParsedLine(Line(file=create_autospec(TextReader),
                                 index=2,
                                 header=True,
                                 line='Bob Doe\t30\t50,000'),
                            parsed_values=['Bob Doe', '30', '50,000'])
             ]
         ]])
    def test_process(self, name: str, line_processor: LineProcessor,
                     lines: List[ParsedLine]) -> None:
        """Process pre-parsed ``lines`` and check what reaches the writer.

        :param name: case label consumed by ``parameterized``.
        :param line_processor: line processor given to the FileProcessor.
        :param lines: parsed lines returned by the mocked ``FileParser.parse``.
        """
        # Arrange
        file_parser = create_autospec(FileParser)
        file_parser.parse = MagicMock(side_effect=lambda _: lines)
        file_processor = FileProcessor(file_parser, line_processor)
        text_writer_instance_mock = create_autospec(TextWriter)
        text_writer_mock = create_autospec(TextWriter)
        # The object yielded by the `with` statement is a separate mock so
        # that its write_line calls can be inspected.
        text_writer_mock.__enter__ = MagicMock(
            return_value=text_writer_instance_mock)

        # Act
        with patch('csv_import.csv.text.TextWriter.create',
                   MagicMock(return_value=text_writer_mock)):
            with mock_builtin_open():
                file_processor.process('', '')

        # Assert
        # `called_once()` is not a mock assertion (it never verified the
        # call count); `assert_called_once()` is the real check.
        file_parser.parse.assert_called_once()

        text_writer_instance_mock.write_line.assert_has_calls(
            [call(line.line) for line in lines])
class FileProcessorIntegrationTest(TestCase):
    """Runs real factories over fixture files and inspects the parsed lines."""

    @parameterized.expand(
        [[
            '01_correct_file_with_commas.csv',
            ParserOptions(field_terminator=',', field_enclosing_value='"'),
            [
                ParsedLine(
                    Line(file=create_autospec(TextReader),
                         index=0,
                         header=True,
                         line='Name,Age,Salary\n')),
                ParsedLine(Line(file=create_autospec(TextReader),
                                index=1,
                                header=False,
                                line='John Doe,23,"10,000"'),
                           parsed_values=['John Doe', '23', '10,000'])
            ]
        ],
         [
             '02_correct_file_with_tabulations.csv',
             ParserOptions(field_terminator='\t', field_enclosing_value='"'),
             [
                 ParsedLine(
                     Line(file=create_autospec(TextReader),
                          index=0,
                          header=True,
                          line='Name\tAge\tSalary\n')),
                 ParsedLine(Line(file=create_autospec(TextReader),
                                 index=1,
                                 header=False,
                                 line='John Doe\t23\t10,000'),
                            parsed_values=['John Doe', '23', '10,000'])
             ]
         ],
         # NOTE(review): this case repeats the previous one verbatim -
         # possibly meant to target a different fixture; confirm.
         [
             '02_correct_file_with_tabulations.csv',
             ParserOptions(field_terminator='\t', field_enclosing_value='"'),
             [
                 ParsedLine(
                     Line(file=create_autospec(TextReader),
                          index=0,
                          header=True,
                          line='Name\tAge\tSalary\n')),
                 ParsedLine(Line(file=create_autospec(TextReader),
                                 index=1,
                                 header=False,
                                 line='John Doe\t23\t10,000'),
                            parsed_values=['John Doe', '23', '10,000'])
             ]
         ]])
    def test_process(self, input_file: str, options: ParserOptions,
                     expected_parsed_lines: List[ParsedLine]) -> None:
        """Process a fixture file and compare the lines seen by the processor.

        :param input_file: file name inside the ``fixtures`` directory next
            to this test module.
        :param options: parser options passed to the processor factory.
        :param expected_parsed_lines: lines expected to reach
            ``line_processor.process``, in order.
        """
        # Arrange
        def process(parsed_line: ParsedLine) -> str:
            # Record the line, then delegate to the real implementation.
            parsed_lines.append(parsed_line)
            return original_process(parsed_line)

        file_parser_factory = FileParserFactory()
        file_processor_factory = FileProcessorFactory(file_parser_factory)
        current_dir = os.path.dirname(os.path.realpath(__file__))
        input_file_path = os.path.join(current_dir, 'fixtures', input_file)
        output_file_path = input_file_path + '.out'
        parsed_lines: List[ParsedLine] = []

        file_processor = file_processor_factory.create(input_file_path,
                                                       options)
        original_process = file_processor.line_processor.process
        file_processor.line_processor.process = MagicMock(side_effect=process)

        # Act
        # TextWriter.create is patched out for the duration of the call.
        with patch('csv_import.csv.text.TextWriter.create'):
            file_processor.process(input_file_path, output_file_path)

        # Assert
        # zip() stops at the shorter sequence, so without this guard the
        # test would pass even if no line had been processed at all.
        self.assertGreaterEqual(len(parsed_lines), len(expected_parsed_lines))

        for expected_parsed_line, parsed_line in zip(expected_parsed_lines,
                                                     parsed_lines):
            self.assertEqual(expected_parsed_line.index, parsed_line.index)
            self.assertEqual(expected_parsed_line.header, parsed_line.header)
            self.assertEqual(expected_parsed_line.line, parsed_line.line)
            self.assertEqual(expected_parsed_line.parsed_values,
                             parsed_line.parsed_values)