class TinkoffCSVParser(BaseCSVParser):
    """CSV parser that maps fixed row positions to named, typed columns.

    Row positions 7 and 8 are not mapped to any column.
    NOTE(review): the class name suggests a Tinkoff bank export; confirm
    the layout against a sample file.
    """

    columns = [
        # Request timestamp (date and time), always required.
        Column(
            'requested_at',
            index=0,
            required=True,
            processor=DateTimeProcessor(['%d.%m.%Y %H:%M:%S']),
        ),
        # Payment date (date only), optional.
        Column(
            'paid_at',
            index=1,
            required=False,
            processor=DateProcessor(['%d.%m.%Y']),
        ),
        Column('card_last_digits', index=2, required=True),
        Column('status', index=3, required=True),
        # Amount and its currency code.
        Column(
            'amount_currency',
            index=4,
            required=True,
            processor=DecimalProcessor(),
        ),
        Column('currency', index=5, required=True),
        Column(
            'amount_rub',
            index=6,
            required=True,
            processor=DecimalProcessor(),
        ),
        # Positions 7 and 8 are intentionally left unmapped here.
        Column('category', index=9, required=True),
        Column(
            'mcc_code',
            index=10,
            required=False,
            processor=IntegerProcessor(),
        ),
        Column('description', index=11, required=True),
    ]
class Parser(BaseXLSXParser):
    """XLSX parser with two name columns and a custom ``last_name`` cleaner."""

    columns = [Column('last_name', index=0), Column('first_name', index=1)]

    def clean_column_last_name(self, value):
        """Return ``value`` prefixed with ``'Modified '``.

        NOTE(review): presumably invoked by the base parser as the
        per-column clean hook for ``last_name`` -- confirm in the base class.
        """
        modified = f'Modified {value}'
        return modified
class Parser(BaseParser):
    """Parser with two name columns and an ``age`` column limited to ``int``."""

    columns = [
        Column('first_name', index=0),
        Column('last_name', index=1),
        # An int passes through unchanged; anything else is handed to
        # raise_ together with a fresh ColumnError.
        Column(
            'age',
            index=2,
            processor=lambda raw: raw if isinstance(raw, int) else raise_(ColumnError()),
        ),
    ]
class Parser(BaseXLSXParser):
    """XLSX parser declaring three name columns and two ``unique_together`` groups."""

    columns = [
        Column('first_name', index=0),
        Column('last_name', index=1),
        Column('middle_name', index=2),
    ]

    # Two overlapping column groups; both include ``last_name``.
    unique_together = [
        ['first_name', 'last_name'],
        ['last_name', 'middle_name'],
    ]
class Parser(BaseXLSXParser):
    """Single-column XLSX parser with file-path and row-index output disabled.

    ``skip_empty_rows`` is taken from ``parser_skip_empty_rows``, which is
    defined outside this class.
    """

    columns = [Column('column1', index=0)]

    skip_empty_rows = parser_skip_empty_rows
    add_row_index = False
    add_file_path = False
class Parser(BaseXLSXParser):
    """Single-column XLSX parser with row-index output disabled.

    ``skip_empty_rows`` is taken from ``parser_skip_empty_rows``, which is
    defined outside this class.
    """

    columns = [Column('column1', index=0)]

    skip_empty_rows = parser_skip_empty_rows
    # add_file_path = False  # TODO: remove if clean_row should not return the file path
    add_row_index = False
class Parser(BaseParser):
    """Single-column parser whose ``column1`` cleaner returns a constant."""

    columns = [Column('column1', index=0)]

    add_row_index = False
    add_file_path = False

    def clean_column_column1(self, value):
        """Ignore ``value`` and return the fixed string ``'any value'``."""
        replacement = 'any value'
        return replacement
class Parser(BaseParser):
    """Single-column parser whose ``column1`` cleaner returns a constant."""

    columns = [Column('column1', index=0)]

    # add_file_path = False  # TODO: remove if clean_row should not return the file path
    add_row_index = False

    def clean_column_column1(self, value):
        """Ignore ``value`` and return the fixed string ``'any value'``."""
        replacement = 'any value'
        return replacement
def parse_column(self, row: List[Any], column: Column, row_index: int) -> Any:
    """Read one cell from ``row``, then process and clean it for ``column``.

    Args:
        row: Raw cell values of a single row.
        column: Column definition; ``column.index`` selects the cell and
            ``column.processor`` converts the raw value.
        row_index: Index of the row being parsed. Not used by this
            implementation; kept so the signature stays unchanged.

    Returns:
        The value produced by ``column.processor`` followed by
        ``self.clean_column``. When ``row`` has no element at
        ``column.index``, ``None`` is fed to the processor instead.

    Raises:
        StopParsing: Propagated unchanged from the processor or cleaner.
        ColumnError: Wraps any other exception from the processor or
            cleaner, carrying the original's ``messages`` attribute when
            present, otherwise its string form.
    """
    try:
        raw_value = row[column.index]
    except IndexError:
        # A row shorter than expected is treated as having an empty cell.
        raw_value = None

    try:
        processed = column.processor(raw_value)
        return self.clean_column(column, processed)
    except StopParsing:
        # Bare ``raise`` re-raises the active exception without adding a
        # redundant traceback entry for this frame.
        raise
    except Exception as e:
        raise ColumnError(getattr(e, 'messages', str(e))) from e
class Parser(BaseXLSXParser):
    """XLSX parser mixing columns with and without declared headers.

    ``header_row_index`` is taken from ``header_row_idx``, which is defined
    outside this class.
    """

    header_row_index = header_row_idx

    columns = [
        Column('column1', index=0, header='колонка1'),
        # Header validation explicitly switched off.
        Column(
            'column2',
            index=1,
            header='колонка2',
            validate_header=False,
        ),
        # No header declared for this column.
        Column('column3', index=2),
        # Header validation explicitly switched on.
        Column(
            'column4',
            index=3,
            header='колонка4',
            validate_header=True,
        ),
        # This header starts with a capital letter, unlike the others.
        Column('column5', index=4, header='Колонка5'),
        Column('column6', index=5, header='колонка6'),
    ]
class Parser(BaseXLSXParser):
    """XLSX parser with a single ``id`` column flagged as unique."""

    columns = [Column('id', index=0, unique=True)]
class Parser(BaseXLSXParser):
    """XLSX parser with one column configured from outside the class.

    ``column_index`` and ``column_header`` are defined outside this class.
    """

    columns = [
        Column(
            'column1',
            index=column_index,
            header=column_header,
        ),
    ]
class Parser(BaseMultipleSheetsXLSXParser):
    """Multi-sheet XLSX parser with one column configured from outside the class.

    ``column_index`` and ``column_header`` are defined outside this class.
    """

    columns = [
        Column(
            'column1',
            index=column_index,
            header=column_header,
        ),
    ]
class XLSXParser(BaseXLSXParser):
    """XLSX parser for two name columns with declared header captions."""

    columns = [
        Column(
            'first_name',
            index=0,
            header='First Name',
        ),
        Column(
            'last_name',
            index=1,
            header='Last Name',
        ),
    ]
class Parser(BaseXLSXParser):
    """XLSX parser reading ``first_name`` from the second cell (index 1).

    Row-index output and empty-row skipping are both enabled.
    """

    columns = [
        Column('first_name', index=1),
    ]

    # add_file_path = True  # TODO: remove if clean_row should not return the file path
    add_row_index = True
    skip_empty_rows = True
def test_clean_row(base_parser):
    """``clean_row`` returns the row data plus ``file_path`` and ``row_index``.

    NOTE(review): the expected ``'test_file_path'`` presumably comes from the
    ``base_parser`` fixture -- confirm in conftest.
    """
    row_index = 1
    row = ('test', )
    row_data = {'first_name': 'test'}

    expected = {
        'first_name': 'test',
        'file_path': 'test_file_path',
        'row_index': row_index,
    }

    assert base_parser.clean_row(row_data, row, row_index) == expected


@pytest.mark.parametrize(
    'column, row_values, expected_value',
    (
        # Value is returned as-is when no processor is given.
        (Column('column_name', index=0), [0], 0),
        # Index beyond the end of the row yields None.
        (Column('column_name', index=10), [], None),
        # Processors convert the raw cell value.
        (Column('column_name', index=0, processor=lambda x: str(x)), [0], '0'),
        (Column('column_name', index=0, processor=lambda x: int(x)), ['1'], 1),
    ),
)
def test_parse_column(column, row_values, expected_value, row_factory):
    """``parse_column`` reads the cell at ``column.index`` and applies the processor."""

    class Parser(BaseParser):
        columns = [column]

    parser = Parser(None)
    row = row_factory(row_values)

    parsed = parser.parse_column(row, column, row_index=0)
    assert parsed == expected_value
class MultipleSheetsXLSXParser(BaseMultipleSheetsXLSXParser):
    """Multi-sheet XLSX parser whose two name columns go through ``FloatProcessor``."""

    columns = [
        Column(
            'first_name',
            index=0,
            header='First Name',
            processor=FloatProcessor(),
        ),
        Column(
            'last_name',
            index=1,
            header='Last Name',
            processor=FloatProcessor(),
        ),
    ]
class Parser(BaseMultipleSheetsXLSXParser):
    """Multi-sheet XLSX parser with a single column read from the first cell."""

    columns = [Column('column1', index=0)]
class XLSXParser(BaseXLSXParser):
    """XLSX parser for two header-less name columns, with data starting at row 0."""

    first_data_row_index = 0

    columns = [Column('first_name', index=0), Column('last_name', index=1)]
import pytest from openpyxl.workbook import Workbook from import_me.columns import Column from import_me.exceptions import StopParsing from import_me.parsers.xlsx import BaseXLSXParser, BaseMultipleSheetsXLSXParser from import_me.processors import FloatProcessor DEFAULT_WORKBOOK_DATA = { 'header': ['First Name', 'Last Name'], 'data': [['Ivan', 'Ivanov'], ['Petr', 'Petrov']], } DEFAULT_PARSER_COLUMNS = [ Column('first_name', index=0, header='First Name'), Column('last_name', index=1, header='Last Name'), ] def test_base_xlsx_parser(xlsx_file_factory): class XLSXParser(BaseXLSXParser): columns = DEFAULT_PARSER_COLUMNS xlsx_file = xlsx_file_factory(**DEFAULT_WORKBOOK_DATA) parser = XLSXParser(file_path=xlsx_file.name) parser() assert parser.has_errors is False assert parser.cleaned_data == [
class Parser(BaseParser):
    """Parser with one column whose processor is supplied from outside the class.

    ``column_processor`` is defined outside this class.
    """

    columns = [
        Column(
            'column_name',
            index=0,
            processor=column_processor,
        ),
    ]
class Parser(BaseXLSXParser):
    """XLSX parser reading ``first_name`` from the second cell (index 1).

    File-path output, row-index output and empty-row skipping are all enabled.
    """

    columns = [
        Column('first_name', index=1),
    ]

    skip_empty_rows = True
    add_row_index = True
    add_file_path = True
class Parser(BaseParser):
    """Parser with one optional column and two required ones."""

    columns = [
        # Optional column.
        Column('column1', index=0, required=False),
        # Required column without a header caption.
        Column('column2', index=1, required=True),
        # Required column with a header caption.
        Column(
            'column3',
            index=2,
            header='column3 name',
            required=True,
        ),
    ]