def _fw_schema(custom_function):
    """Return the fixed-width sample schema as (column name, parser) tuples.

    ``custom_function`` is accepted for signature parity with other schema
    builders but is not used by any parser in this schema.
    """
    # Parsers are created in column order, exactly as they appear in the
    # returned list.
    id_parser = StringParser(1, 2)
    name_parser = StringParser(3, 5).change_case('U')
    name_parser = name_parser.not_null('nan', allow_white_space=True)
    gender_parser = StringParser(6, 6).value_set(['M', 'F'])
    # Both value columns are declared with the same (7, 11) arguments; only
    # the first one carries a not-null default.
    not_nullable_parser = StringParser(7, 11).not_null('dummy')
    nullable_parser = StringParser(7, 11)
    birth_year_parser = IntegerParser(12, 13).max_value(20)
    balance_parser = FloatParser(14, 17).min_value(10.0)

    columns = [
        ('ID', id_parser),
        ('NAME', name_parser),
        ('GENDER', gender_parser),
        ('NOT_NULLABLE_VALUE', not_nullable_parser),
        ('NULLABLE_VALUE', nullable_parser),
        ('BIRTH_YEAR', birth_year_parser),
        ('BALANCE', balance_parser),
    ]
    return columns
Exemple #2
0
def test_not_null_validator():
    """Check not_null(): rejection of null input, white-space allowance, defaults."""
    func = StringParser(quoted=0).not_null().build()
    assert func("SOME_VALUE") == "SOME_VALUE"

    # One pytest.raises block per input: a statement placed after the first
    # raising call inside the same block would never execute.
    with pytest.raises(NullValueInNotNullFieldException):
        func(None)
    # An empty string counts as null when white space is not allowed (the
    # default-value case below maps "" to the default), so it must raise too.
    with pytest.raises(NullValueInNotNullFieldException):
        func("")

    # With allow_white_space=True an empty string is passed through.
    p = StringParser().not_null(allow_white_space=True)
    func = p.build()
    assert func("") == ""

    # Default value assignment check
    func = StringParser(quoted=0).not_null(default_value="NA").build()
    assert func(None) == "NA"
    assert func("") == "NA"
def _schema(custom_function):
    """Return the sample schema as a list of (column name, parser) tuples.

    ``custom_function`` is attached to the BLOCK_NUMBER parser via ``add_func``.
    """
    # Parsers are built one by one, in the same order as the columns, so the
    # call to datetime.datetime.now() happens at the same point as before.
    id_parser = StringParser(quoted=1)

    run_id_parser = StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}')
    run_id_parser = run_id_parser.change_case('u')

    class_parser = StringParser(start=1, end=1).value_set(['a', 'b', 'A'])

    initiated_on_parser = DatetimeParser(
        formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S'])
    initiated_on_parser = initiated_on_parser.convert('%Y/%m/%d')
    initiated_on_parser = initiated_on_parser.max_value(
        datetime.datetime.now())
    initiated_on_parser = initiated_on_parser.min_value(
        value='20000101', format='%Y%m%d')
    initiated_on_parser = initiated_on_parser.not_null(
        datetime.datetime.strptime('19001231', '%Y%m%d'))

    asked_amount_parser = IntegerParser().max_value(2000)
    asked_amount_parser = asked_amount_parser.not_null(default_value=0)

    adjusted_amount_parser = FloatParser().min_value(10.0).not_null(0.0)

    role_model_parser = ConstantParser('Leo Messi')

    block_number_parser = IntegerParser().add_func(custom_function)
    block_number_parser = block_number_parser.range(0, 40)

    columns = [
        ('ID', id_parser),
        ('RUN_ID', run_id_parser),
        ('CLASS', class_parser),
        ('INITIATED_ON', initiated_on_parser),
        ('ASKED_AMOUNT', asked_amount_parser),
        ('ADJUSTED_AMOUNT', adjusted_amount_parser),
        ('ROLE_MODEL', role_model_parser),
        ('BLOCK_NUMBER', block_number_parser),
    ]
    return columns
from parseval.parser import (StringParser, IntegerParser, FloatParser)
# In this example we will see how a fixed-width dataset can be processed using the `Parser` API.
# Fixed-width data is supported alongside all other dataset structures.
# For demo purposes the input is a list of rows, and the parsed rows will be returned in JSON format.

# To process any kind of dataset, a schema has to be defined first.
# A schema is nothing but a set of parsers, stored in a specific structure, in the order of the columns in the dataset.

# The schema has to be provided as a list, where each element of the list is a tuple.
# The first element of each tuple is the column name; this is just for reference, with no internal usage.
# The second element of each tuple is the actual parser (a parser object, not a built parser function).

# Configure the root logger so that every message of the demo (DEBUG and
# above) is emitted with its level and timestamp.
logging.basicConfig(format='%(levelname)s:%(asctime)s:: %(message)s',
                    level=logging.DEBUG)
# Schema of the fixed-width rows: one (column name, parser object) tuple per
# column. The two positional arguments of each parser are presumably the
# start and end positions of the field in the row -- TODO confirm against the
# parser documentation.
fw_schema = [
    ('ID', StringParser(1, 2)),
    ('NAME',
     StringParser(3, 5).change_case('U').not_null('nan',
                                                  allow_white_space=True)),
    ('GENDER', StringParser(6, 6).value_set(['M', 'F'])),
    # The next two columns are declared with the same (7, 11) arguments; only
    # the first one has a not-null default value.
    ('NOT_NULLABLE_VALUE', StringParser(7, 11).not_null('dummy')),
    ('NULLABLE_VALUE', StringParser(7, 11)),
    ('BIRTH_YEAR', IntegerParser(12, 13).max_value(20)),
    ('BALANCE', FloatParser(14, 17).min_value(10.0))
]
# Build the row parser from the schema, configured for 'fixed-width' input
# rows and 'dict' parsed rows.
# NOTE(review): stop_on_error=1 presumably aborts parsing on the first
# invalid row -- confirm against the Parser documentation.
p = Parser(schema=fw_schema,
           stop_on_error=1,
           input_row_format='fixed-width',
           parsed_row_format='dict')
# Banner line marking the start of the fixed-width demo in the log output.
logging.info(('#' * 50) + " FIXED WIDTH DATASET PARSING " + ('#' * 50))
parsed_data = p.parse(
Exemple #5
0
def test_add_func_validator():
    """Check that a custom function added with add_func is applied by the parser."""
    func = StringParser(quoted=0).add_func(_first_three_char_check).build()
    assert func("ABC2344") == "2344"
    # Call the parser bare inside pytest.raises(Exception): wrapping it in an
    # assert would let the AssertionError of a wrong, non-raising result
    # satisfy the broad Exception expectation and hide a missing failure.
    with pytest.raises(Exception):
        func("PQR2344")
Exemple #6
0
def test_change_case_validator():
    """A parser built with change_case(case_type='u') must return the upper-cased input."""
    to_upper = StringParser(quoted=0).change_case(case_type='u').build()
    parsed = to_upper('Manual_2020-23-12')
    assert parsed == "MANUAL_2020-23-12"
Exemple #7
0
def test_regex_match_validator():
    """regex_match must pass a matching value through and reject a non-matching one."""
    run_id_pattern = r'\w+_\d{4}-\d{2}-\d{2}'
    matcher = StringParser(quoted=0).regex_match(pattern=run_id_pattern).build()

    # A value that fits the pattern comes back unchanged.
    parsed = matcher('Manual_2020-23-12')
    assert parsed == "Manual_2020-23-12"

    # A value without the underscore separator is expected to be rejected.
    with pytest.raises(RegexMatchException):
        assert matcher('Trig2020-23-12')
Exemple #8
0
def test_value_set_validator():
    """value_set must accept a listed value and reject one that is not listed."""
    checker = StringParser(quoted=0).value_set(['MAEVE', 'OTIS', 'ERIC']).build()

    accepted = checker('OTIS')
    assert accepted == "OTIS"

    with pytest.raises(ValidValueCheckException):
        assert checker('GRY')
Exemple #9
0
def test_single_quoted_data():
    """A parser built with quoted=2 must return single-quoted input without its quotes."""
    unquote = StringParser(quoted=2).build()
    parsed = unquote("'ABC'")
    assert parsed == "ABC"
Exemple #10
0
def test_double_quoted_data():
    """A parser built with quoted=1 must return double-quoted input without its quotes."""
    unquote = StringParser(quoted=1).build()
    parsed = unquote('"ABC"')
    assert parsed == 'ABC'
Exemple #11
0
def test_non_quoted_data():
    """A default StringParser must return plain, unquoted input unchanged."""
    passthrough = StringParser().build()
    parsed = passthrough("ABC")
    assert parsed == "ABC"
Exemple #12
0
# Import required parser class
import logging
from parseval.parser import StringParser

# Configure the root logger so that every message of the demo (DEBUG and
# above) is emitted with its level and timestamp.
logging.basicConfig(format='%(levelname)s:%(asctime)s:: %(message)s',
                    level=logging.DEBUG)
# Simplest possible usage: a parser without any validator attached.
basic_parser = StringParser()  # Create basic parser object
basic_parse_func = basic_parser.build()  # Build the parser function
input_data = 'Any String'  # Input Data
basic_parsed_output = basic_parse_func(input_data)  # Parse data

# Log a banner followed by the input and the parsed output of the basic example.
logging.info(('#' * 50) + " STRING PARSING " + ('#' * 50))
logging.info("====> Simple Data Parsing example:")
logging.info("Input 1: {}".format(input_data))
logging.info("Output 1: {}".format(basic_parsed_output))
logging.info('\n')

# Now let's look at some of the available validators to get an idea of how to use them.
# Note that we will not go through every validator, because they all work in the same fashion.
# The syntax and description of all validators are available in the documentation.

validation_parser = StringParser()\
    .not_null(default_value="NA")\
    .value_set(["Apple", "Google", "NA"])  # null check validation and allowed values validation is added
validation_parse_func = validation_parser(
)  # Yes, you can directly call the object to build the parser

# Parse a value that belongs to the allowed set.
valid_input_data = 'Apple'
output_for_valid_input_data = validation_parse_func(
    valid_input_data)  # Parse data
logging.info("====> Data Validation example:")
Exemple #13
0
# Adding a custom validation: the first three characters must belong to a set of allowed values
def _first_three_char_check(data: str):
    list_of_allowed_value: list = ['ABC', 'DEF']
    if not data:
        return data

    if str(data)[:3].upper() in [
            word.upper() for word in list_of_allowed_value
    ]:
        return data
    else:
        raise Exception("Bad prefix in the value.")


# Create the parser with the custom validator attached, then build the
# callable that actually parses a value.
parser = StringParser().add_func(_first_three_char_check)
parser_func = parser.build()

# Valid case: the value starts with "ABC", one of the allowed prefixes.
input_data = "ABC2344"
logging.info(('#' * 50) + " ADDING CUSTOM VALIDATOR " + ('#' * 50))
logging.info("====> Valid Input:")
logging.info("Input: {}".format(input_data))
logging.info("Output: {}".format(parser_func(input_data)))
logging.info('\n')

# Invalid case: the "PQR" prefix is not in the allowed list of the validator.
input_data = "PQR12344"
logging.info("====> Invalid Input:")
try:
    logging.info("Invalid Input: {}".format(input_data))
    parser_func(input_data)
except Exception as e:
Exemple #14
0
def _parity_check(data):
    if data:
        try:
            i_data = int(data)
        except:
            pass
        if i_data % 2 != 0:
            raise Exception("The data has to be even!")
    return data


# The schema has to be provided as a list, where each element of the list is a tuple.
# The first element of each tuple is the column name; this is just for reference, with no internal usage.
# The second element of each tuple is the actual parser (a parser object, not a built parser function).
schema = [
    ('ID', StringParser(quoted=1)),
    ('RUN_ID',
     StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}').change_case('u')),
    ('CLASS', StringParser(start=1, end=1).value_set(['a', 'b', 'A'])),
    # The upper bound is the time at which this module is executed, because
    # datetime.datetime.now() is evaluated once, while the list is built.
    ('INITIATED_ON', DatetimeParser(
        formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S']).convert('%Y/%m/%d').max_value(
            datetime.datetime.now()).min_value(value='20000101',
                                               format='%Y%m%d').not_null(
                                                   datetime.datetime.strptime(
                                                       '19001231', '%Y%m%d'))),
    ('ASKED_AMOUNT',
     IntegerParser().max_value(2000).not_null(default_value=0)),
    ('ADJUSTED_AMOUNT', FloatParser().min_value(10.0).not_null(0.0)),
    ('ROLE_MODEL', ConstantParser('Iron-Man')),
    # Custom validator: _parity_check raises when the value is odd.
    ('BLOCK_NUMBER', IntegerParser().add_func(_parity_check).range(0, 40))
]