def test_wrong_ints(self, no_header_schema, test_input):
     """A value that ``int()`` rejects must make ``parse_csv`` raise ``ParseErrors``.

     The raised exception is expected to carry a single error entry naming
     the row, the column and the ``int()`` conversion message.
     """
     with pytest.raises(ParseErrors) as exc_info:
         parse_csv([[test_input, "Foo", "true"]],
                   json.loads(no_header_schema))
     # The check has to live outside the ``with`` block: any statement placed
     # after the raising call inside the block is never executed. The raised
     # exception instance is exposed as ``exc_info.value``.
     # NOTE(review): this assertion was previously unreachable, so the
     # expected payload below has never been exercised -- confirm it matches
     # the parser's actual error format.
     assert exc_info.value.errors == [{
         "row": 0,
         "column": "0",
         "message":
         f"invalid literal for int() with base 10: '{test_input}'",
     }]
 def test_wrong_floats(self, boligpriser_schema, boligpriser_header,
                       test_input):
     """A value that ``float()`` rejects must make ``parse_csv`` raise ``ParseErrors``.

     The raised exception is expected to carry a single error entry for the
     ``pris`` column with the ``float()`` conversion message.
     """
     with pytest.raises(ParseErrors) as exc_info:
         parse_csv(
             [["0001", "Trøndelag", test_input]],
             json.loads(boligpriser_schema),
             header=boligpriser_header,
         )
     # The check has to live outside the ``with`` block: any statement placed
     # after the raising call inside the block is never executed. The raised
     # exception instance is exposed as ``exc_info.value``.
     # NOTE(review): this assertion was previously unreachable, so the
     # expected payload below has never been exercised -- confirm it matches
     # the parser's actual error format.
     assert exc_info.value.errors == [{
         "row": 0,
         "column": "pris",
         "message":
         f"could not convert string to float: '{test_input}'",
     }]
 def test_incorrect_date_colum(self, dates_header, dates_schema):
     """A row with a garbage value in the third field yields exactly one validation error."""
     rows = [["1", "2020", "garbish data", "2020-01-01T12:01:01"]]
     parsed = parse_csv(rows, dates_schema, header=dates_header)
     validator = JsonSchemaValidator(dates_schema)
     errors = validator.validate(parsed)
     assert len(errors) == 1
 def test_incorrect_year_colum(self, dates_header, dates_schema):
     """Each invalid year value must produce exactly one validation error."""
     for bad_year in ("abc", ""):
         row = ["1", bad_year, "2020-12-30", "2020-12-01T12:01:01"]
         parsed = parse_csv([row], dates_schema, header=dates_header)
         errors = JsonSchemaValidator(dates_schema).validate(parsed)
         assert len(errors) == 1
 def test_valid_year_colum(self, dates_header, dates_schema):
     """Each listed year value must pass validation without errors."""
     for good_year in ("2020", "-100", "9999"):
         row = ["1", good_year, "2020-12-30", "2020-12-01T12:01:01"]
         parsed = parse_csv([row], dates_schema, header=dates_header)
         errors = JsonSchemaValidator(dates_schema).validate(parsed)
         assert len(errors) == 0
def validate_csv(event, context):
    """Validate one CSV object from S3 against the step's JSON schema.

    The configuration is built from ``event``; ``context`` is not used.
    When the step has no schema, the step is marked as successfully
    validated without reading any input. Otherwise the first object listed
    under the first input prefix is read, parsed with ``parse_csv`` and
    checked with ``JsonSchemaValidator``.

    Returns:
        ``asdict`` of the step data with status ``VALIDATION_SUCCESS`` when
        there is no schema or no errors were found; otherwise the result of
        ``_with_error`` for the parse or validation errors.
    """
    config = Config.from_lambda_event(event)
    step_config = StepConfig.from_task_config(config.task_config)
    output_prefix = config.payload.output_dataset.s3_prefix

    log_add(
        header_row=step_config.header_row,
        delimiter=step_config.delimiter,
        quote=step_config.quote,
        schema=step_config.schema,
        output_prefix=output_prefix,
    )

    if not step_config.schema:
        # 2020.06: validation is optional -- when no schema is supplied for
        # the validation step we report success right away.
        log_add(notice="No Schema provided for validation")
        config.payload.step_data.status = Status.VALIDATION_SUCCESS.value
        return asdict(config.payload.step_data)

    # Only the first configured input prefix is looked at.
    input_prefixes = config.payload.step_data.s3_input_prefixes
    input_prefix = next(iter(input_prefixes.values()))
    log_add(s3_input_prefix=input_prefix)

    # Only the first object listed under that prefix is validated.
    listing = s3.list_objects_v2(Bucket=BUCKET, Prefix=input_prefix)
    first_object = next(iter(listing["Contents"]))
    s3_path = first_object["Key"]
    log_add(s3_input_path=s3_path)

    response = s3.get_object(Bucket=BUCKET, Key=s3_path)
    reader = csv.reader(
        string_reader.from_response(response),
        dialect="unix",
        delimiter=step_config.delimiter,
        quotechar=step_config.quote,
    )

    # Consume the first row as the header only when the step says there is one.
    header = next(reader) if step_config.header_row else None

    try:
        csv_data = parse_csv(reader, step_config.schema, header)
    except ParseErrors as parse_failure:
        return _with_error(config, parse_failure.errors)

    validator = JsonSchemaValidator(step_config.schema)
    validation_errors = validator.validate(csv_data)
    if validation_errors:
        return _with_error(config, errors=validation_errors)

    config.payload.step_data.status = Status.VALIDATION_SUCCESS.value
    return asdict(config.payload.step_data)
 def test_correct_dates(self, dates_header, dates_schema):
     """Rows using the accepted date and date-time spellings validate cleanly."""
     rows = [
         ["1", "2020", "2020-01-01", "2020-01-01T12:01:01"],
         ["1", "2020", "2020-01-01", "2020-01-01 12:01:01"],
         ["1", "2020", "2020-01-01", "2020-12-01T12-01"],
     ]
     parsed = parse_csv(rows, dates_schema, header=dates_header)
     errors = JsonSchemaValidator(dates_schema).validate(parsed)
     assert len(errors) == 0
 def test_valid_date_time_colum(self, dates_header, dates_schema):
     """Each listed date-time value must pass validation without errors."""
     for good_date in ("0009-12-01T12:01:01", "2020-12-12T12:01:01"):
         row = ["1", "2020", "2020-12-30", good_date]
         parsed = parse_csv([row], dates_schema, header=dates_header)
         errors = JsonSchemaValidator(dates_schema).validate(parsed)
         assert len(errors) == 0
 def test_incorrect_date_time_colum(self, dates_header, dates_schema):
     """Each malformed date-time value must produce exactly one validation error."""
     bad_dates = (
         "2020-13-01T12:01:01",
         "2020 12 32T12:01:01",
         "garbish data",
     )
     for bad_date in bad_dates:
         row = ["1", "2020", "2020-12-30", bad_date]
         parsed = parse_csv([row], dates_schema, header=dates_header)
         errors = JsonSchemaValidator(dates_schema).validate(parsed)
         assert len(errors) == 1
 def test_parse_no_headers(self, no_header_schema):
     """Without a header, parsed rows are keyed by column index and values are typed."""
     rows = [["120", "Foo", "true"], ["999199", "Bar", "false"]]
     expected = [
         {"0": 120, "1": "Foo", "2": True},
         {"0": 999199, "1": "Bar", "2": False},
     ]
     parsed = parse_csv(rows, json.loads(no_header_schema))
     assert parsed == expected
 def test_parse_with_headers(self, boligpriser_schema, boligpriser_header):
     """With a header, parsed rows are keyed by header name and values are typed.

     The second row spells its price with a decimal comma and is still
     expected to come back as a float.
     """
     rows = [
         ["001", "Østre byflak", "1010.01", "true"],
         ["002", "Hønse-Lovisaløkka", "5001,10", "false"],
     ]
     first_expected = {
         "delbydel_id": "001",
         "navn": "Østre byflak",
         "pris": 1010.01,
         "til_salg": True,
     }
     second_expected = {
         "delbydel_id": "002",
         "navn": "Hønse-Lovisaløkka",
         "pris": 5001.10,
         "til_salg": False,
     }
     parsed = parse_csv(
         rows,
         json.loads(boligpriser_schema),
         header=boligpriser_header,
     )
     assert parsed == [first_expected, second_expected]
def test_empty_schema():
    """With an empty schema the rows come back unchanged."""
    rows = [["1", "foo"], ["2", "bar"]]
    parsed = parse_csv(rows, {})
    assert parsed == [["1", "foo"], ["2", "bar"]]
def test_simple_array():
    """With a bare ``array`` schema the rows come back unchanged."""
    rows = [["1", "foo"], ["2", "bar"]]
    schema = {"type": "array"}
    parsed = parse_csv(rows, schema)
    assert parsed == [["1", "foo"], ["2", "bar"]]
 def test_parse_empty_values(self, no_header_schema):
     """An empty cell is left out of the parsed row instead of being stored."""
     rows = [["55", "", "true"]]
     expected = [{"0": 55, "2": True}]
     parsed = parse_csv(rows, json.loads(no_header_schema))
     assert parsed == expected