def etl():
    """Assemble the ETL pipeline used for the CSV log rows.

    Three stages are handed to ``sequential`` in this order:

    1. ``csv_to_map`` configured with the column names below,
    2. ``keep`` restricted to the ``msg`` column,
    3. ``append`` over ``msg`` with a wrapped function that lower-cases its
       argument and turns the resulting ``Counter`` into a plain ``dict``.

    Returns:
        Whatever ``sequential`` returns for those three stages.
    """
    columns = [
        'date',
        'file',
        'date2',
        'log',
        'app',
        'beat',
        'front',
        'is_log',
        'msg',
        'offset',
        'arch',
    ]
    parse_row = csv_to_map(columns)
    only_msg = keep(["msg"])
    # Presumably x is the msg string, so this yields per-character counts
    # of the lower-cased message -- confirm against ``wrap``/``append``.
    msg_counts = append(['msg'], wrap(lambda x: dict(Counter(x.lower()))))
    return sequential(parse_row, only_msg, msg_counts)
def test_some_working_remove_non_existing_columns():
    """Exploded date fields survive a ``remove_columns("hora", "dia")`` step.

    The date "20171010" is parsed, exploded and then passed through
    ``remove_columns``; the test expects all six date/time components to be
    present with their values, no error, and the input left unchanged.
    Per the test name, "hora" and "dia" are presumably columns that do not
    exist in the exploded output.
    """
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    record = {"date": "20171010"}
    pipeline = compose(
        date_parser(date_formats),
        explode_date,
        remove_columns("hora", "dia"),
    )
    operation = append(fields=["date"], etl_func=pipeline)

    result, error = operation(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["date"] == "20171010"

    assert result is not None
    expected = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }
    # Presence first, then values, mirroring the two assertion phases.
    for field in expected:
        assert field in result
    for field, value in expected.items():
        assert result[field] == value
    assert error is None
def test_field_not_found():
    """A record lacking the requested field yields no result and an error.

    The record only has "one", while the operation targets "offer"; the
    test expects ``res`` to be None and the error mapping to report
    "offer not found" under the "offer" key.
    """
    record = {"one": 1}
    explode_offer = append(["offer"], explode("offer"))

    result, error = explode_offer(record)

    assert result is None
    assert error is not None
    assert "offer" in error
    assert error["offer"] == "offer not found"
def test_empty():
    """A ``None`` record through the time parser gives only an error.

    Expects no result and the error mapping
    ``{'time': "Time can't be None: None"}``.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_empty``; if they live in the same module, later definitions
    # shadow earlier ones -- confirm and rename if so.
    parse_time = append(fields=["time"], etl_func=time_parser(["%H%M%S"]))

    result, error = parse_time(None)

    assert result is None
    assert error == {'time': "Time can't be None: None"}
def test_multiple_working():
    """Appending over two fields merges both wrapped results.

    With ``head_field_and_tail`` wrapped and applied to "hello" and
    "goodbye", the test expects ``{"w": "orld", "s": "adness"}`` and no
    error.
    """
    record = {"hello": "world", "goodbye": "sadness"}
    operation = append(["hello", "goodbye"], wrap(head_field_and_tail))

    result, error = operation(record)

    assert error is None
    assert result == {"w": "orld", "s": "adness"}
def test_some_working():
    """Appending over one field returns the wrapped function's mapping.

    With ``head_and_tail`` wrapped and applied to "hello", the test expects
    ``{"head": "w", "tail": "orld"}`` and no error.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_some_working``; if they live in the same module, later
    # definitions shadow earlier ones -- confirm and rename if so.
    record = {"hello": "world"}
    operation = append(["hello"], wrap(head_and_tail))

    result, error = operation(record)

    assert result == {"head": "w", "tail": "orld"}
    assert error is None
def test_empty():
    """A ``None`` record reports the requested field as not found.

    Expects no result and the error mapping
    ``{"hello": "hello not found"}``.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_empty``; if they live in the same module, later definitions
    # shadow earlier ones -- confirm and rename if so.
    operation = append(["hello"], wrap(head_and_tail))

    result, error = operation(None)

    assert result is None
    assert error == {"hello": "hello not found"}
def test_empty():
    """A ``None`` record through the date parser gives only an error.

    Expects no result and the error mapping
    ``{'date': "Date can't be None: None"}``.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_empty``; if they live in the same module, later definitions
    # shadow earlier ones -- confirm and rename if so.
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    parse_date = append(fields=["date"], etl_func=date_parser(date_formats))

    result, error = parse_date(None)

    assert result is None
    assert error == {'date': "Date can't be None: None"}
def test_date_format_incorrect():
    """A date string matching none of the formats is reported as an error.

    "2017,10,10" is fed to the date parser; the test expects no result, the
    message "Can not parse date 2017,10,10" under "date", and the input
    record left unchanged.
    """
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    record = {"date": "2017,10,10"}
    parse_date = append(fields=["date"], etl_func=date_parser(date_formats))

    result, error = parse_date(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["date"] == "2017,10,10"

    assert result is None
    assert error is not None
    assert error["date"] == "Can not parse date 2017,10,10"
def test_empty_time_formats():
    """A time parser built with ``None`` formats reports that as an error.

    Expects no result, the message "Time formats can't be None" under
    "time", and the input record left unchanged.
    """
    record = {"time": "202020"}
    parse_time = append(fields=["time"], etl_func=time_parser(None))

    result, error = parse_time(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["time"] == "202020"

    assert result is None
    assert error is not None
    assert error["time"] == "Time formats can't be None"
def test_time_format_incorrect():
    """A time string that does not match the format is reported as an error.

    "20,20" is fed to a parser configured with "%H%M%S"; the test expects
    no result, the message "Can not parse time 20,20" under "time", and the
    input record left unchanged.
    """
    record = {"time": "20,20"}
    parse_time = append(fields=["time"], etl_func=time_parser(["%H%M%S"]))

    result, error = parse_time(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["time"] == "20,20"

    assert result is None
    assert error is not None
    assert error["time"] == "Can not parse time 20,20"
def test_empty_date_formats():
    """A date parser built with ``None`` formats reports that as an error.

    Expects no result, the message "Date formats can't be None" under
    "date", and the input record left unchanged.
    """
    record = {"date": "20171010"}
    parse_date = append(fields=["date"], etl_func=date_parser(None))

    result, error = parse_date(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["date"] == "20171010"

    assert result is None
    assert error is not None
    assert error["date"] == "Date formats can't be None"
def test_some_working():
    """A valid time is parsed and exploded into its components.

    "202020" with format "%H%M%S" is expected to produce second, minute and
    hour all equal to 20, no error, and an unchanged input record.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_some_working``; if they live in the same module, later
    # definitions shadow earlier ones -- confirm and rename if so.
    record = {"time": "202020"}
    pipeline = compose(time_parser(["%H%M%S"]), explode_time)
    operation = append(fields=["time"], etl_func=pipeline)

    result, error = operation(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["time"] == "202020"

    assert result is not None
    for component in ("second", "minute", "hour"):
        assert result[component] == 20
    assert error is None
def test_some_working():
    """A valid date is parsed and exploded into its components.

    "20171010" is expected to produce year 2017, month 10, day 10 and zeroed
    hour/minute/second, with no error and an unchanged input record.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_some_working``; if they live in the same module, later
    # definitions shadow earlier ones -- confirm and rename if so.
    date_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    record = {"date": "20171010"}
    pipeline = compose(date_parser(date_formats), explode_date)
    operation = append(fields=["date"], etl_func=pipeline)

    result, error = operation(record)

    # The input record must not have been altered.
    assert record is not None
    assert record["date"] == "20171010"

    assert result is not None
    expected = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }
    for component, value in expected.items():
        assert result[component] == value
    assert error is None
def test_field_list_two_rows_two_different_fields():
    """Exploding a two-row list whose rows have different keys.

    The first row has "one"/"two", the second "one"/"three"; the expected
    flattened keys are prefixed with "nested_" and the second row's keys
    carry a "_1" suffix. The comparison is delegated to
    ``check_dict_by_field``.
    """
    first_row = {"one": 1, "two": 2}
    second_row = {"one": 1, "three": 3}
    operation = append(["nested"], explode("nested"))

    result, _error = operation({"nested": [first_row, second_row]})

    expected = {
        "nested_one": 1,
        "nested_one_1": 1,
        "nested_two": 2,
        "nested_three_1": 3,
    }
    check_dict_by_field(result, expected)
def test_sequential_use():
    """The same operation can be reused across good and bad inputs.

    One ``append`` operation is invoked three times in a row: with a valid
    date, with the unparseable value "0", and with the valid date again.
    The valid runs must both yield the exploded components, and the failed
    run in between must yield only ``{'date': 'Can not parse date 0'}``.
    """
    pipeline = compose(date_parser(["%Y-%m-%d"]), explode_date)
    operation = append(fields=["date"], etl_func=pipeline)

    def check_valid_run():
        # Fresh record and fresh result/error bindings on every call, so
        # nothing can leak in from a previous invocation.
        record = {"date": "2017-10-10"}
        result, error = operation(record)
        assert record is not None
        assert record["date"] == "2017-10-10"
        assert result is not None
        for component in ("second", "minute", "hour"):
            assert result[component] == 0
        assert result["day"] == 10
        assert result["month"] == 10
        assert result["year"] == 2017
        assert error is None

    check_valid_run()

    # A failing input in between must not disturb later runs.
    bad_record = {"date": "0"}
    result, error = operation(bad_record)
    assert bad_record is not None
    assert result is None
    assert error == {'date': 'Can not parse date 0'}

    check_valid_run()
def test_field_list_two_rows_one_field():
    """Exploding a two-row list where both rows share a single key.

    Both rows are ``{"one": 1}``; the expected output is "nested_one" for
    the first row and "nested_one_1" for the second. The comparison is
    delegated to ``check_dict_by_field``.
    """
    rows = [{"one": 1}, {"one": 1}]
    operation = append(["nested"], explode("nested"))

    result, _error = operation({"nested": rows})

    expected = {"nested_one": 1, "nested_one_1": 1}
    check_dict_by_field(result, expected)
def test_field_object_two_fields():
    """Exploding a nested object with two keys.

    ``{"one": 1, "two": 2}`` under "nested" is expected to become
    "nested_one" and "nested_two". The comparison is delegated to
    ``check_dict_by_field``.
    """
    nested_object = {"one": 1, "two": 2}
    operation = append(["nested"], explode("nested"))

    result, _error = operation({"nested": nested_object})

    expected = {"nested_one": 1, "nested_two": 2}
    check_dict_by_field(result, expected)
def test_empty():
    """A ``None`` record through the explode operation yields no result.

    Only the result is checked; the error value is not inspected.
    """
    # NOTE(review): other tests visible in this source share the name
    # ``test_empty``; if they live in the same module, later definitions
    # shadow earlier ones -- confirm and rename if so.
    explode_a = append(["a"], explode("a"))

    result, _error = explode_a(None)

    assert result is None