Exemple #1
0
def etl():
    """Assemble the three-step pipeline applied to each CSV record.

    The steps are chained with ``sequential`` in this order:

    1. ``csv_to_map`` with the eleven column names below,
    2. ``keep`` restricted to the ``msg`` column,
    3. ``append`` on ``msg`` of a wrapped function that lower-cases its
       input and tallies its items with ``Counter`` (presumably the
       characters of the message string -- confirm against the caller).

    Returns:
        Whatever ``sequential`` returns for those three steps.
    """
    column_names = [
        'date', 'file', 'date2', 'log', 'app', 'beat', 'front', 'is_log',
        'msg', 'offset', 'arch'
    ]

    def tally_lowercased(text):
        # Case is folded first so 'A' and 'a' land in the same bucket.
        return dict(Counter(text.lower()))

    to_mapping = csv_to_map(column_names)
    message_only = keep(["msg"])
    message_counts = append(['msg'], wrap(tally_lowercased))
    return sequential(to_mapping, message_only, message_counts)
def test_some_working_remove_non_existing_columns():
    """Removing columns that are not present must leave the exploded date intact.

    The date "20171010" is parsed and exploded, then ``remove_columns`` is
    asked to drop "hora" and "dia"; the test expects all six date parts to
    still be present with the right values, no error, and the input record
    unchanged.
    """
    accepted_formats = [
        "%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S",
        "%m%d", "%Y-%m-%d", "%Y%m%d"
    ]
    record = {"date": "20171010"}

    pipeline = compose(date_parser(accepted_formats),
                       explode_date,
                       remove_columns("hora", "dia"))
    operation = append(fields=["date"], etl_func=pipeline)
    res, err = operation(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["date"] == "20171010"

    assert res is not None
    expected_parts = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }
    # Presence is checked for every part before any value is compared.
    for part in expected_parts:
        assert part in res
    for part, value in expected_parts.items():
        assert res[part] == value
    assert err is None
def test_field_not_found():
    """A record without the requested field gives no result and a per-field error."""
    record = {"one": 1}
    explode_offer = append(["offer"], explode("offer"))

    res, err = explode_offer(record)

    assert res is None
    # The error is expected to be a mapping keyed by the missing field name.
    assert err is not None
    assert "offer" in err
    assert err["offer"] == "offer not found"
def test_empty():
    """Parsing the time field of a None record reports "Time can't be None"."""
    parse_time = append(fields=["time"], etl_func=time_parser(["%H%M%S"]))

    res, err = parse_time(None)

    assert res is None
    assert err == {'time': "Time can't be None: None"}
def test_multiple_working():
    """Two fields processed by one append call are merged into a single result."""
    record = {"hello": "world", "goodbye": "sadness"}
    split_fields = append(["hello", "goodbye"], wrap(head_field_and_tail))

    res, err = split_fields(record)

    assert err is None
    # Each value is expected as {first letter: remainder}.
    assert res == {"w": "orld", "s": "adness"}
def test_some_working():
    """A single field run through head_and_tail yields its head and tail entries."""
    record = {"hello": "world"}
    split_hello = append(["hello"], wrap(head_and_tail))

    res, err = split_hello(record)

    assert res == {"head": "w", "tail": "orld"}
    assert err is None
def test_empty():
    """A None record gives no result and a "not found" error for the field."""
    split_hello = append(["hello"], wrap(head_and_tail))

    res, err = split_hello(None)

    assert res is None
    assert err == {"hello": "hello not found"}
Exemple #8
0
def test_empty():
    """Parsing the date field of a None record reports "Date can't be None"."""
    accepted_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    parse_date = append(fields=["date"],
                        etl_func=date_parser(accepted_formats))

    res, err = parse_date(None)

    assert res is None
    assert err == {'date': "Date can't be None: None"}
Exemple #9
0
def test_date_format_incorrect():
    """A date matching none of the formats gives no result and a parse error."""
    accepted_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    record = {"date": "2017,10,10"}
    parse_date = append(fields=["date"],
                        etl_func=date_parser(accepted_formats))

    res, err = parse_date(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["date"] == "2017,10,10"

    assert res is None
    assert err is not None
    assert err["date"] == "Can not parse date 2017,10,10"
def test_empty_time_formats():
    """A time parser built with None as its format list reports that as an error."""
    record = {"time": "202020"}
    # None stands in for a missing format list.
    parse_time = append(fields=["time"], etl_func=time_parser(None))

    res, err = parse_time(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["time"] == "202020"

    assert res is None
    assert err is not None
    assert err["time"] == "Time formats can't be None"
def test_time_format_incorrect():
    """A time that does not match "%H%M%S" gives no result and a parse error."""
    record = {"time": "20,20"}
    parse_time = append(fields=["time"], etl_func=time_parser(["%H%M%S"]))

    res, err = parse_time(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["time"] == "20,20"

    assert res is None
    assert err is not None
    assert err["time"] == "Can not parse time 20,20"
Exemple #12
0
def test_empty_date_formats():
    """A date parser built with None as its format list reports that as an error."""
    record = {"date": "20171010"}
    # None stands in for a missing format list.
    parse_date = append(fields=["date"], etl_func=date_parser(None))

    res, err = parse_date(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["date"] == "20171010"

    assert res is None
    assert err is not None
    assert err["date"] == "Date formats can't be None"
def test_some_working():
    """The time "202020" parsed with "%H%M%S" explodes to hour, minute and second 20."""
    record = {"time": "202020"}
    parse_and_explode = compose(time_parser(["%H%M%S"]), explode_time)
    operation = append(fields=["time"], etl_func=parse_and_explode)

    res, err = operation(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["time"] == "202020"

    assert res is not None
    for part in ("second", "minute", "hour"):
        assert res[part] == 20
    assert err is None
Exemple #14
0
def test_some_working():
    """The date "20171010" explodes to 2017-10-10 with a zeroed time of day."""
    accepted_formats = [
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%m%d",
        "%Y-%m-%d",
        "%Y%m%d",
    ]
    record = {"date": "20171010"}
    parse_and_explode = compose(date_parser(accepted_formats), explode_date)
    operation = append(fields=["date"], etl_func=parse_and_explode)

    res, err = operation(record)

    # The source record must come through untouched.
    assert record is not None
    assert record["date"] == "20171010"

    assert res is not None
    expected_parts = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }
    for part, value in expected_parts.items():
        assert res[part] == value
    assert err is None
def test_field_list_two_rows_two_different_fields():
    """Explode a two-row list whose rows share one key and differ in the other.

    The expected mapping shows keys from the first row prefixed with
    ``nested_`` and keys from the second row additionally suffixed ``_1``.
    """
    first_row = {"one": 1, "two": 2}
    second_row = {"one": 1, "three": 3}
    explode_nested = append(["nested"], explode("nested"))

    # The error half of the pair is not inspected by this test.
    res, _err = explode_nested({"nested": [first_row, second_row]})

    expected = {
        "nested_one": 1,
        "nested_one_1": 1,
        "nested_two": 2,
        "nested_three_1": 3,
    }
    check_dict_by_field(res, expected)
Exemple #16
0
def test_sequential_use():
    """Reuse one operation three times: valid date, unparsable date, valid date.

    The failing call in the middle must not change the outcome of the
    successful call that follows it.
    """
    operation = append(fields=["date"],
                       etl_func=compose(date_parser(["%Y-%m-%d"]),
                                        explode_date))
    expected_parts = {
        "second": 0,
        "minute": 0,
        "hour": 0,
        "day": 10,
        "month": 10,
        "year": 2017,
    }

    def check_valid_date():
        # A fresh record each time, so no state is shared between calls.
        record = {"date": "2017-10-10"}
        res, err = operation(record)
        assert record is not None
        assert record["date"] == "2017-10-10"
        assert res is not None
        for part, value in expected_parts.items():
            assert res[part] == value
        assert err is None

    check_valid_date()

    unparsable = {"date": "0"}
    res, err = operation(unparsable)
    assert unparsable is not None
    assert res is None
    assert err == {'date': 'Can not parse date 0'}

    check_valid_date()
def test_field_list_two_rows_one_field():
    """Explode a two-row list with the same key in both rows.

    The expected mapping has ``nested_one`` for the first row and
    ``nested_one_1`` for the second.
    """
    rows = [{"one": 1}, {"one": 1}]
    explode_nested = append(["nested"], explode("nested"))

    # The error half of the pair is not inspected by this test.
    res, _err = explode_nested({"nested": rows})

    check_dict_by_field(res, {"nested_one": 1, "nested_one_1": 1})
def test_field_object_two_fields():
    """Explode a nested mapping: each inner key is expected prefixed with ``nested_``."""
    record = {"nested": {"one": 1, "two": 2}}
    explode_nested = append(["nested"], explode("nested"))

    # The error half of the pair is not inspected by this test.
    res, _err = explode_nested(record)

    check_dict_by_field(res, {"nested_one": 1, "nested_two": 2})
def test_empty():
    """Exploding field "a" of a None record must produce no result."""
    operation = append(["a"], explode("a"))
    # The operation returns a (result, error) pair; None is passed as the record.
    (res, err) = operation(None)
    assert res is None