Example #1
0
def test_merge_character_pos():
    """Check character-position statistics before and after merging trackers.

    Two ``StringTracker`` instances are fed different record sets using the
    default update arguments. The per-character entries in
    ``char_pos_tracker.char_pos_map`` are checked on each tracker, then on the
    tracker returned by ``merge``.
    """

    def pos_entry(tracker, key):
        # Re-read the map on every call so each assertion observes the
        # tracker's current state rather than a cached reference.
        return tracker.char_pos_tracker.char_pos_map[key]

    short_records = ["abc abc", "93341-1", "912254", "bac tralalala"]
    long_records = [
        "geometric inference ",
        "93341-1",
        "912254",
        "bac tralalala",
        "😀 this is a sale! a ❄️ sale!",
        "this is a long sentence that ends in an A",
        None,
    ]

    left = StringTracker()
    right = StringTracker()
    for item in short_records:
        left.update(item)
    for item in long_records:
        right.update(item)

    # "NITL" is presumably the bucket for characters outside the tracked
    # list -- confirm against the tracker implementation.
    assert pos_entry(left, "NITL").count == 2
    assert pos_entry(left, "NITL").histogram.get_max_value() == 3
    assert pos_entry(left, "NITL").histogram.get_min_value() == 3

    assert pos_entry(left, "a").histogram.get_max_value() == 12
    assert pos_entry(left, "a").histogram.get_min_value() == 0
    assert pos_entry(right, "NITL").count == 22
    assert pos_entry(right, "NITL").histogram.get_max_value() == 39

    merged = left.merge(right)

    assert pos_entry(merged, "a").histogram.get_max_value() == 40

    # The merged "NITL" entry must cover both inputs: 2 + 22 observations.
    assert pos_entry(merged, "NITL").histogram.get_max_value() == 39
    assert pos_entry(merged, "NITL").histogram.get_min_value() == 0
    assert pos_entry(merged, "NITL").count == 24
Example #2
0
def test_merge_mod_character_lists():
    """Check merging trackers that were updated with different character lists.

    One tracker is updated with ``character_list="ab"`` and the other with
    ``character_list="a"``. The per-character position statistics are checked
    on each tracker, then on the result of ``merge``, together with the
    merged token-length histogram maximum.
    """

    def pos_entry(tracker, key):
        # Look the entry up at call time; the map is never aliased across
        # update() calls in case the tracker replaces it.
        return tracker.char_pos_tracker.char_pos_map[key]

    short_records = ["abc abc", "93341-1", "912254", "bac tralalala"]
    long_records = [
        "geometric inference ",
        "93341-1",
        "912254",
        "bac tralalala",
        "😀 this is a sale! a ❄️ sale!",
        "this is a long sentence that ends in an A",
        None,
    ]

    left = StringTracker()
    right = StringTracker()
    for item in short_records:
        left.update(item, character_list="ab")
    for item in long_records:
        right.update(item, character_list="a")

    assert pos_entry(left, "NITL").count == 23
    assert pos_entry(left, "NITL").histogram.get_max_value() == 11
    assert pos_entry(left, "a").histogram.get_max_value() == 12
    assert pos_entry(right, "NITL").count == 102
    assert pos_entry(right, "a").histogram.get_max_value() == 40
    assert pos_entry(right, "NITL").histogram.get_max_value() == 39

    merged = left.merge(right)

    assert pos_entry(merged, "a").histogram.get_max_value() == 40
    assert pos_entry(merged, "NITL").histogram.get_max_value() == 39
    # 23 + 102 observations from the two source trackers.
    assert pos_entry(merged, "NITL").count == 125
    assert merged.token_length.histogram.get_max_value() == 10
Example #3
0
def test_string_tracker_merge():
    """Check that merging trackers combines their token-length histograms.

    One tracker sees short records (including ``None`` entries) and the other
    a single ten-token sentence. The merged tracker's ``token_length``
    histogram must report the overall maximum and minimum of the two.
    """
    short_records = [
        "one", "two", "three", "one", "one", "One test", "six", None, None,
    ]
    long_sentence = "this is a long sentence that ends in an A"

    short_tracker = StringTracker()
    for item in short_records:
        short_tracker.update(item)

    short_max = short_tracker.token_length.histogram.get_max_value()
    assert short_max == 2
    short_min = short_tracker.token_length.histogram.get_min_value()
    assert short_min == 1

    long_tracker = StringTracker()
    long_tracker.update(long_sentence)

    long_max = long_tracker.token_length.histogram.get_max_value()
    assert long_max == 10
    long_min = long_tracker.token_length.histogram.get_min_value()
    assert long_min == 10

    # Merge from the long tracker's side, as the original test does.
    combined = long_tracker.merge(short_tracker)

    combined_max = combined.token_length.histogram.get_max_value()
    assert combined_max == 10
    combined_min = combined.token_length.histogram.get_min_value()
    assert combined_min == 1