Example #1
0
def test_match_random(seed):
    """Check match() against re.fullmatch() using a randomly built regex.

    The seed fixes the row count, the length parameter ``k``, the generated
    pattern and the input strings, so a failing run can be replayed.
    """
    random.seed(seed)
    n = int(random.expovariate(0.001) + 100)
    k = random.randint(2, 12)

    # Collect pattern fragments until the running counter reaches k.
    # A ".*" fragment advances the counter by 4, every other fragment by 1.
    pieces = []
    weight = 0
    while weight < k:
        roll = random.random()
        if roll < 0.4:
            piece, cost = ".", 1
        elif roll < 0.6:
            piece, cost = random.choice("abcdefgh"), 1
        elif roll < 0.8:
            piece, cost = ".*", 4
        else:
            piece, cost = "\\w", 1
        pieces.append(piece)
        weight += cost
    pattern = re.compile("".join(pieces))

    # random_string() is defined elsewhere; presumably it yields a random
    # string sized by k -- confirm against its definition.
    strings = [random_string(k) for _ in range(n)]
    frame = dt.Frame(A=strings)
    actual = frame[:, match(f.A, pattern)]
    expected = dt.Frame(A=[bool(re.fullmatch(pattern, s)) for s in strings])
    assert_equals(actual, expected)
Example #2
0
def test_match_bad_regex3():
    """Using match() with the pattern "???" as a row filter raises ValueError."""
    DT = dt.Frame(A=["abc"])
    with pytest.raises(ValueError):
        # Deliberately a bare expression, not `assert ...`: assert statements
        # are removed under `python -O`, so the expression that is supposed
        # to raise would never be evaluated.
        DT[match(f.A, "???"), :]
Example #3
0
def test_match_bad_icase():
    """Passing the integer 1 as `icase` to match() raises TypeError."""
    DT = dt.Frame(A=["abc"])
    with pytest.raises(TypeError):
        # Deliberately a bare expression, not `assert ...`: assert statements
        # are removed under `python -O`, so the expression that is supposed
        # to raise would never be evaluated.
        DT[match(f.A, "a", icase=1), :]
Example #4
0
def test_match_case_insensitive():
    """The same pattern is matched with and without icase=True."""
    frame = dt.Frame(
        A=["This is an Apple", "banana", "apPle", "Which apple?"]
    )
    pattern = ".*apPle.*"
    case_sensitive = frame[:, match(f.A, pattern)]
    case_insensitive = frame[:, match(f.A, pattern, icase=True)]
    # Without icase only the exact-case "apPle" row matches.
    assert_equals(case_sensitive, dt.Frame(A=[False, False, True, False]))
    # With icase every row containing "apple" in any casing matches.
    assert_equals(case_insensitive, dt.Frame(A=[True, False, True, True]))
Example #5
0
def test_match_ignore_groups():
    """A capture group in the pattern does not change the row selection."""
    # One single-character row per letter of the word.
    letters = dt.Frame(list("abcdibaldfn"))
    selected = letters[match(f[0], "([a-c]+)"), :]
    expected = dt.Frame(["a", "b", "c", "b", "a"])
    assert_equals(selected, expected)
Example #6
0
def test_match_entire_string():
    """match() requires the whole string to match, not merely a prefix."""
    frame = dt.Frame(A=["a", "ab", "abc", "aaaa"])
    matched = frame[:, match(f.A, "a.?")]
    # "abc" and "aaaa" start with a match but are longer than the pattern.
    expected = dt.Frame(A=[True, True, False, False])
    assert_equals(matched, expected)
Example #7
0
def test_match_simple():
    """Basic three-character pattern; a None input yields a None result."""
    values = ["abc", "abd", "cab", "acc", None, "aaa"]
    frame = dt.Frame(A=values)
    matched = frame[:, match(f.A, "ab.")]
    expected = dt.Frame(A=[True, True, False, False, None, False])
    assert_equals(matched, expected)
Example #8
0
def test_match_repr():
    """str() of a match() expression shows column, raw pattern and icase."""
    default_expr = match(f.A, "abc")
    assert str(default_expr) == r"FExpr<re.match(f.A, r'abc', icase=False)>"

    icase_expr = match(f.A, r"\d+", icase=True)
    assert str(icase_expr) == r"FExpr<re.match(f.A, r'\d+', icase=True)>"