def test_match_substring_regex():
    """Check ``pc.match_substring_regex`` with and without ``ignore_case``.

    The pattern ``^a?b`` is anchored at the start of the string and accepts
    an optional ``a`` followed by ``b``. Null inputs are expected to yield
    null outputs.
    """
    pattern = "^a?b"

    # Default call: all-lowercase input.
    lowercase = pa.array(["ab", "abc", "ba", "c", None])
    matched = pc.match_substring_regex(lowercase, pattern)
    assert pa.array([True, True, True, False, None]).equals(matched)

    # Mixed-case input: the lowercase pattern only matches when
    # ignore_case=True; with ignore_case=False no non-null element matches.
    mixed_case = pa.array(["aB", "Abc", "BA", "c", None])
    cases = (
        (True, [True, True, True, False, None]),
        (False, [False, False, False, False, None]),
    )
    for ignore_case, expected_values in cases:
        matched = pc.match_substring_regex(
            mixed_case, pattern, ignore_case=ignore_case
        )
        assert pa.array(expected_values).equals(matched)
def _str_endswith(self, pat, na=None):
    """Test whether each element ends with the literal string ``pat``.

    Parameters
    ----------
    pat : str
        Literal suffix; it is regex-escaped before being used.
    na : object, default None
        When not missing (per ``isna``), missing entries in the result are
        overwritten with ``bool(na)``.

    Returns
    -------
    Boolean array produced by ``BooleanDtype().__from_arrow__``, or whatever
    the parent implementation returns when ``pc`` has no
    ``match_substring_regex``.
    """
    # Without the regex kernel, defer to the parent implementation.
    if not hasattr(pc, "match_substring_regex"):
        return super()._str_endswith(pat, na)

    arrow_data = self._data
    # Escaped literal followed by "$" anchors the match at the end.
    arrow_mask = pc.match_substring_regex(arrow_data, re.escape(pat) + "$")
    mask = BooleanDtype().__from_arrow__(arrow_mask)
    if not isna(na):
        mask[isna(mask)] = bool(na)
    return mask
def _str_endswith(self, pat, na=None):
    """Test whether each element ends with the literal string ``pat``.

    Parameters
    ----------
    pat : str
        Literal suffix; it is regex-escaped before being used.
    na : object, default None
        When not missing (per ``isna``), missing entries in the result are
        overwritten with ``bool(na)``.

    Returns
    -------
    Boolean array produced by ``BooleanDtype().__from_arrow__``, or whatever
    the parent implementation returns when ``pa_version_under4p0`` is set.
    """
    if not pa_version_under4p0:
        arrow_data = self._data
        # Escaped literal followed by "$" anchors the match at the end.
        ends_with = BooleanDtype().__from_arrow__(
            pc.match_substring_regex(arrow_data, re.escape(pat) + "$")
        )
        if not isna(na):
            ends_with[isna(ends_with)] = bool(na)
        return ends_with

    # Old pyarrow: defer to the parent implementation.
    return super()._str_endswith(pat, na)
def _str_startswith(self, pat, na=None):
    """Test whether each element starts with the literal string ``pat``.

    Parameters
    ----------
    pat : str
        Literal prefix; it is regex-escaped before being used.
    na : object, default None
        When not missing (per ``isna``), missing entries in the result are
        overwritten with ``bool(na)``.

    Returns
    -------
    Boolean array produced by ``BooleanDtype().__from_arrow__``, or whatever
    the parent implementation returns when ``pc`` has no
    ``match_substring_regex``.
    """
    # match_substring_regex added in pyarrow 4.0.0
    if not hasattr(pc, "match_substring_regex"):
        return super()._str_startswith(pat, na)

    arrow_data = self._data
    # "^" followed by the escaped literal anchors the match at the start.
    starts_with = BooleanDtype().__from_arrow__(
        pc.match_substring_regex(arrow_data, "^" + re.escape(pat))
    )
    if isna(na):
        return starts_with
    starts_with[isna(starts_with)] = bool(na)
    return starts_with
return lib.map_infer_mask(arr, f, mask.view("uint8"))

def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
    """Test whether each element contains ``pat``.

    Parameters
    ----------
    pat : str
        Pattern to look for; treated as a regex when ``regex`` is true,
        otherwise as a literal substring.
    case : bool, default True
        Whether matching is case sensitive.
    flags : int, default 0
        Regex flags. Any truthy value sends the call to the parent
        implementation.
    na : object, default np.nan
        When not missing (per ``isna``), missing entries in the result are
        overwritten with ``bool(na)``.
    regex : bool, default True
        Selects regex matching versus literal substring matching.

    Returns
    -------
    Boolean array produced by ``BooleanDtype().__from_arrow__``, or whatever
    the parent implementation returns on the fallback paths.
    """
    # Regex flags are not handled here; defer to the parent implementation.
    if flags:
        return super()._str_contains(pat, case, flags, na, regex)

    if regex:
        # NOTE(review): this is an identity check (`case is False`), while the
        # literal branch below uses truthiness (`if case:`). A falsy value
        # that is not the `False` singleton reaches the case-sensitive regex
        # kernel here -- confirm this is intended.
        if pa_version_under4p0 or case is False:
            return super()._str_contains(pat, case, flags, na, regex)
        else:
            result = pc.match_substring_regex(self._data, pat)
    else:
        if case:
            result = pc.match_substring(self._data, pat)
        else:
            # Case-insensitive literal match: upper-case both sides first.
            result = pc.match_substring(pc.utf8_upper(self._data), pat.upper())
    result = BooleanDtype().__from_arrow__(result)
    # Replace missing results with the caller-supplied fill value, if any.
    if not isna(na):
        result[isna(result)] = bool(na)
    return result

def _str_startswith(self, pat: str, na=None):
    if pa_version_under4p0:
        return super()._str_startswith(pat, na)
def test_match_substring_regex():
    """Check ``pc.match_substring_regex`` against the pattern ``^a?b``.

    The pattern is anchored at the start of the string and accepts an
    optional ``a`` followed by ``b``. The null input is expected to yield a
    null output.
    """
    values = pa.array(["ab", "abc", "ba", "c", None])
    matches = pc.match_substring_regex(values, "^a?b")
    assert pa.array([True, True, True, False, None]).equals(matches)