Esempio n. 1
0
 def _str_upper(self):
     """Return a new array of this same type holding the upper-cased data.

     The conversion is delegated to ``pc.utf8_upper`` applied to
     ``self._data``; the result is wrapped with ``type(self)`` so that a
     subclass gets back an instance of its own class.
     """
     wrapper = type(self)
     uppercased = pc.utf8_upper(self._data)
     return wrapper(uppercased)
Esempio n. 2
0
      self.scalar_udfs[udf.name] = udf

  def __getitem__(self, name):
    if name in self.scalar_udfs:
      return self.scalar_udfs[name]
    if name in self.agg_udfs:
      return self.agg_udfs[name]
    raise Exception("Could not find UDF named %s" % name)


#
# Prepopulate registry with simple functions and with incremental
# aggregation functions
#
def _register_builtin_udfs(target):
  """Add the built-in scalar and aggregate UDFs to *target*.

  Registration order is: lower, upper, count, avg, sum.  Every UDF is built
  with 1 as its second constructor argument (presumably the number of
  arguments the UDF takes -- confirm against ScalarUDF / AggUDF).
  """
  catalog = (
      # Scalar UDFs: cast the column to string, then change its case.
      (ScalarUDF, "lower", lambda col: compute.utf8_lower(col.cast(string()))),
      (ScalarUDF, "upper", lambda col: compute.utf8_upper(col.cast(string()))),
      # Aggregate UDFs: reduce the column, then cast the result to float64.
      (AggUDF, "count", lambda col: compute.count(col).cast(float64())),
      (AggUDF, "avg", lambda col: compute.mean(col).cast(float64())),
      (AggUDF, "sum", lambda col: compute.sum(col).cast(float64())),
  )
  # Build and add one UDF at a time so construction and registration stay
  # interleaved in the listed order.
  for udf_type, udf_name, kernel in catalog:
    target.add(udf_type(udf_name, 1, kernel))


registry = UDFRegistry.registry()
_register_builtin_udfs(registry)

# Welford's algorithm for online std
def std_init():
  """Return a fresh accumulator state for the online standard deviation.

  The state is a new three-element list ``[0, 0., 0]`` on every call, so
  callers can mutate it in place without affecting other accumulators.
  Slot 0 is the sample count and slot 1 the running mean (both are updated
  by ``std_update``); slot 2 is presumably the running sum of squared
  deviations used by Welford's algorithm -- confirm against the rest of
  ``std_update``.
  """
  return [0, 0., 0]
def std_update(s, v):
  s[0] += 1
  d = v - s[1]
  s[1] += d / s[0]
Esempio n. 3
0
                      flags=0,
                      na=np.nan,
                      regex: bool = True):
        if flags:
            return super()._str_contains(pat, case, flags, na, regex)

        if regex:
            if pa_version_under4p0 or case is False:
                return super()._str_contains(pat, case, flags, na, regex)
            else:
                result = pc.match_substring_regex(self._data, pat)
        else:
            if case:
                result = pc.match_substring(self._data, pat)
            else:
                result = pc.match_substring(pc.utf8_upper(self._data),
                                            pat.upper())
        result = BooleanDtype().__from_arrow__(result)
        if not isna(na):
            result[isna(result)] = bool(na)
        return result

    def _str_startswith(self, pat: str, na=None):
        """Test whether each string starts with the literal prefix *pat*.

        When the ``pa_version_under4p0`` flag is set, the parent class
        implementation is used.  Otherwise *pat* is escaped so it is matched
        literally, anchored to the start with ``^``, and handed to
        ``self._str_contains`` as a regular expression; *na* is forwarded
        unchanged.
        """
        if not pa_version_under4p0:
            anchored = "^" + re.escape(pat)
            return self._str_contains(anchored, na=na, regex=True)

        return super()._str_startswith(pat, na)

    def _str_endswith(self, pat: str, na=None):
        if pa_version_under4p0:
Esempio n. 4
0
 def _expr_kernel(self, arguments: Any, table: ArrowTable) -> Any:
     """Upper-case the given argument(s) using ``pc.utf8_upper``.

     *arguments* is unpacked positionally into the kernel call.  *table* is
     accepted as part of the signature but is not used in this body.
     """
     # NumPy alternative noted by the author: np.char.upper(*arguments)
     upper_kernel = pc.utf8_upper
     return upper_kernel(*arguments)