コード例 #1
0
        raise Exception("A Agg UDF with same name already exists %s" % udf.name)
      self.scalar_udfs[udf.name] = udf

  def __getitem__(self, name):
    """Look up a registered UDF by name.

    The scalar table is consulted first, then the aggregate table, so a
    name present in both resolves to the scalar entry.

    Raises:
      Exception: if neither table contains `name`.
    """
    # Attributes are fetched lazily, one at a time, in priority order.
    for table_attr in ("scalar_udfs", "agg_udfs"):
      table = getattr(self, table_attr)
      if name in table:
        return table[name]
    raise Exception("Could not find UDF named %s" % name)


#
# Prepopulate registry with simple functions
#
# `registry` is bound to whatever UDFRegistry.registry() returns; the two
# scalar UDFs below are added to it as soon as these statements run.
# Each lambda first casts its input column to string() and then applies the
# utf8_lower / utf8_upper kernel from `compute`.
# NOTE(review): the literal 1 is presumably the UDF's argument count --
# confirm against the ScalarUDF constructor.
registry = UDFRegistry.registry()
registry.add(ScalarUDF("lower", 1, lambda col: compute.utf8_lower(col.cast(string()))))
registry.add(ScalarUDF("upper", 1, lambda col: compute.utf8_upper(col.cast(string()))))

#
# Prepopulate with incremental aggregation functions
#
# Each aggregate below applies a single `compute` kernel (count / mean / sum)
# to the column it is given and casts the result to float64().
# NOTE(review): the literal 1 is presumably the UDF's argument count --
# confirm against the AggUDF constructor.

registry.add(AggUDF("count", 1, lambda col: compute.count(col).cast(float64())))
registry.add(AggUDF("avg", 1, lambda col: compute.mean(col).cast(float64())))
registry.add(AggUDF("sum", 1, lambda col: compute.sum(col).cast(float64())))

# Welford's algorithm for online std
def std_init():
  """Return a fresh, mutable initial state for the online std computation.

  The state is a three-element list `[0, 0., 0]`. `std_update` increments
  slot 0 once per value and subtracts slot 1 from each incoming value, so
  slots 0 and 1 act as the running count and running mean.
  NOTE(review): slot 2 is presumably Welford's running sum of squared
  deviations (M2) -- confirm against the rest of `std_update`.

  A new list is built on every call so that separate aggregations never
  share state.
  """
  return [0, 0., 0]
def std_update(s, v):
  s[0] += 1
  d = v - s[1]
コード例 #2
0
 def _str_lower(self):
     """Return a new object of this instance's own type holding lower-cased data.

     The wrapped ``self._data`` is passed through ``pc.utf8_lower`` and the
     result is handed to ``type(self)`` to build the returned object.
     """
     lowered = pc.utf8_lower(self._data)
     wrapper_cls = type(self)
     return wrapper_cls(lowered)
コード例 #3
0
 def _expr_kernel(self, arguments: Any, table: ArrowTable) -> Any:
     """Apply ``pc.utf8_lower`` to the unpacked ``arguments`` and return the result.

     ``table`` is accepted but not used by this kernel.
     """
     lowered = pc.utf8_lower(*arguments)
     return lowered