Ejemplo n.º 1
0
class Quantile(Reduction):
    """Reduction node declaring a single-quantile computation over ``arg``.

    The declared output dtype is always ``dt.float64``.
    """

    # Input to reduce; validated by ``rlz.any``.
    arg = rlz.any
    # Quantile to compute; validated by ``rlz.strict_numeric``.
    # NOTE(review): presumably a fraction in [0, 1]; no range check is
    # visible here -- confirm against the backends.
    quantile = rlz.strict_numeric
    # Interpolation method name; must be one of the strings in the set below.
    interpolation = rlz.isin(
        {'linear', 'lower', 'higher', 'midpoint', 'nearest'})

    output_dtype = dt.float64
Ejemplo n.º 2
0
class Bucket(BucketLike):
    """Bucketing operation over a column using explicit bucket edges.

    Construction fails with ``ValueError`` when no edge is supplied, or when
    exactly one edge is supplied without both ``include_under`` and
    ``include_over`` being truthy.
    """

    arg = rlz.column(rlz.any)
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Check the edge count, then defer to the parent constructor.

        Raises:
            ValueError: if ``buckets`` is empty, or holds a single edge while
                ``include_under`` and ``include_over`` are not both truthy.
        """
        edge_count = len(buckets)
        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')
        # A lone edge only splits the data when both open-ended buckets exist.
        if edge_count == 1 and not (include_under and include_over):
            raise ValueError('If one bucket edge provided, must have '
                             'include_under=True and include_over=True')
        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Number of buckets: edges minus one, plus one per enabled open end."""
        interior = len(self.buckets) - 1
        return interior + self.include_over + self.include_under
Ejemplo n.º 3
0
class MultiQuantile(Quantile):
    """``Quantile`` subclass whose ``quantile`` argument is an array of float64.

    The declared output dtype is ``dt.Array(dt.float64)``.
    """

    # Input to reduce; validated by ``rlz.any``.
    arg = rlz.any
    # Quantiles to compute, validated as a value of type Array(float64).
    quantile = rlz.value(dt.Array(dt.float64))
    # Interpolation method name; must be one of the strings in the set below.
    interpolation = rlz.isin(
        {'linear', 'lower', 'higher', 'midpoint', 'nearest'})

    output_dtype = dt.Array(dt.float64)
Ejemplo n.º 4
0
class Correlation(Filterable, Reduction):
    """Coefficient of correlation of a set of number pairs."""

    # The two numeric columns supplying the paired values.
    left = rlz.column(rlz.numeric)
    right = rlz.column(rlz.numeric)
    # Must be 'sample' or 'pop'.
    # NOTE(review): presumably sample vs. population statistic -- confirm.
    how = rlz.isin({'sample', 'pop'})

    # The declared result dtype is always float64.
    output_dtype = dt.float64
Ejemplo n.º 5
0
class Covariance(Filterable, Reduction):
    """Covariance of a set of number pairs."""

    # The two numeric columns supplying the paired values.
    left = rlz.column(rlz.numeric)
    right = rlz.column(rlz.numeric)
    # Must be 'sample' or 'pop'.
    # NOTE(review): presumably sample vs. population statistic -- confirm.
    how = rlz.isin({'sample', 'pop'})

    # The declared result dtype is always float64.
    output_dtype = dt.float64
Ejemplo n.º 6
0
class TimestampFromUNIX(Value):
    """Value node declaring a timestamp produced from ``arg`` and a unit.

    The declared output dtype is ``dt.timestamp``.
    """

    # Input value; validated by ``rlz.any``.
    arg = rlz.any
    # Only pandas-based backends support 'ns'
    unit = rlz.isin({'s', 'ms', 'us', 'ns'})

    output_dtype = dt.timestamp
    # Single definition of the output shape. A class body keeps only the last
    # assignment to a name, so this is the one that has always been in effect.
    # NOTE(review): the shape is derived from "args" rather than "arg", unlike
    # sibling operations -- confirm this is intended.
    output_shape = rlz.shape_like("args")
Ejemplo n.º 7
0
class VarianceBase(Filterable, Reduction):
    """Shared declaration for reductions over a numeric column with a ``how``.

    ``how`` must be 'sample' or 'pop'.
    """

    arg = rlz.column(rlz.numeric)
    how = rlz.isin({'sample', 'pop'})

    @immutable_property
    def output_dtype(self):
        """Return float64, or the ``largest`` dtype of a decimal argument."""
        if not isinstance(self.arg, ir.DecimalValue):
            return dt.float64
        # Decimal inputs use the ``largest`` attribute of their own dtype.
        return self.arg.type().largest
Ejemplo n.º 8
0
class DropNa(TableNode, sch.HasSchema):
    """Drop null values in the table."""

    # Table to operate on; validated by ``rlz.table``.
    table = rlz.table
    # Must be 'any' or 'all'.
    # NOTE(review): presumably "drop a row when any / all of the inspected
    # columns are null" -- confirm against the backend implementations.
    how = rlz.isin({'any', 'all'})
    # Optional tuple of columns resolved from ``table``; defaults to ``()``.
    subset = rlz.optional(rlz.tuple_of(rlz.column_from("table")), default=())

    @property
    def schema(self):
        """Return the schema reported by the input table."""
        return self.table.schema()
Ejemplo n.º 9
0
class IntervalFromInteger(Value):
    """Value node declaring an interval built from an integer and a unit."""

    arg = rlz.integer
    unit = rlz.isin({'Y', 'Q', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'})

    output_shape = rlz.shape_like("arg")

    @immutable_property
    def output_dtype(self):
        """Return ``dt.Interval`` built from ``unit`` and the dtype of ``arg``."""
        value_type = self.arg.type()
        return dt.Interval(self.unit, value_type)

    @property
    def resolution(self):
        """Return the ``resolution`` attribute of the output dtype."""
        interval_dtype = self.output_dtype
        return interval_dtype.resolution
Ejemplo n.º 10
0
class ParseURL(Value):
    """Value node declaring extraction of one component from a URL string.

    The declared output dtype is ``dt.string`` and the output shape follows
    ``arg``.
    """

    # String input holding the URL.
    arg = rlz.string
    # Name of the component to extract; must be one of the strings below.
    extract = rlz.isin({
        'PROTOCOL',
        'HOST',
        'PATH',
        'REF',
        'AUTHORITY',
        'FILE',
        'USERINFO',
        'QUERY',
    })
    # Optional string argument.
    # NOTE(review): presumably the query-parameter name used with 'QUERY' --
    # confirm.
    key = rlz.optional(rlz.string)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.string
Ejemplo n.º 11
0
class Histogram(BucketLike):
    """Histogram bucketing operation configured by ``nbins`` or ``binwidth``.

    Exactly one of ``nbins`` and ``binwidth`` must be given; ``_validate``
    raises ``ValueError`` otherwise.
    """

    arg = Arg(rlz.noop)
    nbins = Arg(rlz.noop, default=None)
    binwidth = Arg(rlz.noop, default=None)
    base = Arg(rlz.noop, default=None)
    closed = Arg(rlz.isin({'left', 'right'}), default='left')
    aux_hash = Arg(rlz.noop, default=None)

    def _validate(self):
        """Require exactly one of ``nbins`` / ``binwidth`` to be set.

        Raises:
            ValueError: if neither or both of the two arguments are given.
        """
        has_nbins = self.nbins is not None
        has_binwidth = self.binwidth is not None
        if not has_nbins and not has_binwidth:
            raise ValueError('Must indicate nbins or binwidth')
        if has_nbins and has_binwidth:
            raise ValueError('nbins and binwidth are mutually exclusive')

    def output_type(self):
        """Return the column type of ``dt.category``."""
        # always undefined cardinality (for now)
        category_dtype = dt.category
        return category_dtype.column_type()
Ejemplo n.º 12
0
class Bucket(BucketLike):
    """Bucketing operation using explicit bucket edges.

    ``_validate`` rejects an empty edge list, and a single edge unless both
    ``include_under`` and ``include_over`` are truthy.
    """

    arg = Arg(rlz.noop)
    buckets = Arg(rlz.noop)
    closed = Arg(rlz.isin({'left', 'right'}), default='left')
    close_extreme = Arg(bool, default=True)
    include_under = Arg(bool, default=False)
    include_over = Arg(bool, default=False)

    def _validate(self):
        """Check that the bucket edges describe at least one bucket.

        Raises:
            ValueError: if there are no edges, or a single edge without both
                ``include_under`` and ``include_over`` enabled.
        """
        edge_count = len(self.buckets)
        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')
        # A lone edge only splits the data when both open-ended buckets exist.
        if edge_count == 1 and not (self.include_under and self.include_over):
            raise ValueError('If one bucket edge provided, must have '
                             'include_under=True and include_over=True')

    @property
    def nbuckets(self):
        """Number of buckets: edges minus one, plus one per enabled open end."""
        interior = len(self.buckets) - 1
        return interior + self.include_over + self.include_under
Ejemplo n.º 13
0
class Histogram(BucketLike):
    """Histogram bucketing operation configured by ``nbins`` or ``binwidth``.

    Exactly one of ``nbins`` and ``binwidth`` must be given; the constructor
    raises ``ValueError`` otherwise.
    """

    arg = rlz.numeric
    nbins = rlz.optional(rlz.instance_of(int))
    binwidth = rlz.optional(rlz.scalar(rlz.numeric))
    base = rlz.optional(rlz.scalar(rlz.numeric))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    aux_hash = rlz.optional(rlz.instance_of(str))

    def __init__(self, nbins, binwidth, **kwargs):
        """Check the bin specification, then defer to the parent constructor.

        Raises:
            ValueError: if neither or both of ``nbins`` and ``binwidth`` are
                given.
        """
        has_nbins = nbins is not None
        has_binwidth = binwidth is not None
        if not has_nbins and not has_binwidth:
            raise ValueError('Must indicate nbins or binwidth')
        if has_nbins and has_binwidth:
            raise ValueError('nbins and binwidth are mutually exclusive')
        super().__init__(nbins=nbins, binwidth=binwidth, **kwargs)

    @property
    def output_dtype(self):
        """Return ``dt.category``."""
        # always undefined cardinality (for now)
        return dt.category
Ejemplo n.º 14
0
class ToIntervalUnit(Value):
    """Value node declaring an interval re-expressed in a given unit."""

    arg = rlz.interval
    unit = rlz.isin({'Y', 'Q', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'})

    output_shape = rlz.shape_like("arg")

    def __init__(self, arg, unit):
        """Convert ``arg`` to ``unit`` if its unit differs, then initialise."""
        source_unit = arg.type().unit
        # Skip the conversion when the argument already uses the target unit.
        if source_unit != unit:
            arg = util.convert_unit(arg, source_unit, unit)
        super().__init__(arg=arg, unit=unit)

    @immutable_property
    def output_dtype(self):
        """Return an interval dtype in ``unit``.

        The value type and nullability are copied from the dtype of ``arg``.
        """
        source = self.arg.type()
        interval_kwargs = dict(
            unit=self.unit,
            value_type=source.value_type,
            nullable=source.nullable,
        )
        return dt.Interval(**interval_kwargs)
Ejemplo n.º 15
0
def test_invalid_isin(values, value, expected):
    """Check that ``rlz.isin(values, value)`` raises ``expected``."""
    expectation = pytest.raises(expected)
    with expectation:
        rlz.isin(values, value)
Ejemplo n.º 16
0
def test_valid_isin(values, value, expected):
    """Check that ``rlz.isin(values, value)`` returns ``expected``."""
    result = rlz.isin(values, value)
    assert result == expected
Ejemplo n.º 17
0
class Arbitrary(Filterable, Reduction):
    """Reduction node declaring selection of one value from a column.

    NOTE(review): which value is selected is presumably governed by ``how``
    -- confirm against the backend implementations.
    """

    # Column to reduce; any element type is accepted.
    arg = rlz.column(rlz.any)
    # Optional selector; when given it must be 'first', 'last' or 'heavy'.
    how = rlz.optional(rlz.isin({'first', 'last', 'heavy'}))
    # The output dtype is derived from ``arg`` via ``rlz.dtype_like``.
    output_dtype = rlz.dtype_like('arg')
Ejemplo n.º 18
0
def test_invalid_isin(values, value, expected):
    """Check that ``rlz.isin(values, value)`` raises ``expected``."""
    expectation = pytest.raises(expected)
    with expectation:
        rlz.isin(values, value)
Ejemplo n.º 19
0
class Hash(ValueOp):
    """Value operation declaring a hash of ``arg`` using the ``how`` algorithm."""

    # Input to hash; validated by ``rlz.any``.
    arg = Arg(rlz.any)
    # Algorithm name; must be "fnv" or "farm_fingerprint".
    how = Arg(rlz.isin({"fnv", "farm_fingerprint"}))
    # Output type built by ``rlz.shape_like`` from ``arg`` and int64.
    output_type = rlz.shape_like("arg", dt.int64)
Ejemplo n.º 20
0
class HashBytes(ValueOp):
    """Value operation declaring a hash of a string or binary value."""

    # Input to hash; must validate as either a string or a binary value.
    arg = Arg(rlz.one_of([rlz.value(dt.string), rlz.value(dt.binary)]))
    # Algorithm name; must be "sha256" or "farm_fingerprint".
    how = Arg(rlz.isin({"sha256", "farm_fingerprint"}))
    # Output type built by ``rlz.shape_like`` from ``arg`` and "binary".
    output_type = rlz.shape_like("arg", "binary")
Ejemplo n.º 21
0
class TimeTruncate(Value):
    """Value node declaring truncation of a time value to a given unit.

    The declared output dtype is ``dt.time`` and the output shape follows
    ``arg``.
    """

    # Time input.
    arg = rlz.time
    # Truncation unit; must be a member of ``_time_units``, which is defined
    # outside this class.
    unit = rlz.isin(_time_units)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.time
Ejemplo n.º 22
0
class DateTruncate(Value):
    """Value node declaring truncation of a date value to a given unit.

    The declared output dtype is ``dt.date`` and the output shape follows
    ``arg``.
    """

    # Date input.
    arg = rlz.date
    # Truncation unit; must be a member of ``_date_units``, which is defined
    # outside this class.
    unit = rlz.isin(_date_units)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.date
Ejemplo n.º 23
0
def test_valid_isin(values, value, expected):
    """Check that ``rlz.isin(values, value)`` returns ``expected``."""
    result = rlz.isin(values, value)
    assert result == expected
Ejemplo n.º 24
0
class Hash(Value):
    """Value node declaring a hash of ``arg`` using the ``how`` algorithm.

    The declared output dtype is ``dt.int64`` and the output shape follows
    ``arg``.
    """

    # Input to hash; validated by ``rlz.any``.
    arg = rlz.any
    # Algorithm name; must be 'fnv' or 'farm_fingerprint'.
    how = rlz.isin({'fnv', 'farm_fingerprint'})

    output_dtype = dt.int64
    output_shape = rlz.shape_like("arg")
Ejemplo n.º 25
0
class HashBytes(Value):
    """Value node declaring a hash of a string or binary value.

    The declared output dtype is ``dt.binary`` and the output shape follows
    ``arg``.
    """

    # Input to hash; must validate as either a string or a binary value.
    arg = rlz.one_of({rlz.value(dt.string), rlz.value(dt.binary)})
    # Algorithm name; must be 'md5', 'sha1', 'sha256' or 'sha512'.
    how = rlz.isin({'md5', 'sha1', 'sha256', 'sha512'})

    output_dtype = dt.binary
    output_shape = rlz.shape_like("arg")