Example #1
0
def new_dataset(expr, deltas):
    """Build a ``DataSet`` subclass with one attribute per field of ``expr``.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the first known values. Its
        ``dshape.measure.fields`` supply the column names and types, and its
        ``_name`` supplies the name of the generated type.
    deltas : Expr
        The blaze expression representing the deltas to the data. It is not
        read inside this function body.

    Returns
    -------
    ds : type
        A new dataset type.

    Notes
    -----
    This function is memoized. Repeated calls with the same inputs will return
    the same type.
    """
    def make_column(field_name, field_type):
        # Pick the attribute used for one field:
        #   * promotion against float64 raises TypeError -> NonNumpyField
        #   * promotion yields something other than float64 -> NonPipelineField
        #   * otherwise a Column built from the (unwrapped) numpy scalar type
        try:
            if promote(field_type, float64, promote_option=False) != float64:
                raise NotPipelineCompatible()
            if isinstance(field_type, Option):
                field_type = field_type.ty
        except NotPipelineCompatible:
            return NonPipelineField(field_name, field_type)
        except TypeError:
            return NonNumpyField(field_name, field_type)
        return Column(field_type.to_numpy_dtype().type)

    columns = {
        field_name: make_column(field_name, field_type)
        for field_name, field_type in expr.dshape.measure.fields
    }

    ds_name = expr._name
    if ds_name is None:
        # Anonymous expression: draw the next generated name.
        ds_name = next(_new_names)

    # unicode is a name error in py3 but the branch is only hit
    # when we are in python 2.
    if PY2 and isinstance(ds_name, unicode):  # noqa
        ds_name = ds_name.encode('utf-8')

    return type(ds_name, (DataSet,), columns)
Example #2
0
def coalesce(a, b):
    """Build a ``Coalesce`` of ``a`` and ``b``, short-circuiting when possible.

    The datashape of ``a`` is discovered first:

    * if its measure (looking through an ``Option`` wrapper) is ``Null``,
      ``b`` is returned unchanged;
    * if its measure is not an ``Option``, ``a`` is returned unchanged;
    * otherwise a ``Coalesce`` node is returned whose datashape combines
      ``maxshape`` of both shapes with the promotion of both measures.
    """
    shape_of_a = discover(a)
    measure_of_a = shape_of_a.measure

    a_is_optional = isinstance(measure_of_a, Option)
    if a_is_optional:
        # Look through the Option wrapper at the underlying measure.
        measure_of_a = measure_of_a.ty

    if isinstance(measure_of_a, Null):
        # a is always null, this is just b
        return b

    if not a_is_optional:
        # a is not an option, this is just a
        return a

    shape_of_b = discover(b)
    dims = maxshape((shape_of_a.shape, shape_of_b.shape))
    measure = promote(measure_of_a, shape_of_b.measure)
    return Coalesce(a, b, DataShape(*(dims + (measure,))))
Example #3
0
def promote(type1, type2):
    """Promote two types to a common type.

    Equal types are returned as-is. Two ``inference.Method`` types are
    promoted through the second element of their ``parameters``; whichever
    method type carries the promoted element is returned, and ``TypeError``
    is raised when neither does. Every other pair is converted with
    ``to_blaze``, promoted with ``ds.promote`` and passed through
    ``resolve_type``.
    """
    from flypy.compiler.typing import inference

    if type1 == type2:
        return type1

    both_methods = (type(type1) == inference.Method and
                    type(type2) == inference.Method)
    if not both_methods:
        blaze1, blaze2 = to_blaze(type1), to_blaze(type2)
        return resolve_type(ds.promote(blaze1, blaze2))

    # promote Method types
    # TODO: Bit of a hack, do this better
    _, bound1 = type1.parameters
    _, bound2 = type2.parameters
    common = promote(bound1, bound2)
    if common == bound1:
        return type1
    if common == bound2:
        return type2
    raise TypeError("Cannot promote methods %s and %s" % (type1, type2))
Example #4
0
def promote(type1, type2):
    """Return a common type for ``type1`` and ``type2``.

    * Equal inputs: ``type1`` is returned.
    * Two ``inference.Method`` types: the second entries of their
      ``parameters`` are promoted recursively, and the method type whose
      entry equals the result is returned; ``TypeError`` is raised if neither
      matches.
    * Anything else: both are converted with ``to_blaze``, promoted by
      ``ds.promote``, and the result is passed to ``resolve_type``.
    """
    from flypy.compiler.typing import inference

    if type1 == type2:
        return type1

    method_cls = inference.Method
    if type(type1) == method_cls and type(type2) == method_cls:
        # promote Method types
        # TODO: Bit of a hack, do this better
        _func1, owner1 = type1.parameters
        _func2, owner2 = type2.parameters
        promoted_owner = promote(owner1, owner2)
        if promoted_owner == owner1:
            return type1
        if promoted_owner == owner2:
            return type2
        raise TypeError("Cannot promote methods %s and %s" %
                        (type1, type2))

    converted1, converted2 = to_blaze(type1), to_blaze(type2)
    promoted = ds.promote(converted1, converted2)
    return resolve_type(promoted)
Example #5
0
    def schema(self):
        """Compute the record datashape produced by this join.

        Fields are emitted in three groups, in this order:

        1. the left-hand fields selected by ``on_left``, each promoted against
           the matching right-hand field with ``promote_option=False``;
        2. the remaining left-hand fields;
        3. the remaining right-hand fields.

        A non-key name that appears on both sides gets the matching entry of
        ``self.suffixes`` appended. Remaining left-hand fields are wrapped in
        ``Option`` when ``self.how`` is ``'right'`` or ``'outer'``; remaining
        right-hand fields are wrapped when it is ``'left'`` or ``'outer'``.

        Examples
        --------
        >>> from blaze import symbol
        >>> t = symbol('t', 'var * {name: string, amount: int}')
        >>> s = symbol('t', 'var * {name: string, id: int}')

        >>> join(t, s).schema
        dshape("{name: string, amount: int32, id: int32}")

        >>> join(t, s, how='left').schema
        dshape("{name: string, amount: int32, id: ?int32}")

        Overlapping but non-joined fields append _left, _right

        >>> a = symbol('a', 'var * {x: int, y: int}')
        >>> b = symbol('b', 'var * {x: int, y: int}')
        >>> join(a, b, 'x').fields
        ['x', 'y_left', 'y_right']
        """
        def option(dt):
            # Wrap ``dt`` in Option unless it already is one.
            return dt if isinstance(dt, Option) else Option(dt)

        # Normalise a single join key into a one-element tuple.
        on_left = self.on_left
        if not isinstance(on_left, list):
            on_left = (on_left,)

        on_right = self.on_right
        if not isinstance(on_right, list):
            on_right = (on_right,)

        # Re-key the right-hand field types by the corresponding left-hand
        # key name so they can be looked up while walking the left fields.
        right_types = keymap(
            dict(zip(on_right, on_left)).get,
            self.rhs.dshape.measure.dict,
        )
        # Left-hand fields accepted by ``op.contains(on_left)`` (presumably a
        # membership test against on_left; ``op`` is imported outside this
        # view), promoted against their right-hand partner.
        joined = (
            (name, promote(dt, right_types[name], promote_option=False))
            for name, dt in filter(
                compose(op.contains(on_left), first),
                self.lhs.dshape.measure.fields,
            )
        )

        left = [
            (name, dt) for name, dt in zip(
                self.lhs.fields,
                types_of_fields(self.lhs.fields, self.lhs)
            ) if name not in on_left
        ]

        right = [
            (name, dt) for name, dt in zip(
                self.rhs.fields,
                types_of_fields(self.rhs.fields, self.rhs)
            ) if name not in on_right
        ]

        # Handle overlapping but non-joined case: ``left`` and ``right`` were
        # already stripped of the join keys above, so their names can be
        # intersected directly.
        overlap = {name for name, _ in left} & {name for name, _ in right}

        left_suffix, right_suffix = self.suffixes
        left = ((name + left_suffix if name in overlap else name, dt)
                for name, dt in left)
        right = ((name + right_suffix if name in overlap else name, dt)
                 for name, dt in right)

        if self.how in ('right', 'outer'):
            left = ((name, option(dt)) for name, dt in left)
        if self.how in ('left', 'outer'):
            right = ((name, option(dt)) for name, dt in right)

        return dshape(Record(chain(joined, left, right)))
Example #6
0
def test_simple():
    """Promoting ``int64`` with ``float32`` is expected to give ``float64``."""
    promoted = promote(int64, float32)
    assert promoted == float64
Example #7
0
def test_no_promote_option():
    """With ``promote_option=False`` the expected result is plain ``float64``."""
    promoted = promote(int64, Option(float64), promote_option=False)
    assert promoted == float64
Example #8
0
def test_option():
    """Promoting ``int64`` with ``Option(float32)`` should give ``Option(float64)``."""
    promoted = promote(int64, Option(float32))
    assert promoted == Option(float64)
Example #9
0
 def _dtype(self):
     """Return the promotion of the measures discovered for both operands."""
     # we can't simply use .schema or .datashape because we may have a bare
     # integer, for example
     left_measure = discover(self.lhs).measure
     right_measure = discover(self.rhs).measure
     return promote(left_measure, right_measure)
Example #10
0
    def schema(self):
        """Return the record datashape of the join result.

        The record lists, in order: the left-hand fields selected by
        ``on_left`` (each promoted with the right-hand field it is joined
        against, using ``promote_option=False``), then the other left-hand
        fields, then the other right-hand fields. Non-key names shared by
        both sides receive the suffixes from ``self.suffixes``. The other
        left-hand fields become ``Option`` when ``self.how`` is ``'right'``
        or ``'outer'``; the other right-hand fields become ``Option`` when it
        is ``'left'`` or ``'outer'``.

        Examples
        --------
        >>> from blaze import symbol
        >>> t = symbol('t', 'var * {name: string, amount: int}')
        >>> s = symbol('t', 'var * {name: string, id: int}')

        >>> join(t, s).schema
        dshape("{name: string, amount: int32, id: int32}")

        >>> join(t, s, how='left').schema
        dshape("{name: string, amount: int32, id: ?int32}")

        Overlapping but non-joined fields append _left, _right

        >>> a = symbol('a', 'var * {x: int, y: int}')
        >>> b = symbol('b', 'var * {x: int, y: int}')
        >>> join(a, b, 'x').fields
        ['x', 'y_left', 'y_right']
        """

        def option(dt):
            # Leave existing Option types alone; wrap everything else.
            return dt if isinstance(dt, Option) else Option(dt)

        # A single key (anything that is not a list) becomes a 1-tuple.
        on_left = self.on_left
        if not isinstance(on_left, list):
            on_left = (on_left,)

        on_right = self.on_right
        if not isinstance(on_right, list):
            on_right = (on_right,)

        # Right-hand field types, re-keyed by the paired left-hand key name.
        right_types = keymap(
            dict(zip(on_right, on_left)).get,
            self.rhs.dshape.measure.dict,
        )
        # NOTE(review): ``op.contains(on_left)`` is assumed to be a curried
        # membership test; ``op`` is defined outside this view.
        key_fields = filter(
            compose(op.contains(on_left), first),
            self.lhs.dshape.measure.fields,
        )
        joined = ((name, promote(dt, right_types[name], promote_option=False))
                  for name, dt in key_fields)

        left = [(name, dt) for name, dt in zip(
            self.lhs.fields, types_of_fields(self.lhs.fields, self.lhs))
                if name not in on_left]

        right = [(name, dt) for name, dt in zip(
            self.rhs.fields, types_of_fields(self.rhs.fields, self.rhs))
                 if name not in on_right]

        # Handle overlapping but non-joined case. Join keys were already
        # removed from ``left`` and ``right``, so no further filtering is
        # needed before intersecting the names.
        left_other = {name for name, _ in left}
        right_other = {name for name, _ in right}
        overlap = left_other & right_other

        left_suffix, right_suffix = self.suffixes
        left = ((name + left_suffix if name in overlap else name, dt)
                for name, dt in left)
        right = ((name + right_suffix if name in overlap else name, dt)
                 for name, dt in right)

        if self.how in ('right', 'outer'):
            left = ((name, option(dt)) for name, dt in left)
        if self.how in ('left', 'outer'):
            right = ((name, option(dt)) for name, dt in right)

        return dshape(Record(chain(joined, left, right)))
Example #11
0
 def _dtype(self):
     """Promote the discovered measures of ``self.lhs`` and ``self.rhs``."""
     # we can't simply use .schema or .datashape because we may have a bare
     # integer, for example
     lhs_measure = discover(self.lhs).measure
     rhs_measure = discover(self.rhs).measure
     return promote(lhs_measure, rhs_measure)
Example #12
0
def new_dataset(expr, deltas, missing_values):
    """
    Creates or returns a dataset from a pair of blaze expressions.

    Parameters
    ----------
    expr : Expr
        The blaze expression representing the first known values.
    deltas : Expr
        The blaze expression representing the deltas to the data. It is not
        read inside this function body.
    missing_values : frozenset of (name, value) pairs
        Association pairs of column name and missing_value for that column.

        This needs to be a frozenset rather than a dict or tuple of tuples
        because we want a collection that's unordered but still hashable.

    Returns
    -------
    ds : type
        A new dataset type.

    Notes
    -----
    This function is memoized. Repeated calls with the same inputs will return
    the same type.
    """
    missing_by_name = dict(missing_values)

    def make_column(field_name, field_type):
        # Pick the attribute used for one field:
        #   * promotion against float64 raises TypeError -> NonNumpyField
        #   * promotion yields something other than float64 -> NonPipelineField
        #   * otherwise a Column of the (unwrapped) numpy dtype, carrying the
        #     configured missing value or NotSpecified
        try:
            # TODO: This should support datetime and bool columns.
            if promote(field_type, float64, promote_option=False) != float64:
                raise NotPipelineCompatible()
            if isinstance(field_type, Option):
                field_type = field_type.ty
        except NotPipelineCompatible:
            return NonPipelineField(field_name, field_type)
        except TypeError:
            return NonNumpyField(field_name, field_type)
        return Column(
            field_type.to_numpy_dtype(),
            missing_by_name.get(field_name, NotSpecified),
        )

    # Don't generate a column for sid or timestamp, since they're
    # implicitly the labels of the arrays that will be passed to pipeline
    # Terms.
    columns = {
        field_name: make_column(field_name, field_type)
        for field_name, field_type in expr.dshape.measure.fields
        if field_name not in (SID_FIELD_NAME, TS_FIELD_NAME)
    }

    ds_name = expr._name
    if ds_name is None:
        # Anonymous expression: draw the next generated name.
        ds_name = next(_new_names)

    # unicode is a name error in py3 but the branch is only hit
    # when we are in python 2.
    if PY2 and isinstance(ds_name, unicode):  # noqa
        ds_name = ds_name.encode('utf-8')

    return type(ds_name, (DataSet,), columns)
Example #13
0
def test_promote_string_with_option(x, y, p, r):
    """Promotion must give ``r`` regardless of argument order."""
    forward = promote(x, y, promote_option=p)
    backward = promote(y, x, promote_option=p)
    assert forward == backward == r
Example #14
0
def test_promote_datetime_with_option(x, y, p, r):
    """Promotion must give ``r`` regardless of argument order."""
    forward = promote(x, y, promote_option=p)
    backward = promote(y, x, promote_option=p)
    assert forward == backward == r
Example #15
0
def test_promote_datetime_with_option(x, y, p, r):
    """Check ``promote`` is symmetric in ``x`` and ``y`` and equals ``r``."""
    x_then_y = promote(x, y, promote_option=p)
    y_then_x = promote(y, x, promote_option=p)
    assert x_then_y == y_then_x == r
Example #16
0
def test_promote_string_with_option(x, y, p, r):
    """Check ``promote`` is symmetric in ``x`` and ``y`` and equals ``r``."""
    x_then_y = promote(x, y, promote_option=p)
    y_then_x = promote(y, x, promote_option=p)
    assert x_then_y == y_then_x == r