Example #1
File: sql.py Project: pfjob09/blaze
def post_compute(expr, query, scope=None):
    """ Execute SQLAlchemy query against SQLAlchemy engines

    If the result of compute is a SQLAlchemy query then it is likely that the
    data elements are themselves SQL objects which contain SQLAlchemy engines.
    We find these engines and, if they are all the same, run the query against
    these engines and return the result.
    """
    if not all(
            isinstance(val, (Engine, MetaData, Table))
            for val in scope.values()):
        return query

    engines = set(filter(None, map(engine_of, scope.values())))

    if not engines:
        return query

    if len(set(map(str, engines))) != 1:
        raise NotImplementedError("Expected single SQLAlchemy engine")

    engine = toolz.first(engines)

    with engine.connect() as conn:  # Perform query
        result = conn.execute(select(query)).fetchall()

    if isscalar(expr.dshape):
        return result[0][0]
    if isscalar(expr.dshape.measure):
        return [x[0] for x in result]
    return result
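The engine_of helper is not shown here. A minimal sketch of what it might look like, assuming (as the isinstance check above suggests) that scope values are SQLAlchemy Engine, MetaData, or Table objects from the pre-2.0 SQLAlchemy releases blaze targeted, where metadata and tables still carry a .bind attribute:

# Hypothetical sketch of engine_of; the real blaze helper is a
# dispatched function, but for these input types it reduces to this.
from sqlalchemy import MetaData, Table
from sqlalchemy.engine import Engine

def engine_of(obj):
    if isinstance(obj, Engine):
        return obj
    if isinstance(obj, (MetaData, Table)):
        return obj.bind  # None when the object is unbound
    return None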
Example #2
def post_compute(expr, query, scope=None):
    """ Execute SQLAlchemy query against SQLAlchemy engines

    If the result of compute is a SQLAlchemy query then it is likely that the
    data elements are themselves SQL objects which contain SQLAlchemy engines.
    We find these engines and, if they are all the same, run the query against
    these engines and return the result.
    """
    if not all(isinstance(val, (MetaData, Engine, Table)) for val in scope.values()):
        return query

    engines = set(filter(None, map(engine_of, scope.values())))

    if not engines:
        return query

    if len(set(map(str, engines))) != 1:
        raise NotImplementedError("Expected single SQLAlchemy engine")

    engine = first(engines)

    with engine.connect() as conn:  # Perform query
        result = conn.execute(select(query)).fetchall()

    if isscalar(expr.dshape):
        return result[0][0]
    if isscalar(expr.dshape.measure):
        return [x[0] for x in result]
    return result
Example #3
def post_compute(e, q, d):
    """
    Execute a query using MongoDB's aggregation pipeline

    The compute_up functions operate on Mongo Collection / list-of-dict
    queries.  Once they're done we need to actually execute the query on
    MongoDB.  We do this using the aggregation pipeline framework.

    http://docs.mongodb.org/manual/core/aggregation-pipeline/
    """
    d = {'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                 dict((col, 1) for col in e.fields))}
    q = q.append(d)

    if not e.dshape.shape:  # not a collection
        result = q.coll.aggregate(list(q.query))['result'][0]
        if isscalar(e.dshape.measure):
            return result[e._name]
        else:
            return get(e.fields, result)

    dicts = q.coll.aggregate(list(q.query))['result']

    if isscalar(e.dshape.measure):
        return list(pluck(e.fields[0], dicts, default=None))  # dicts -> values
    else:
        return list(pluck(e.fields, dicts, default=None))  # dicts -> tuples
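The two return branches map directly onto toolz.pluck. A quick demonstration on plain dictionaries of what each branch produces:

from toolz import pluck

dicts = [{'name': 'Alice', 'amount': 100},
         {'name': 'Bob', 'amount': 200}]

# scalar measure: a single field name yields bare values
list(pluck('amount', dicts))            # [100, 200]

# record measure: a list of field names yields one tuple per document
list(pluck(['name', 'amount'], dicts))  # [('Alice', 100), ('Bob', 200)]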
Example #4
def expr_repr(expr, n=10):
    # Pure Expressions, not interactive
    if not expr._resources():
        return str(expr)

    # Scalars
    if ndim(expr) == 0 and isscalar(expr.dshape):
        return repr(coerce_scalar(compute(expr), str(expr.dshape)))

    # Tables
    if (ndim(expr) == 1 and (istabular(expr.dshape) or
                             isscalar(expr.dshape.measure))):
        return repr_tables(expr, 10)

    # Smallish arrays
    if ndim(expr) >= 2 and numel(expr.shape) and numel(expr.shape) < 1000000:
        return repr(compute(expr))

    # Other
    dat = expr._resources().values()
    if len(dat) == 1:
        dat = list(dat)[0]  # may be dict_values

    s = 'Data:       %s' % dat
    if not isinstance(expr, Symbol):
        s += '\nExpr:       %s' % str(expr)
    s += '\nDataShape:  %s' % short_dshape(expr.dshape, nlines=7)

    return s
Example #5
def expr_repr(expr, n=10):
    # Pure Expressions, not interactive
    if not expr._resources():
        return str(expr)

    # Scalars
    if ndim(expr) == 0 and isscalar(expr.dshape):
        return repr(coerce_scalar(compute(expr), str(expr.dshape)))

    # Tables
    if (ndim(expr) == 1
            and (istabular(expr.dshape) or isscalar(expr.dshape.measure))):
        return repr_tables(expr, 10)

    # Smallish arrays
    if ndim(expr) >= 2 and numel(expr.shape) and numel(expr.shape) < 1000000:
        return repr(compute(expr))

    # Other
    dat = expr._resources().values()
    if len(dat) == 1:
        dat = list(dat)[0]  # may be dict_values

    s = 'Data:       %s' % dat
    if not isinstance(expr, Symbol):
        s += '\nExpr:       %s' % str(expr)
    s += '\nDataShape:  %s' % short_dshape(expr.dshape, nlines=7)

    return s
Example #6
def _eq(self, other):
    if (isscalar(self.dshape.measure) and
            (not isinstance(other, Expr)
                 or isscalar(other.dshape.measure))):
        return broadcast(Eq, self, other)
    else:
        return self.isidentical(other)
Example #7
def compute_up(t, rdd, **kwargs):
    grouper = optimize(t.grouper, rdd)
    apply = optimize(t.apply, rdd)
    t = by(grouper, apply)
    if ((isinstance(t.apply, Reduction) and type(t.apply) in binops) or
        (isinstance(t.apply, Summary) and
         builtins.all(type(val) in binops for val in t.apply.values))):
        grouper, binop, combiner, initial = reduce_by_funcs(t)

        if isscalar(t.grouper.dshape.measure):
            keyfunc = lambda x: (x,)
        else:
            keyfunc = identity
        if isscalar(t.apply.dshape.measure):
            valfunc = lambda x: (x,)
        else:
            valfunc = identity
        unpack = lambda kv: keyfunc(kv[0]) + valfunc(kv[1])

        create = lambda v: binop(initial, v)

        return (rdd.keyBy(grouper)
                   .combineByKey(create, binop, combiner)
                   .map(unpack))
    else:
        raise NotImplementedError("By only implemented for common reductions."
                                  "\nGot %s" % type(t.apply))
Example #8
def _name(self):
    measure = self.dshape.measure
    if len(self._inputs) == 1 and isscalar(getattr(measure, 'key',
                                                   measure)):
        child_measure = self._child.dshape.measure
        if isscalar(getattr(child_measure, 'key', child_measure)):
            return self._child._name
Example #9
def post_compute(e, q, scope=None):
    """
    Execute a query using MongoDB's aggregation pipeline

    The compute_up functions operate on Mongo Collection / list-of-dict
    queries.  Once they're done we need to actually execute the query on
    MongoDB.  We do this using the aggregation pipeline framework.

    http://docs.mongodb.org/manual/core/aggregation-pipeline/
    """
    scope = {
        '$project':
        toolz.merge(
            {'_id': 0},  # remove mongo identifier
            dict((col, 1) for col in e.fields))
    }
    q = q.append(scope)

    if not e.dshape.shape:  # not a collection
        result = q.coll.aggregate(list(q.query))['result'][0]
        if isscalar(e.dshape.measure):
            return result[e._name]
        else:
            return get(e.fields, result)

    dicts = q.coll.aggregate(list(q.query))['result']

    if isscalar(e.dshape.measure):
        return list(pluck(e.fields[0], dicts, default=None))  # dicts -> values
    else:
        return list(pluck(e.fields, dicts, default=None))  # dicts -> tuples
Example #10
def _name(self):
    measure = self.dshape.measure
    if len(self._inputs) == 1 and isscalar(getattr(measure, 'key',
                                                   measure)):
        child_measure = self._child.dshape.measure
        if isscalar(getattr(child_measure, 'key', child_measure)):
            # memoize the result
            return _setattr(self, '_name', self._child._name)
Example #11
def test_map():
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")
    inc = lambda x: x + 1
    assert isscalar(t["amount"].map(inc, schema="int").dshape.measure)
    s = t["amount"].map(inc, schema="{amount: int}")
    assert not isscalar(s.dshape.measure)

    assert s.dshape == dshape("var * {amount: int}")

    expr = t[["name", "amount"]].map(identity, schema="{name: string, amount: int}")
    assert expr._name is None
Example #12
def test_map():
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    inc = lambda x: x + 1
    assert isscalar(t['amount'].map(inc, schema='int').dshape.measure)
    s = t['amount'].map(inc, schema='{amount: int}')
    assert not isscalar(s.dshape.measure)

    assert s.dshape == dshape('var * {amount: int}')

    expr = (t[['name', 'amount']]
            .map(identity, schema='{name: string, amount: int}'))
    assert expr._name is None
Example #13
def test_map():
    t = symbol('t', 'var * {name: string, amount: int32, id: int32}')
    inc = lambda x: x + 1
    assert isscalar(t['amount'].map(inc, schema='int').dshape.measure)
    s = t['amount'].map(inc, schema='{amount: int}')
    assert not isscalar(s.dshape.measure)

    assert s.dshape == dshape('var * {amount: int}')

    expr = (t[['name', 'amount']]
            .map(identity, schema='{name: string, amount: int}'))
    assert expr._name is None
Example #14
def test_relabel():
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    rl = t.relabel({"name": "NAME", "id": "ID"})
    rlc = t["amount"].relabel({"amount": "BALANCE"})

    assert eval(str(rl)).isidentical(rl)

    print(rl.fields)
    assert rl.fields == ["NAME", "amount", "ID"]

    assert not isscalar(rl.dshape.measure)
    assert isscalar(rlc.dshape.measure)
Example #15
def test_relabel():
    t = symbol('t', 'var * {name: string, amount: int32, id: int32}')

    rl = t.relabel({'name': 'NAME', 'id': 'ID'})
    rlc = t['amount'].relabel({'amount': 'BALANCE'})

    assert eval(str(rl)).isidentical(rl)

    print(rl.fields)
    assert rl.fields == ['NAME', 'amount', 'ID']

    assert not isscalar(rl.dshape.measure)
    assert isscalar(rlc.dshape.measure)
Example #16
def test_relabel():
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    rl = t.relabel({'name': 'NAME', 'id': 'ID'})
    rlc = t['amount'].relabel({'amount': 'BALANCE'})

    assert eval(str(rl)).isidentical(rl)

    print(rl.fields)
    assert rl.fields == ['NAME', 'amount', 'ID']

    assert not isscalar(rl.dshape.measure)
    assert isscalar(rlc.dshape.measure)
Example #17
def default_materialize(data, dshape, odo_kwargs):
    if iscollection(dshape):
        return odo(data, list, **odo_kwargs)
    if isscalar(dshape):
        return coerce_scalar(data, str(dshape), odo_kwargs)

    return data
Example #18
def compserver(payload, serial):
    ns = payload.get('namespace', dict())
    compute_kwargs = payload.get('compute_kwargs') or {}
    odo_kwargs = payload.get('odo_kwargs') or {}
    dataset = _get_data()
    ns[':leaf'] = symbol('leaf', discover(dataset))

    expr = from_tree(payload['expr'], namespace=ns)
    assert len(expr._leaves()) == 1
    leaf = expr._leaves()[0]

    try:
        result = compute(expr, {leaf: dataset}, **compute_kwargs)

        if iscollection(expr.dshape):
            result = odo(result, list, **odo_kwargs)
        elif isscalar(expr.dshape):
            result = coerce_scalar(result, str(expr.dshape))
    except NotImplementedError as e:
        # 501: Not Implemented
        return ("Computation not supported:\n%s" % e, 501)
    except Exception as e:
        # 500: Internal Server Error
        return (
            "Computation failed with message:\n%s: %s" % (type(e).__name__, e),
            500,
        )

    return serial.dumps({
        'datashape': pprint(expr.dshape, width=0),
        'data': result,
        'names': expr.fields
    })
Example #19
def unit_to_dtype(ds):
    """

    >>> unit_to_dtype('int32')
    dtype('int32')
    >>> unit_to_dtype('float64')
    dtype('float64')
    >>> unit_to_dtype('?int64')
    dtype('float64')
    >>> unit_to_dtype('string')
    dtype('O')
    >>> unit_to_dtype('?datetime')
    dtype('<M8[us]')
    """
    if isinstance(ds, str):
        ds = dshape(ds)
    if isinstance(ds, DataShape):
        ds = ds.measure
    if isinstance(ds, Option) and isscalar(ds) and isnumeric(ds):
        return unit_to_dtype(str(ds).replace('int', 'float').replace('?', ''))
    if isinstance(ds, Option) and isinstance(
            ds.ty,
        (type(date_), type(datetime_), type(string), type(timedelta_))):
        ds = ds.ty
    if ds == string:
        return np.dtype('O')
    return to_numpy_dtype(ds)
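The ?int64 -> float64 doctest above reflects a NumPy constraint rather than a datashape one: integer dtypes have no missing-value sentinel, so NULL becomes NaN, which forces a float dtype. For example:

import numpy as np

np.array([1.0, np.nan])                 # float64; NaN stands in for NULL
# np.array([1, np.nan], dtype='int64')  # raises ValueError: cannot
#                                       # convert float NaN to integer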
Example #20
def list_to_numpy(seq, dshape=None, **kwargs):
    if isinstance(element_of(seq), dict):
        seq = list(records_to_tuples(dshape, seq))
    if (seq and isinstance(seq[0], Iterable) and not ishashable(seq[0]) and
            not isscalar(dshape)):
        seq = list(map(tuple, seq))
    return np.array(seq, dtype=dshape_to_numpy(dshape))
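The map(tuple, seq) step matters because NumPy accepts tuples, not lists, as the rows of a structured (record) array. For example:

import numpy as np

rows = [('Alice', 100), ('Bob', 200)]  # rows must be tuples
np.array(rows, dtype=[('name', 'O'), ('amount', 'i4')])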
Example #21
    def __getattr__(self, key):
        assert key != '_hash', \
            '%s should set _hash in __init__' % type(self).__name__
        try:
            return _attr_cache[(self, key)]
        except:
            pass
        try:
            result = object.__getattribute__(self, key)
        except AttributeError:
            fields = dict(zip(map(valid_identifier, self.fields),
                              self.fields))

            # prefer the method if there's a field with the same name
            methods = toolz.merge(
                schema_methods(self.dshape.measure),
                dshape_methods(self.dshape)
            )
            if key in methods:
                func = methods[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            elif self.fields and key in fields:
                if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                    result = self
                else:
                    result = self[fields[key]]
            else:
                raise
        _attr_cache[(self, key)] = result
        return result
Example #22
def compserver(dataset):
    if request.headers['content-type'] != 'application/json':
        return ("Expected JSON data", 404)
    try:
        payload = json.loads(request.data.decode('utf-8'))
    except ValueError:
        return ("Bad JSON.  Got %s " % request.data, 404)

    ns = payload.get('namespace', dict())
    ns[':leaf'] = symbol('leaf', discover(dataset))

    expr = from_tree(payload['expr'], namespace=ns)
    assert len(expr._leaves()) == 1
    leaf = expr._leaves()[0]

    try:
        result = compute(expr, {leaf: dataset})
    except Exception as e:
        return ("Computation failed with message:\n%s" % e, 500)

    if iscollection(expr.dshape):
        result = into(list, result)
    elif isscalar(expr.dshape):
        result = coerce_scalar(result, str(expr.dshape))

    return json.dumps({'datashape': str(expr.dshape),
                       'data': result}, default=json_dumps)
Example #23
def __getattr__(self, key):
    if key == '_hash':
        raise AttributeError()
    try:
        return _attr_cache[(self, key)]
    except:
        pass
    try:
        result = object.__getattribute__(self, key)
    except AttributeError:
        fields = dict(zip(map(valid_identifier, self.fields),
                          self.fields))
        if self.fields and key in fields:
            if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                result = self
            else:
                result = self[fields[key]]
        else:
            d = toolz.merge(schema_methods(self.dshape.measure),
                            dshape_methods(self.dshape))
            if key in d:
                func = d[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            else:
                raise
    _attr_cache[(self, key)] = result
    return result
Example #24
    def __getattr__(self, key):
        assert key != "_hash", "%s expressions should set _hash in __init__" % type(self).__name__
        try:
            result = object.__getattribute__(self, key)
        except AttributeError:
            fields = dict(zip(map(valid_identifier, self.fields), self.fields))

            # prefer the method if there's a field with the same name
            methods = toolz.merge(schema_methods(self.dshape.measure), dshape_methods(self.dshape))
            if key in methods:
                func = methods[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            elif self.fields and key in fields:
                if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                    result = self
                else:
                    result = self[fields[key]]
            else:
                raise

        # cache the attribute lookup, getattr will not be invoked again.
        setattr(self, key, result)
        return result
Example #25
def compserver(serial_format):
    try:
        serial = _get_format(serial_format)
    except KeyError:
        return 'Unsupported serialization format', 404

    try:
        payload = serial.loads(request.data)
    except ValueError:
        return ("Bad data.  Got %s " % request.data, 400)  # 400: Bad Request

    ns = payload.get('namespace', dict())
    dataset = _get_data()
    ns[':leaf'] = symbol('leaf', discover(dataset))

    expr = from_tree(payload['expr'], namespace=ns)
    assert len(expr._leaves()) == 1
    leaf = expr._leaves()[0]

    try:
        result = compute(expr, {leaf: dataset})

        if iscollection(expr.dshape):
            result = odo(result, list)
        elif isscalar(expr.dshape):
            result = coerce_scalar(result, str(expr.dshape))
    except NotImplementedError as e:
        # 501: Not Implemented
        return ("Computation not supported:\n%s" % e, 501)
    except Exception as e:
        # 500: Internal Server Error
        return ("Computation failed with message:\n%s" % e, 500)

    return serial.dumps({'datashape': str(expr.dshape), 'data': result})
Example #26
def test_base():
    for expr, exclusions in expressions.items():
        if iscollection(expr.dshape):
            model = into(DataFrame, into(np.ndarray, expr._subs({t: Data(base, t.dshape)})))
        else:
            model = compute(expr._subs({t: Data(base, t.dshape)}))
        print('\nexpr: %s\n' % expr)
        for source in sources:
            if id(source) in map(id, exclusions):
                continue
            print('%s <- %s' % (typename(model), typename(source)))
            T = Data(source)
            if iscollection(expr.dshape):
                result = into(type(model), expr._subs({t: T}))
                if isscalar(expr.dshape.measure):
                    assert set(into(list, result)) == set(into(list, model))
                else:
                    assert df_eq(result, model)
            elif isrecord(expr.dshape):
                result = compute(expr._subs({t: T}))
                assert into(tuple, result) == into(tuple, model)
            else:
                result = compute(expr._subs({t: T}))
                try:
                    result = result.scalar()
                except AttributeError:
                    pass
                assert result == model
Example #27
def compserver():
    if request.headers['content-type'] != 'application/json':
        return ("Expected JSON data", 415)  # 415: Unsupported Media Type
    try:
        payload = json.loads(request.data.decode('utf-8'))
    except ValueError:
        return ("Bad JSON.  Got %s " % request.data, 400)  # 400: Bad Request

    ns = payload.get('namespace', dict())
    dataset = _get_data()
    ns[':leaf'] = symbol('leaf', discover(dataset))

    expr = from_tree(payload['expr'], namespace=ns)
    assert len(expr._leaves()) == 1
    leaf = expr._leaves()[0]

    try:
        result = compute(expr, {leaf: dataset})

        if iscollection(expr.dshape):
            result = odo(result, list)
        elif isscalar(expr.dshape):
            result = coerce_scalar(result, str(expr.dshape))
    except NotImplementedError as e:
        # 501: Not Implemented
        return ("Computation not supported:\n%s" % e, 501)
    except Exception as e:
        # 500: Internal Server Error
        return ("Computation failed with message:\n%s" % e, 500)

    return json.dumps({'datashape': str(expr.dshape),
                       'data': result}, default=json_dumps)
Example #28
    def __getattr__(self, key):
        assert key != '_hash', \
            '%s should set _hash in __init__' % type(self).__name__
        try:
            result = object.__getattribute__(self, key)
        except AttributeError:
            fields = dict(zip(map(valid_identifier, self.fields), self.fields))

            # prefer the method if there's a field with the same name
            methods = toolz.merge(
                schema_methods(self.dshape.measure),
                dshape_methods(self.dshape)
            )
            if key in methods:
                func = methods[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            elif self.fields and key in fields:
                if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                    result = self
                else:
                    result = self[fields[key]]
            else:
                raise

        # cache the attribute lookup, getattr will not be invoked again.
        _setattr(self, key, result)
        return result
Example #29
def unit_to_dtype(ds):
    """

    >>> unit_to_dtype('int32')
    dtype('int32')
    >>> unit_to_dtype('float64')
    dtype('float64')
    >>> unit_to_dtype('?int64')
    dtype('float64')
    >>> unit_to_dtype('string')
    dtype('O')
    >>> unit_to_dtype('?datetime')
    dtype('<M8[us]')
    """
    if isinstance(ds, str):
        ds = dshape(ds)
    if isinstance(ds, DataShape):
        ds = ds.measure
    if isinstance(ds, Option) and isscalar(ds) and isnumeric(ds):
        return unit_to_dtype(str(ds).replace('int', 'float').replace('?', ''))
    if isinstance(ds, Option) and isinstance(ds.ty, (type(date_),
            type(datetime_), type(string), type(timedelta_))):
        ds = ds.ty
    if ds == string:
        return np.dtype('O')
    return to_numpy_dtype(ds)
Example #30
def _get_field(self, key):
    for child in self.children:
        if key in child.fields:
            if isscalar(child.dshape.measure):
                return child
            else:
                return child[key]
Example #31
def __getattr__(self, key):
    if key == '_hash':
        raise AttributeError()
    try:
        return _attr_cache[(self, key)]
    except:
        pass
    try:
        result = object.__getattribute__(self, key)
    except AttributeError:
        fields = dict(zip(map(valid_identifier, self.fields), self.fields))
        if self.fields and key in fields:
            if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                result = self
            else:
                result = self[fields[key]]
        else:
            d = toolz.merge(schema_methods(self.dshape.measure),
                            dshape_methods(self.dshape))
            if key in d:
                func = d[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = functools.update_wrapper(
                        partial(func, self), func)
            else:
                raise
    _attr_cache[(self, key)] = result
    return result
Example #32
def _get_field(self, key):
    for arg in self.args:
        if key in arg.fields:
            if isscalar(arg.dshape.measure):
                return arg
            else:
                return arg[key]
Example #33
    def __getattr__(self, key):
        if key == '_hash':
            raise AttributeError(key)
        try:
            return _attr_cache[(self, key)]
        except:
            pass
        try:
            result = object.__getattribute__(self, key)
        except AttributeError:
            fields = dict(zip(map(valid_identifier, self.fields), self.fields))

            # prefer the method if there's a field with the same name
            methods = toolz.merge(schema_methods(self.dshape.measure),
                                  dshape_methods(self.dshape))
            if key in methods:
                func = methods[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            elif self.fields and key in fields:
                if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                    result = self
                else:
                    result = self[fields[key]]
            else:
                raise
        _attr_cache[(self, key)] = result
        return result
Example #34
def test_base():
    for expr, exclusions in expressions.items():
        if iscollection(expr.dshape):
            model = into(
                DataFrame,
                into(np.ndarray, expr._subs({t: Data(base, t.dshape)})))
        else:
            model = compute(expr._subs({t: Data(base, t.dshape)}))
        print('\nexpr: %s\n' % expr)
        for source in sources:
            if id(source) in map(id, exclusions):
                continue
            print('%s <- %s' % (typename(model), typename(source)))
            T = Data(source)
            if iscollection(expr.dshape):
                result = into(type(model), expr._subs({t: T}))
                if isscalar(expr.dshape.measure):
                    assert set(into(list, result)) == set(into(list, model))
                else:
                    assert df_eq(result, model)
            elif isrecord(expr.dshape):
                result = compute(expr._subs({t: T}))
                assert into(tuple, result) == into(tuple, model)
            else:
                result = compute(expr._subs({t: T}))
                try:
                    result = result.scalar()
                except AttributeError:
                    pass
                assert result == model
Example #35
def Data(data, dshape=None, name=None, fields=None, columns=None, schema=None,
         **kwargs):
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            data, sub_uri = data.split('::')
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)
    if (isinstance(data, Iterator) and
            not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if columns:
        warnings.warn("columns kwarg deprecated.  Use fields instead",
                      DeprecationWarning)
    if columns and not fields:
        fields = columns
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        if isinstance(dshape.measure, Tuple) and fields:
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))
        elif isscalar(dshape.measure) and fields:
            types = (dshape.measure,) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema,)))
        elif isrecord(dshape.measure) and fields:
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            if names != fields:
                raise ValueError('data column names %s\n'
                                 '\tnot equal to fields parameter %s,\n'
                                 '\tuse Data(data).relabel(%s) to rename fields'
                                 % (names,
                                    fields,
                                    ', '.join('%s=%r' % (k, v)
                                              for k, v in zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))

    ds = datashape.dshape(dshape)
    result = InteractiveSymbol(data, ds, name)

    if sub_uri:
        for field in sub_uri.split('/'):
            if field:
                result = result[field]

    return result
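The sub_uri handling lets one URI string address a node inside a hierarchical store. A sketch of the split, with a hypothetical HDF5 path:

# Hypothetical URI; '::' separates the resource from a path inside it.
uri = 'accounts.hdf5::/data/accounts'
data, sub_uri = uri.split('::')
# data == 'accounts.hdf5'; Data then drills in one field at a time,
# as in the loop above: for field in sub_uri.split('/'): ...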
Example #36
def _name(self):
    if not isscalar(self.dshape.measure):
        return None
    l, r = name(self.lhs), name(self.rhs)
    if l and not r:
        return l
    if r and not l:
        return r
Example #37
def fastmsgpack_materialize(data, dshape, odo_kwargs):
    if istabular(dshape):
        return odo(data, pd.DataFrame, **odo_kwargs)
    if iscollection(dshape):
        return odo(data, pd.Series, **odo_kwargs)
    if isscalar(dshape):
        return coerce_scalar(data, str(dshape), odo_kwargs)
    return data
Example #38
def compute_up(t, seq, **kwargs):
    if isscalar(t._child.dshape.measure) and t.key == t._child._name:
        key = identity
    elif isinstance(t.key, (str, unicode, tuple, list)):
        key = rowfunc(t._child[t.key])
    else:
        key = rrowfunc(optimize(t.key, seq), t._child)
    return sorted(seq, key=key, reverse=not t.ascending)
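When the sort key is a column name, the row function amounts to positional item access, so the fallback is ordinary sorted() with a key. Roughly, and only as an illustration of the final line:

from operator import itemgetter

seq = [('Alice', 100), ('Bob', 50)]
# sort by the second column, descending (i.e. ascending=False)
sorted(seq, key=itemgetter(1), reverse=True)
# -> [('Alice', 100), ('Bob', 50)]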
Example #39
def test_distinct():
    (chunk, chunk_expr), (agg, agg_expr) = split(t, count(t.amount.distinct()))

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(chunk.amount.distinct())

    assert isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(count(agg.distinct()))
Example #40
def test_reductions():
    (chunk, chunk_expr), (agg, agg_expr) = split(t, t.amount.nunique())

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(chunk.amount.distinct())

    assert isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(agg.distinct().count())

    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, t.amount.nunique(keepdims=True))

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(chunk.amount.distinct())

    assert isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(agg.distinct().count(keepdims=True))
Example #41
def test_sum():
    (chunk, chunk_expr), (agg, agg_expr) = split(t, t.amount.sum())

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(chunk.amount.sum(keepdims=True))

    assert isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(sum(agg))
Example #42
def Data(data,
         dshape=None,
         name=None,
         fields=None,
         columns=None,
         schema=None,
         **kwargs):
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            data, sub_uri = data.split('::')
        data = resource(data,
                        schema=schema,
                        dshape=dshape,
                        columns=columns,
                        **kwargs)
    if (isinstance(data, Iterator)
            and not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if columns:
        warnings.warn("columns kwarg deprecated.  Use fields instead",
                      DeprecationWarning)
    if columns and not fields:
        fields = columns
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        if isinstance(dshape.measure, Tuple) and fields:
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema, )))
        elif isscalar(dshape.measure) and fields:
            types = (dshape.measure, ) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema, )))
        elif isrecord(dshape.measure) and fields:
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema, )))

    ds = datashape.dshape(dshape)

    name = name or next(names)
    result = InteractiveSymbol(data, ds, name)

    if sub_uri:
        for field in sub_uri.split('/'):
            if field:
                result = result[field]

    return result
Example #43
def Data(data,
         dshape=None,
         name=None,
         fields=None,
         columns=None,
         schema=None,
         **kwargs):
    if columns:
        raise ValueError("columns argument deprecated, use fields instead")
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")

    if isinstance(data, InteractiveSymbol):
        return Data(data.data, dshape, name, fields, columns, schema, **kwargs)

    if isinstance(data, _strtypes):
        data = resource(data,
                        schema=schema,
                        dshape=dshape,
                        columns=columns,
                        **kwargs)
    if (isinstance(data, Iterator)
            and not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        if isinstance(dshape.measure, Tuple) and fields:
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema, )))
        elif isscalar(dshape.measure) and fields:
            types = (dshape.measure, ) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema, )))
        elif isrecord(dshape.measure) and fields:
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            if names != fields:
                raise ValueError(
                    'data column names %s\n'
                    '\tnot equal to fields parameter %s,\n'
                    '\tuse Data(data).relabel(%s) to rename '
                    'fields' %
                    (names, fields, ', '.join('%s=%r' % (k, v)
                                              for k, v in zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema, )))

    ds = datashape.dshape(dshape)
    return InteractiveSymbol(data, ds, name)
Example #44
def select_to_base(sel, dshape=None, **kwargs):
    engine = sel.bind  # TODO: get engine from select

    with engine.connect() as conn:
        result = conn.execute(sel)
        assert not dshape or isscalar(dshape)
        result = list(result)[0][0]

    return result
Example #45
def test_by_sum():
    (chunk, chunk_expr), (agg, agg_expr) = \
        split(t, by(t.name, total=t.amount.sum()))

    assert chunk.schema == t.schema
    assert chunk_expr.isidentical(by(chunk.name, total=chunk.amount.sum()))

    assert not isscalar(agg.dshape.measure)
    assert agg_expr.isidentical(by(agg.name, total=agg.total.sum()))
Example #46
def _name(self):
    if not isscalar(self.dshape.measure):
        return None
    l, r = name(self.lhs), name(self.rhs)
    if l and not r:
        return l
    if r and not l:
        return r
    if l == r:
        return l