Example #1
File: table.py, Project: B-Rich/blaze
    def __init__(self, parent, grouper, apply):
        self.parent = parent
        s = TableSymbol('', parent.schema, parent.iscolumn)
        self.grouper = grouper.subs({parent: s})
        self.apply = apply.subs({parent: s})
        if isdimension(self.apply.dshape[0]):
            raise TypeError("Expected Reduction")
Example #2
File: sql.py, Project: holdenk/blaze
def dshape_to_alchemy(dshape):
    """

    >>> dshape_to_alchemy('int')
    <class 'sqlalchemy.sql.sqltypes.Integer'>

    >>> dshape_to_alchemy('string')
    <class 'sqlalchemy.sql.sqltypes.String'>

    >>> dshape_to_alchemy('{name: string, amount: int}')
    [Column('name', String(), table=None, nullable=False), Column('amount', Integer(), table=None, nullable=False)]

    >>> dshape_to_alchemy('{name: ?string, amount: ?int}')
    [Column('name', String(), table=None), Column('amount', Integer(), table=None)]
    """
    if isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if isinstance(dshape, Option):
        return dshape_to_alchemy(dshape.ty)
    if str(dshape) in types:
        return types[str(dshape)]
    if isinstance(dshape, datashape.Record):
        return [sql.Column(name,
                           dshape_to_alchemy(typ),
                           nullable=isinstance(typ[0], Option))
                    for name, typ in dshape.parameters[0]]
    if isinstance(dshape, datashape.DataShape):
        if isdimension(dshape[0]):
            return dshape_to_alchemy(dshape[1])
        else:
            return dshape_to_alchemy(dshape[0])
    raise NotImplementedError("No SQLAlchemy dtype match for datashape: %s"
            % dshape)
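Note: a minimal usage sketch (not part of the source project), assuming the function above and its accompanying `types` mapping are in scope and SQLAlchemy is installed; the Column list produced for a record datashape can be attached to a Table.

import sqlalchemy as sa

# Columns built from a record datashape, then bound to a concrete table.
columns = dshape_to_alchemy('{name: string, amount: int}')
accounts = sa.Table('accounts', sa.MetaData(), *columns)
print(accounts.c.keys())   # expected: ['name', 'amount']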
Example #3
    def __init__(self, name, dshape=None, iscolumn=False):
        self._name = name
        if isinstance(dshape, _strtypes):
            dshape = datashape.dshape(dshape)
        if not isdimension(dshape[0]):
            dshape = datashape.var * dshape
        self.dshape = dshape
        self.iscolumn = iscolumn
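Note: a short sketch of the normalization above, using the datashape package directly (assumed importable); a schema without a leading dimension gets `var *` prepended.

from datashape import dshape, var
from datashape.predicates import isdimension

ds = dshape('{name: string, amount: int32}')
if not isdimension(ds[0]):       # a bare record has no leading dimension
    ds = var * ds
print(ds)                        # expected: var * {name: string, amount: int32}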
Example #4
File: mongo.py, Project: vitan/blaze
def discover(coll, n=50):
    items = list(take(n, coll.find()))
    for item in items:
        del item['_id']

    ds = discover(items)

    if isdimension(ds[0]):
        return coll.count() * ds.subshape[0]
    else:
        raise ValueError("Consistent datashape not found")
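Note: the return value is plain datashape arithmetic; the element shape discovered from the sample gets the full collection count as its leading dimension. A sketch using datashape only (the 1000 stands in for coll.count()):

from datashape import dshape

sample_ds = dshape('50 * {name: string, amount: int64}')   # shape of the 50-item sample
print(1000 * sample_ds.subshape[0])                        # expected: 1000 * {name: string, amount: int64}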
Example #5
File: mongo.py, Project: Casolt/blaze
def discover(coll, n=50):
    items = list(take(n, coll.find()))
    for item in items:
        del item['_id']

    ds = discover(items)

    if isdimension(ds[0]):
        return coll.count() * ds.subshape[0]
    else:
        raise ValueError("Consistent datashape not found")
Example #6
File: table.py, Project: pgnepal/blaze
    def __init__(self,
                 data,
                 dshape=None,
                 name=None,
                 columns=None,
                 iscolumn=False,
                 schema=None):
        if isinstance(data, str):
            data = resource(data)
        if schema and dshape:
            raise ValueError("Please specify one of schema= or dshape= keyword"
                             " arguments")
        if schema and not dshape:
            dshape = var * schema
        if dshape and isinstance(dshape, _strtypes):
            dshape = datashape.dshape(dshape)
        if dshape and not isdimension(dshape[0]):
            dshape = var * dshape
        if not dshape:
            dshape = discover(data)
            types = None
            if isinstance(dshape[1], Tuple):
                columns = columns or list(range(len(dshape[1].dshapes)))
                types = dshape[1].dshapes
            if isinstance(dshape[1], Record):
                columns = columns or dshape[1].names
                types = dshape[1].types
            if isinstance(dshape[1], Fixed):
                types = (dshape[2], ) * int(dshape[1])
            if not columns:
                raise TypeError("Could not infer column names from data. "
                                "Please specify column names with `columns=` "
                                "keyword")
            if not types:
                raise TypeError("Could not infer data types from data. "
                                "Please specify schema with `schema=` keyword")

            dshape = dshape[0] * datashape.dshape(
                Record(list(zip(columns, types))))

        self.dshape = datashape.dshape(dshape)

        self.data = data

        if (hasattr(data, 'schema') and isinstance(data.schema,
                                                   (DataShape, str, unicode))
                and self.schema != data.schema):
            raise TypeError('%s schema %s does not match %s schema %s' %
                            (type(data).__name__, data.schema,
                             type(self).__name__, self.schema))

        self._name = name or next(names)
        self.iscolumn = iscolumn
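Note: two hypothetical constructions that exercise the branches above (a sketch, not verified against any particular Blaze release).

data = [('Alice', 100), ('Bob', 200)]

# dshape= given as a string: parsed and used as-is (a leading `var *` would be
# prepended if the string lacked a dimension)
t1 = Table(data, dshape='var * {name: string, amount: int32}')

# no dshape/schema: discover(data) infers the types, columns= supplies the names
t2 = Table(data, columns=['name', 'amount'])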
Example #7
File: table.py, Project: quasiben/blaze
    def __init__(self, data, dshape=None, name=None, columns=None,
            iscolumn=False, schema=None):
        if isinstance(data, str):
            data = resource(data)
        if schema and dshape:
            raise ValueError("Please specify one of schema= or dshape= keyword"
                    " arguments")
        if schema and not dshape:
            dshape = var * schema
        if dshape and isinstance(dshape, _strtypes):
            dshape = datashape.dshape(dshape)
        if dshape and not isdimension(dshape[0]):
            dshape = var * dshape
        if not dshape:
            dshape = discover(data)
            types = None
            if isinstance(dshape[1], Tuple):
                columns = columns or list(range(len(dshape[1].dshapes)))
                types = dshape[1].dshapes
            if isinstance(dshape[1], Record):
                columns = columns or dshape[1].names
                types = dshape[1].types
            if isinstance(dshape[1], Fixed):
                types = (dshape[2],) * int(dshape[1])
            if not columns:
                raise TypeError("Could not infer column names from data. "
                                "Please specify column names with `column=` "
                                "keyword")
            if not types:
                raise TypeError("Could not infer data types from data. "
                                "Please specify schema with `schema=` keyword")

            dshape = dshape[0] * datashape.dshape(Record(list(zip(columns, types))))

        self.dshape = datashape.dshape(dshape)

        self.data = data

        if (hasattr(data, 'schema')
             and isinstance(data.schema, (DataShape, str, unicode))
             and self.schema != data.schema):
            raise TypeError('%s schema %s does not match %s schema %s' %
                            (type(data).__name__, data.schema,
                             type(self).__name__, self.schema))

        self._name = name or next(names)
        self.iscolumn = iscolumn
Example #8
def dshape_to_alchemy(dshape):
    """

    >>> dshape_to_alchemy('int')
    <class 'sqlalchemy.sql.sqltypes.Integer'>

    >>> dshape_to_alchemy('string')
    <class 'sqlalchemy.sql.sqltypes.Text'>

    >>> dshape_to_alchemy('{name: string, amount: int}')
    [Column('name', Text(), table=None, nullable=False), Column('amount', Integer(), table=None, nullable=False)]

    >>> dshape_to_alchemy('{name: ?string, amount: ?int}')
    [Column('name', Text(), table=None), Column('amount', Integer(), table=None)]
    """
    if isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if isinstance(dshape, Option):
        return dshape_to_alchemy(dshape.ty)
    if str(dshape) in types:
        return types[str(dshape)]
    if isinstance(dshape, datashape.Record):
        return [
            sql.Column(name,
                       dshape_to_alchemy(typ),
                       nullable=isinstance(typ[0], Option))
            for name, typ in dshape.parameters[0]
        ]
    if isinstance(dshape, datashape.DataShape):
        if isdimension(dshape[0]):
            return dshape_to_alchemy(dshape[1])
        else:
            return dshape_to_alchemy(dshape[0])
    if isinstance(dshape, datashape.String):
        if dshape[0].fixlen is None:
            return sql.types.Text
        if 'U' in dshape.encoding:
            return sql.types.Unicode(length=dshape[0].fixlen)
        if 'A' in dshape.encoding:
            return sql.types.String(length=dshape[0].fixlen)
    if isinstance(dshape, datashape.DateTime):
        if dshape.tz:
            return sql.types.DateTime(timezone=True)
        else:
            return sql.types.DateTime(timezone=False)
    raise NotImplementedError("No SQLAlchemy dtype match for datashape: %s" %
                              dshape)
Example #9
File: sparksql.py, Project: vitan/blaze
def deoption(ds):
    """

    >>> deoption('int32')
    ctype("int32")

    >>> deoption('?int32')
    ctype("int32")
    """
    if isinstance(ds, str):
        ds = dshape(ds)
    if isinstance(ds, DataShape) and not isdimension(ds[0]):
        return deoption(ds[0])
    if isinstance(ds, Option):
        return ds.ty
    else:
        return ds
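Note: a quick hedged check of the behaviour documented above, using datashape directly (assumes deoption is in scope).

from datashape import dshape

assert deoption('?int32') == dshape('int32').measure   # Option unwrapped to the bare ctype
assert deoption('int32') == dshape('int32').measure    # non-optional passed through unchanged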
Example #10
def deoption(ds):
    """

    >>> deoption('int32')
    ctype("int32")

    >>> deoption('?int32')
    ctype("int32")
    """
    if isinstance(ds, str):
        ds = dshape(ds)
    if isinstance(ds, DataShape) and not isdimension(ds[0]):
        return deoption(ds[0])
    if isinstance(ds, Option):
        return ds.ty
    else:
        return ds
Example #11
File: sql.py, Project: Casolt/blaze
def dshape_to_alchemy(dshape):
    """

    >>> dshape_to_alchemy('int')
    <class 'sqlalchemy.sql.sqltypes.Integer'>

    >>> dshape_to_alchemy('string')
    <class 'sqlalchemy.sql.sqltypes.Text'>

    >>> dshape_to_alchemy('{name: string, amount: int}')
    [Column('name', Text(), table=None, nullable=False), Column('amount', Integer(), table=None, nullable=False)]

    >>> dshape_to_alchemy('{name: ?string, amount: ?int}')
    [Column('name', Text(), table=None), Column('amount', Integer(), table=None)]
    """
    if isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if isinstance(dshape, Option):
        return dshape_to_alchemy(dshape.ty)
    if str(dshape) in types:
        return types[str(dshape)]
    if isinstance(dshape, datashape.Record):
        return [sql.Column(name,
                           dshape_to_alchemy(typ),
                           nullable=isinstance(typ[0], Option))
                    for name, typ in dshape.parameters[0]]
    if isinstance(dshape, datashape.DataShape):
        if isdimension(dshape[0]):
            return dshape_to_alchemy(dshape[1])
        else:
            return dshape_to_alchemy(dshape[0])
    if isinstance(dshape, datashape.String):
        if dshape[0].fixlen is None:
            return sql.types.Text
        if 'U' in dshape.encoding:
            return sql.types.Unicode(length=dshape[0].fixlen)
        if 'A' in dshape.encoding:
            return sql.types.String(length=dshape[0].fixlen)
    if isinstance(dshape, datashape.DateTime):
        if dshape.tz:
            return sql.types.DateTime(timezone=True)
        else:
            return sql.types.DateTime(timezone=False)
    raise NotImplementedError("No SQLAlchemy dtype match for datashape: %s"
            % dshape)
Example #12
File: sparksql.py, Project: vitan/blaze
    def ds_to_sparksql(ds):
        """ Convert datashape to SparkSQL type system

        >>> print(ds_to_sparksql('int32')) # doctest: +SKIP
        IntegerType

        >>> print(ds_to_sparksql('5 * int32')) # doctest: +SKIP
        ArrayType(IntegerType,false)

        >>> print(ds_to_sparksql('5 * ?int32'))  # doctest: +SKIP
        ArrayType(IntegerType,true)

        >>> print(ds_to_sparksql('{name: string, amount: int32}'))  # doctest: +SKIP
        StructType(List(StructField(name,StringType,false),StructField(amount,IntegerType,false)))

        >>> print(ds_to_sparksql('10 * {name: string, amount: ?int32}'))  # doctest: +SKIP
        ArrayType(StructType(List(StructField(name,StringType,false),StructField(amount,IntegerType,true))),false)
        """
        if isinstance(ds, str):
            return ds_to_sparksql(dshape(ds))
        if isinstance(ds, Record):
            return sql.StructType([
                sql.StructField(name, ds_to_sparksql(deoption(typ)),
                                isinstance(typ, datashape.Option))
                for name, typ in ds.fields
            ])
        if isinstance(ds, DataShape):
            if isdimension(ds[0]):
                elem = ds.subshape[0]
                if isinstance(elem, DataShape) and len(elem) == 1:
                    elem = elem[0]
                return sql.ArrayType(ds_to_sparksql(deoption(elem)),
                                     isinstance(elem, Option))
            else:
                return ds_to_sparksql(ds[0])
        if ds in types:
            return types[ds]
        raise NotImplementedError()
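Note: for comparison, the struct element of the last docstring example ('{name: string, amount: ?int32}') built by hand with pyspark.sql.types (a sketch; assumes a recent PySpark where the type classes live in pyspark.sql.types).

from pyspark.sql.types import StructType, StructField, StringType, IntegerType

expected = StructType([
    StructField('name', StringType(), False),     # string         -> not nullable
    StructField('amount', IntegerType(), True),   # ?int32 option  -> nullable
])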
Example #13
    def ds_to_sparksql(ds):
        """ Convert datashape to SparkSQL type system

        >>> print(ds_to_sparksql('int32')) # doctest: +SKIP
        IntegerType

        >>> print(ds_to_sparksql('5 * int32')) # doctest: +SKIP
        ArrayType(IntegerType,false)

        >>> print(ds_to_sparksql('5 * ?int32'))  # doctest: +SKIP
        ArrayType(IntegerType,true)

        >>> print(ds_to_sparksql('{name: string, amount: int32}'))  # doctest: +SKIP
        StructType(List(StructField(name,StringType,false),StructField(amount,IntegerType,false)))

        >>> print(ds_to_sparksql('10 * {name: string, amount: ?int32}'))  # doctest: +SKIP
        ArrayType(StructType(List(StructField(name,StringType,false),StructField(amount,IntegerType,true))),false)
        """
        if isinstance(ds, str):
            return ds_to_sparksql(dshape(ds))
        if isinstance(ds, Record):
            return sql.StructType([
                sql.StructField(name,
                                ds_to_sparksql(deoption(typ)),
                                isinstance(typ, datashape.Option))
                for name, typ in ds.fields])
        if isinstance(ds, DataShape):
            if isdimension(ds[0]):
                elem = ds.subshape[0]
                if isinstance(elem, DataShape) and len(elem) == 1:
                    elem = elem[0]
                return sql.ArrayType(ds_to_sparksql(deoption(elem)),
                                     isinstance(elem, Option))
            else:
                return ds_to_sparksql(ds[0])
        if ds in types:
            return types[ds]
        raise NotImplementedError()
Example #14
File: chunks.py, Project: vitan/blaze
def discover(c):
    ds = discover(first(c))
    assert isdimension(ds[0])
    return var * ds.subshape[0]
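Note: the idea, sketched with datashape and toolz directly; every chunk is assumed to share one element datashape, so the overall shape is `var *` that element shape.

from datashape import discover, var
from toolz import first

chunks = [[{'name': 'Alice', 'amount': 100}],
          [{'name': 'Bob', 'amount': 200}]]
ds = discover(first(chunks))     # e.g. 1 * {amount: int64, name: string}
print(var * ds.subshape[0])      # e.g. var * {amount: int64, name: string}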
Example #15
File: chunks.py, Project: gdementen/blaze
def discover(c):
    ds = discover(first(c))
    assert isdimension(ds[0])
    return var * ds.subshape[0]
Example #16
    def schema(self):
        if isdimension(self.dshape[0]):
            return self.dshape.subshape[0]
        else:
            raise TypeError("Non-tabular datashape, %s" % self.dshape)
Example #17
File: table.py, Project: ChrisBg/blaze
def by(child, grouper, apply):
    if isdimension(apply.dshape[0]):
        raise TypeError("Expected Reduction")
    return By(child, grouper, apply)
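Note: a hypothetical call, assuming the old Blaze expression API used throughout these examples (TableSymbol, column access by attribute, .sum() reductions); not verified against any release.

t = TableSymbol('t', '{name: string, amount: int32}')
expr = by(t, t.name, t.amount.sum())    # apply is a reduction, so the check passes
# by(t, t.name, t.amount) would raise TypeError("Expected Reduction")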