Ejemplo n.º 1
0
    def _pieces(self):
        """Yield the fragments of the statement, one string at a time.

        The fragments are produced in this order:

        1. the formatted column schema (with any partition columns removed
           when ``self.partition`` is set),
        2. a ``PARTITIONED BY`` clause, only when ``self.partition`` is set,
        3. the table format DDL joined with newlines when
           ``self.table_format`` is set, otherwise ``self._storage()``,
        4. ``self._location()``.
        """
        if self.partition is None:
            yield format_schema(self.schema)
        else:
            partition_schema = self.partition
            if not isinstance(partition_schema, sch.Schema):
                # A bare sequence of column names was given: look each
                # column's type up in the table schema.
                partition_types = [
                    self.schema[column] for column in partition_schema
                ]
                partition_schema = sch.Schema(
                    partition_schema, partition_types)

            # Partition columns that also appear in the table schema must
            # not be listed twice, so they are dropped from the main part.
            overlapping = [
                column for column in self.partition if column in self.schema
            ]
            table_schema = self.schema
            if overlapping:
                table_schema = table_schema.delete(overlapping)

            yield format_schema(table_schema)
            yield 'PARTITIONED BY {}'.format(format_schema(partition_schema))

        if self.table_format is None:
            yield self._storage()
        else:
            yield '\n'.join(self.table_format.to_ddl())

        yield self._location()
Ejemplo n.º 2
0
    def schema(self):
        """Return the schema produced by this node's selections.

        With no selections, the schema of ``self.table`` is returned
        unchanged. Otherwise each selection contributes columns in order:
        a destruct column contributes one column per field of its struct
        type, a value expression contributes a single named column, and a
        table contributes all of its columns. Selections of any other kind
        contribute nothing.
        """
        if not self.selections:
            return self.table.schema()

        names, types = [], []

        for selection in self.selections:
            if isinstance(selection, ir.DestructColumn):
                # Destructure the struct result into one column per field.
                struct_type = selection.type()
                field_names = list(struct_type.names)
                names.extend(field_names)
                types.extend(struct_type[field] for field in field_names)
            elif isinstance(selection, ir.Value):
                names.append(selection.get_name())
                types.append(selection.type())
            elif isinstance(selection, ir.Table):
                table_schema = selection.schema()
                names.extend(table_schema.names)
                types.extend(table_schema.types)

        return sch.Schema(names, types)
Ejemplo n.º 3
0
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None

        Returns
        -------
        schema : ibis Schema

        Raises
        ------
        com.UnsupportedBackendType
          If a type reported for the table has no entry in
          ``clickhouse_to_ibis``
        """
        qualified_name = self._fully_qualified_name(table_name, database)
        query = 'DESC {0}'.format(qualified_name)
        data, _ = self._execute(query)

        names, types = data[:2]
        # NOTE(review): the result of this call is overwritten just below.
        # It is kept in case the helper validates its input; confirm which
        # of the two conversions is the intended one and drop the other.
        ibis_types = clickhouse_types_to_ibis_types(types)
        try:
            # Look the types up eagerly. A lazy ``map`` would defer the
            # lookups until after this ``try`` block has exited, so the
            # KeyError could never be translated here.
            ibis_types = [clickhouse_to_ibis[type_] for type_ in types]
        except KeyError:
            raise com.UnsupportedBackendType()

        return sch.Schema(names, ibis_types)
Ejemplo n.º 4
0
    def get_schema(
        self,
        table_name: str,
        database: str | None = None,
    ) -> sch.Schema:
        """Return a Schema object for the indicated table and database.

        Parameters
        ----------
        table_name
            Table name
        database
            Database name

        Returns
        -------
        Schema
            Ibis schema built from the first two fields of every row
            returned by ``DESCRIBE``.
        """
        qualified_name = self._fully_qualified_name(table_name, database)
        described = self.con.fetchall(f'DESCRIBE {qualified_name}')

        # Only the first two fields of each row are used: name and type.
        names, type_names = zip(*(row[:2] for row in described))

        ibis_types = []
        for type_name in type_names:
            ibis_types.append(udf.parse_type(type_name.lower()))
        return sch.Schema(names, ibis_types)
Ejemplo n.º 5
0
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Column names are lower-cased before the schema is built.

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None

        Returns
        -------
        schema : ibis Schema
        """
        qualified_name = self._fully_qualified_name(table_name, database)
        described = self.con.fetchall(f'DESCRIBE {qualified_name}')

        # Only the first two fields of each row are used: name and type.
        raw_names, type_names = zip(*(row[:2] for row in described))

        ibis_types = []
        for type_name in type_names:
            ibis_types.append(udf.parse_type(type_name.lower()))
        lowered_names = [raw_name.lower() for raw_name in raw_names]

        return sch.Schema(lowered_names, ibis_types)
Ejemplo n.º 6
0
    def _get_schema_using_query(self, query):
        """Run ``query`` and build a schema from the cursor description.

        Parameters
        ----------
        query : string
          Passed unchanged to ``self.raw_sql``

        Returns
        -------
        schema : ibis Schema
        """
        cur = self.raw_sql(query)
        try:
            # resets the state of the cursor and closes operation
            cur.fetchall()
            names, ibis_types = self._adapt_types(cur.description)
        finally:
            # Release the cursor even when fetching or type adaptation
            # fails, so an error here does not leak it.
            cur.release()

        return sch.Schema(names, ibis_types)
Ejemplo n.º 7
0
    def _get_schema_using_query(self, query):
        """Run ``query`` and build a schema from its row description.

        The column details are extracted from the row description held on
        the executed cursor's result and adapted to ibis types with
        ``self._adapt_types``.
        """
        executed = self.raw_sql(query)
        # resets the state of the cursor and closes operation
        executed.cursor.fetchall()

        row_description = executed.cursor._result.row_set.row_desc
        column_details = _extract_column_details(row_description)
        names, ibis_types = self._adapt_types(column_details)

        return sch.Schema(names, ibis_types)
Ejemplo n.º 8
0
    def _get_schema_using_query(self, limited_query):
        """Run ``limited_query`` and build a schema from its description.

        Each entry of the cursor description supplies the column name
        (item 0) and a Python type (item 1), which is translated to an ibis
        type name through a fixed lookup table.

        Parameters
        ----------
        limited_query : string
          Passed unchanged to ``self._execute``

        Returns
        -------
        schema : ibis Schema

        Raises
        ------
        KeyError
          If a column's Python type is not one of the types in the lookup
          table
        """
        type_map = {
            int: 'int64',
            bool: 'boolean',
            float: 'float64',
            str: 'string',
            datetime.datetime: 'timestamp',
        }

        names = []
        ibis_types = []
        with self._execute(limited_query, results=True) as cur:
            # Fetch the description once and walk it in a single pass
            # instead of requesting it separately for names and types.
            for column in cur.proxy._cursor_description():
                names.append(column[0])
                ibis_types.append(type_map[column[1]])
        return sch.Schema(names, ibis_types)
Ejemplo n.º 9
0
    def partition_schema(self):
        """Return the schema for the partition columns.

        The partition columns are the leading columns of the
        ``self.partitions()`` result whose names also appear in the table
        schema; scanning stops at the first column that does not.

        Returns
        -------
        partition_schema : ibis Schema
            Empty when no leading column matches the table schema.
        """
        schema = self.schema()
        name_to_type = dict(zip(schema.names, schema.types))

        result = self.partitions()

        pnames = []
        ptypes = []
        for x in result.columns:
            if x not in name_to_type:
                break
            pnames.append(x)
            ptypes.append(name_to_type[x])

        # Names and types are accumulated separately rather than unpacked
        # from ``zip(*pairs)``: unpacking an empty zip raises ValueError,
        # which made this method fail when no column matched. Tuples are
        # passed on, as before.
        return sch.Schema(tuple(pnames), tuple(ptypes))
Ejemplo n.º 10
0
    def _get_schema_using_query(self, query):
        """Run ``query`` and build a lower-cased schema from its cursor.

        Parameters
        ----------
        query : string
          Passed unchanged to ``self.raw_sql``

        Returns
        -------
        schema : ibis Schema
          Column names are converted to lower case
        """
        cur = self.raw_sql(query)
        try:
            # resets the state of the cursor and closes operation
            cur.fetchall()
            names, ibis_types = self._adapt_types(cur.description)
        finally:
            # Release the cursor even when fetching or type adaptation
            # fails, so an error here does not leak it.
            cur.release()

        # per #321; most Impala tables will be lower case already, but Avro
        # data, depending on the version of Impala, might have field names in
        # the metastore cased according to the explicit case in the declared
        # avro schema. This is very annoying, so it's easier to just conform on
        # all lowercase fields from Impala.
        names = [x.lower() for x in names]

        return sch.Schema(names, ibis_types)
Ejemplo n.º 11
0
    def schema(self):
        """Return the schema of the grouping keys followed by the metrics.

        Every expression in ``self.by + self.metrics`` contributes one
        named column, except destruct values, which contribute one column
        per field of their struct type.
        """
        names, types = [], []

        for expr in self.by + self.metrics:
            if not isinstance(expr, ir.DestructValue):
                names.append(expr.get_name())
                types.append(expr.type())
                continue

            # Destructure the struct result into one column per field.
            struct_type = expr.type()
            field_names = list(struct_type.names)
            names.extend(field_names)
            types.extend(struct_type[field] for field in field_names)

        return sch.Schema(names, types)
Ejemplo n.º 12
0
def schema_from_table(table):
    """Build an ibis schema from the columns of a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table

    Returns
    -------
    schema : ibis Schema
        One entry per column of `table`, keyed by the column keys; each
        type is converted with the column's nullability and a default
        timezone of ``'UTC'``.
    """
    columns = table.columns

    ibis_types = []
    for column in columns.values():
        converted = sqlalchemy_type_to_ibis_type(
            column.type,
            nullable=column.nullable,
            default_timezone='UTC',
        )
        ibis_types.append(converted)

    return sch.Schema(columns.keys(), ibis_types)
Ejemplo n.º 13
0
    def partition_schema(self):
        """
        For partitioned tables, return the schema (names and types) for the
        partition columns

        The partition columns are the leading columns of the
        ``self.partitions()`` result whose names also appear in the table
        schema; scanning stops at the first column that does not.

        Returns
        -------
        partition_schema : ibis Schema
          Empty when no leading column matches the table schema
        """
        schema = self.schema()
        name_to_type = dict(zip(schema.names, schema.types))

        result = self.partitions()

        pnames = []
        ptypes = []
        for x in result.columns:
            if x not in name_to_type:
                break
            pnames.append(x)
            ptypes.append(name_to_type[x])

        # Names and types are accumulated separately rather than unpacked
        # from ``zip(*pairs)``: unpacking an empty zip raises ValueError,
        # which made this method fail when no column matched. Tuples are
        # passed on, as before.
        return sch.Schema(tuple(pnames), tuple(ptypes))
Ejemplo n.º 14
0
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string, default None

        Returns
        -------
        schema : ibis Schema
          Types are looked up with ``clickhouse_to_ibis.get``
        """
        target = self._fully_qualified_name(table_name, database)
        data, _ = self._execute('DESC {0}'.format(target))

        # The first two entries of the result are the names and the types.
        column_names, backend_types = data[:2]
        converted = map(clickhouse_to_ibis.get, backend_types)

        return sch.Schema(column_names, converted)
Ejemplo n.º 15
0
 def _get_schema_using_query(self, limited_query):
     """Run ``limited_query`` and build a schema from its description.

     The cursor description is converted to names and ibis types by
     ``self._adapt_types`` while the cursor context is still open.
     """
     with self._execute(limited_query, results=True) as cursor:
         description = cursor.description
         column_names, column_types = self._adapt_types(description)
     return sch.Schema(column_names, column_types)
    def _get_schema_using_query(self, limited_query):
        """Run ``limited_query`` and build a schema from its result.

        The object returned by ``self._execute`` is handed to
        ``self._adapt_types``, which yields the names and ibis types.
        """
        executed = self._execute(limited_query, results=True)
        column_names, column_types = self._adapt_types(executed)
        return sch.Schema(column_names, column_types)
 def get_schema(self, name, database=None):
     """Return the schema of table ``name`` in ``database``.

     The names and types come from the object returned by
     ``self._get_teradata_schema``.
     """
     described = self._get_teradata_schema(database, name)
     column_names = described.names
     column_types = described.types
     return sch.Schema(column_names, column_types)
Ejemplo n.º 18
0
    def schema(self):
        """Return a Schema built from ``self.names()`` and ``self.types()``."""
        # Imported inside the method rather than at module level.
        import ibis.expr.schema as sch

        column_names = self.names()
        column_types = self.types()
        return sch.Schema(column_names, column_types)
Ejemplo n.º 19
0
 def _get_schema_using_query(self, query):
     """Run ``query`` and build a schema from the returned column types.

     The second item returned by ``self._execute`` is treated as a
     sequence of (name, type) pairs; each type is looked up with
     ``clickhouse_to_ibis.get``.
     """
     _, name_type_pairs = self._execute(query)
     column_names, backend_types = zip(*name_type_pairs)
     converted = map(clickhouse_to_ibis.get, backend_types)
     return sch.Schema(column_names, converted)