def create_table(
    self,
    table,
    schema=None,
    typed=True,
    read_only=True,  # TO PREVENT ACCIDENTAL WRITING
    sharded=False,
    partition=Null,  # PARTITION RULES
    cluster=None,  # TUPLE OF FIELDS TO SORT DATA
    top_level_fields=Null,
    kwargs=None,
):
    """
    Create a new BigQuery table (or shard + alias view) in this dataset and
    return a Table wrapper for it.

    :param table: short name of the table (escaped before use)
    :param schema: optional column schema; when falsy a default (typed or
        untyped) schema is copied in
    :param read_only: defaults True so accidental calls do not create writable
        tables; NOTE(review): the guard below routes read_only=True through
        Log.error — presumably that raises (mo-logs convention), so callers
        must pass read_only=False to actually create — confirm
    :param sharded: when True, create a randomly-suffixed shard table and a
        view with the requested name pointing at it
    :param partition: partition rules handed to Snowflake
    :param cluster: fields to derive BigQuery clustering_fields from
    :param kwargs: config bundle; must carry a schema, not a pre-parsed
        lookup/flake (checked below)
    """
    # kwargs is a mo-dots Data; `!=` here is its overloaded comparison, so
    # do not "fix" to `is not None`
    if kwargs.lookup != None or kwargs.flake != None:
        Log.error("expecting schema, not lookup")
    full_name = self.full_name + escape_name(table)
    if not schema:
        # WE MUST HAVE SOMETHING
        if typed:
            schema = copy(DEFAULT_TYPED_SCHEMA)
        else:
            schema = copy(DEFAULT_SCHEMA)
    flake = Snowflake(text(full_name), top_level_fields, partition, schema=schema)

    if read_only:
        Log.error("Can not create a table for read-only use")

    if sharded:
        # REAL TABLE GETS A RANDOM SUFFIX; A VIEW WITH THE REQUESTED NAME
        # POINTS AT IT SO THE SHARD CAN BE SWAPPED LATER
        shard_name = escape_name(table + "_" + "".join(randoms.sample(ALLOWED, 20)))
        shard_api_name = self.full_name + shard_name
        _shard = bigquery.Table(text(shard_api_name), schema=flake.to_bq_schema())
        _shard.time_partitioning = unwrap(flake._partition.bq_time_partitioning)
        # ONLY THE FIRST LEAF OF EACH REQUESTED CLUSTER FIELD IS USED;
        # BigQuery REQUIRES None (NOT []) WHEN THERE IS NO CLUSTERING
        _shard.clustering_fields = [
            c.es_column
            for f in listwrap(cluster)
            for c in [first(flake.leaves(f))]
            if c
        ] or None
        self.shard = self.client.create_table(_shard)
        self.create_view(full_name, shard_api_name)
    else:
        _table = bigquery.Table(text(full_name), schema=flake.to_bq_schema())
        _table.time_partitioning = unwrap(flake._partition.bq_time_partitioning)
        # UNSHARDED: ALL LEAVES OF EACH CLUSTER FIELD ARE USED
        _table.clustering_fields = [
            l.es_column for f in listwrap(cluster) for l in flake.leaves(f)
        ] or None
        self.client.create_table(_table)
        DEBUG and Log.note("created table {{table}}", table=_table.table_id)
    return Table(
        table=table,
        typed=typed,
        read_only=read_only,
        sharded=sharded,
        partition=partition,
        top_level_fields=top_level_fields,
        kwargs=kwargs,
        container=self,
    )
def merge_shards(self):
    """
    Consolidate all shards of this (sharded) table into one primary shard.

    Steps visible below:
    1. Scan the dataset for the alias view and every shard whose name is the
       alias name plus a SUFFIX_PATTERN suffix.
    2. Parse each shard's schema into a Snowflake and merge them into one
       total schema.
    3. Pick a destination: the current primary shard if its schema already
       equals the merged schema, otherwise a freshly created shard.
    4. INSERT the rows of every other shard into the destination (UNION ALL
       in chunks for schema-identical shards; one INSERT per mismatched
       shard), deleting each source shard after its copy.
    5. Re-point the alias view at the destination shard.

    NOTE(review): assumes Log.error raises (mo-logs convention), so the
    "expecting ... view" checks abort the merge — confirm.
    """
    shards = []
    tables = list(self.container.client.list_tables(self.container.dataset))
    current_view = Null  # VIEW THAT POINTS TO PRIMARY SHARD
    primary_shard_name = None  # PRIMARY SHARD
    api_name = escape_name(self.short_name)
    for table_item in tables:
        table = table_item.reference
        table_api_name = ApiName(table.table_id)
        if text(table_api_name).startswith(text(api_name)):
            if table_api_name == api_name:
                # EXACT NAME MATCH MUST BE THE ALIAS VIEW
                if table_item.table_type != "VIEW":
                    Log.error("expecting {{table}} to be a view", table=api_name)
                current_view = self.container.client.get_table(table)
                view_sql = current_view.view_query
                primary_shard_name = _extract_primary_shard_name(view_sql)
            elif SUFFIX_PATTERN.match(text(table_api_name)[len(text(api_name)) :]):
                # NAME PLUS A VALID SUFFIX IS A SHARD; SKIP (WITH WARNING)
                # ANY SHARD WE CAN NOT FETCH
                try:
                    known_table = self.container.client.get_table(table)
                    shards.append(known_table)
                except Exception as e:
                    Log.warning(
                        "could not merge table {{table}}", table=table, cause=e
                    )

    if not current_view:
        Log.error(
            "expecting {{table}} to be a view pointing to a table", table=api_name
        )

    # PARSE EACH SHARD'S SCHEMA, THEN MERGE INTO THE TOTAL SCHEMA
    shard_flakes = [
        Snowflake.parse(
            big_query_schema=shard.schema,
            es_index=text(self.container.full_name + ApiName(shard.table_id)),
            top_level_fields=self.top_level_fields,
            partition=self.partition,
        )
        for shard in shards
    ]
    total_flake = snowflakes.merge(
        shard_flakes,
        es_index=text(self.full_name),
        top_level_fields=self.top_level_fields,
        partition=self.partition,
    )

    # CHOOSE DESTINATION: for/else — the else runs only when no break, i.e.
    # when the current primary shard is missing or its schema differs
    for i, s in enumerate(shards):
        if ApiName(s.table_id) == primary_shard_name:
            if total_flake == shard_flakes[i]:
                # USE THE CURRENT PRIMARY SHARD AS A DESTINATION
                del shards[i]
                del shard_flakes[i]
                break
    else:
        name = self.short_name + "_" + "".join(randoms.sample(ALLOWED, 20))
        primary_shard_name = escape_name(name)
        self.container.create_table(
            table=name,
            schema=total_flake.schema,
            sharded=False,
            read_only=False,
            kwargs=self.config,
        )

    primary_full_name = self.container.full_name + primary_shard_name

    # BUILD ONE SELECT PER REMAINING SHARD, MAPPED ONTO THE TOTAL SCHEMA
    selects = []
    for flake, table in zip(shard_flakes, shards):
        q = ConcatSQL(
            SQL_SELECT,
            JoinSQL(ConcatSQL(SQL_COMMA, SQL_CR), gen_select(total_flake, flake)),
            SQL_FROM,
            quote_column(ApiName(table.dataset_id, table.table_id)),
        )
        selects.append(q)

    DEBUG and Log.note(
        "inserting into table {{table}}", table=text(primary_shard_name)
    )

    # SPLIT SHARDS BY WHETHER THEIR SCHEMA ALREADY EQUALS THE TOTAL SCHEMA
    matched = []
    unmatched = []
    for sel, shard, flake in zip(selects, shards, shard_flakes):
        if flake == total_flake:
            matched.append((sel, shard, flake))
        else:
            unmatched.append((sel, shard, flake))

    # EVERYTHING THAT IS IDENTICAL TO PRIMARY CAN BE MERGED WITH SIMPLE UNION ALL
    if matched:
        for g, merge_chunk in jx.chunk(matched, MAX_MERGE):
            command = ConcatSQL(
                SQL_INSERT,
                quote_column(primary_full_name),
                JoinSQL(
                    SQL_UNION_ALL,
                    (
                        sql_query(
                            {
                                "from": text(
                                    self.container.full_name + ApiName(shard.table_id)
                                )
                            },
                            schema,
                        )
                        for _, shard, schema in merge_chunk
                    ),
                ),
            )
            DEBUG and Log.note("{{sql}}", sql=text(command))
            job = self.container.query_and_wait(command)
            DEBUG and Log.note(
                "job {{id}} state = {{state}}", id=job.job_id, state=job.state
            )
            if job.errors:
                Log.error(
                    "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                    sql=command.sql,
                    reason=job.errors,
                )
            # SOURCE SHARDS ARE DROPPED ONLY AFTER THE CHUNK SUCCEEDS
            for _, shard, _ in merge_chunk:
                self.container.client.delete_table(shard)

    # ALL OTHER SCHEMAS MISMATCH
    for s, shard, _ in unmatched:
        try:
            command = ConcatSQL(SQL_INSERT, quote_column(primary_full_name), s)
            DEBUG and Log.note("{{sql}}", sql=text(command))
            job = self.container.query_and_wait(command)
            DEBUG and Log.note(
                "from {{shard}}, job {{id}}, state {{state}}",
                id=job.job_id,
                shard=shard.table_id,
                state=job.state,
            )
            if job.errors:
                # AN EMPTY (SCHEMA-LESS) SHARD IS NOT AN ERROR; ANYTHING
                # ELSE ABORTS BEFORE THE delete_table BELOW
                if all(
                    " does not have a schema." in m
                    for m in wrap(job.errors).message
                ):
                    pass  # NOTHING TO DO
                else:
                    Log.error(
                        "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                        sql=command.sql,
                        reason=job.errors,
                    )
            self.container.client.delete_table(shard)
        except Exception as e:
            Log.warning("failure to merge {{shard}}", shard=shard, cause=e)

    # REMOVE OLD VIEW
    view_full_name = self.container.full_name + api_name
    if current_view:
        self.container.client.delete_table(current_view)

    # CREATE NEW VIEW
    self.container.create_view(view_full_name, primary_full_name)
def __init__(
    self,
    table,
    typed,
    read_only,
    sharded,
    container,
    id=Null,
    partition=Null,
    cluster=Null,
    top_level_fields=Null,
    kwargs=None,
):
    """
    Attach to an existing BigQuery table in the given container.

    For an unsharded table the alias must be a real table (unless read_only).
    For a sharded table the alias must be a view; its SQL names the primary
    shard, which is fetched here. When that shard is missing or broken, a
    fresh shard is created and the view is re-pointed at it.
    """
    self.short_name = table
    self.typed = typed
    self.read_only = read_only
    self.cluster = cluster
    self.id = id
    self.top_level_fields = top_level_fields
    # KEEP ENOUGH CONFIG TO REPLICATE THIS TABLE ELSEWHERE
    self.config = Data(
        typed=typed,
        read_only=read_only,
        sharded=sharded,
        id=id,
        partition=partition,
        cluster=cluster,
        top_level_fields=top_level_fields,
    )
    self.full_name = container.full_name + escape_name(table)
    alias_view = container.client.get_table(text(self.full_name))
    self.alias_view = alias_view
    self.partition = partition
    self.container = container

    if sharded:
        # SHARDED TABLES ARE REACHED THROUGH A VIEW OVER THE PRIMARY SHARD
        if alias_view.table_type != "VIEW":
            Log.error("Sharded tables require a view")
        view_table = container.client.get_table(text(self.full_name))
        primary_shard = _extract_primary_shard_name(view_table.view_query)
        try:
            self.shard = container.client.get_table(
                text(container.full_name + primary_shard)
            )
            self._flake = Snowflake.parse(
                alias_view.schema,
                text(self.full_name),
                self.top_level_fields,
                partition,
            )
        except Exception as cause:
            # PRIMARY SHARD IS GONE OR UNREADABLE: BUILD A NEW ONE AND
            # RE-POINT THE VIEW AT IT
            Log.warning("view {{name}} is invalid", name=primary_shard, cause=cause)
            self._flake = Snowflake.parse(
                alias_view.schema,
                text(self.full_name),
                self.top_level_fields,
                partition,
            )
            # REMOVE STALE VIEW
            container.client.delete_table(view_table)
            # MAKE NEW VIEW POINTING TO NEW SHARD
            self._create_new_shard()
            container.create_view(
                self.full_name,
                self.container.full_name + ApiName(self.shard.table_id),
            )
    else:
        # UNSHARDED: THE ALIAS ITSELF IS THE ONLY SHARD
        if not read_only and alias_view.table_type == "VIEW":
            Log.error("Expecting a table, not a view")
        self.shard = alias_view
        self._flake = Snowflake.parse(
            alias_view.schema,
            text(self.full_name),
            self.top_level_fields,
            partition,
        )

    # EXTEND (INSERT) MACHINERY
    self.last_extend = Date.now() - EXTEND_LIMIT
    self.extend_locker = Lock()
    self.extend_queue = Queue("wait for extend")