def test_directionally_wrap_statement_directional():
    """Check the SQL produced when a query is directionally wrapped.

    The expected statement places the original query in a
    ``SNOWSHU_FINAL_SAMPLE`` CTE, samples that CTE with
    ``SAMPLE BERNOULLI (50)`` inside a ``SNOWSHU_DIRECTIONAL_SAMPLE`` CTE,
    and finally selects everything from the directional CTE.
    """
    adapter = SnowflakeAdapter()
    sample_method = BernoulliSampleMethod(50, units='probability')
    query = "SELECT * FROM highly_conditional_query"

    # The mock's scoped_cte is an identity function, so CTE names show up
    # in the expected SQL exactly as they are passed in.
    relmock = mock.MagicMock()
    relmock.scoped_cte = lambda x: x

    actual = adapter.directionally_wrap_statement(query, relmock,
                                                  sample_method)
    expected = f"""
WITH
    {relmock.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS (
{query}
)
,{relmock.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS (
SELECT
    *
FROM
    {relmock.scoped_cte('SNOWSHU_FINAL_SAMPLE')}
SAMPLE BERNOULLI (50)
)
SELECT 
    *
FROM 
    {relmock.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
"""
    assert query_equalize(actual) == query_equalize(expected)
    def prepare(self, relation: 'Relation',
                source_adapter: 'source_adapter') -> None:
        """Run the necessary pre-activities and instantiate the sample method.

        Prepare is called before primary query compile time, so it can be
        used for any necessary pre-compile activities (such as collecting a
        histogram from the relation).

        This implementation computes the sample size from the relation's
        population size, raises it to ``self.min_sample_size`` when it falls
        below that floor, stores the result on ``self.size`` and builds a
        row-based :class:`BernoulliSampleMethod` from it.

        Args:
            relation: The :class:`Relation <snowshu.core.models.relation.Relation>`
                object to prepare; its ``population_size`` drives the sample size.
            source_adapter: The :class:`source adapter <snowshu.adapters.source_adapters.base_source_adapter.BaseSourceAdapter>`
                instance to use for executing prepare queries. It is not
                used by this implementation.
        """
        requested_size = self.sample_size_method.size(relation.population_size)
        floor_size = self.min_sample_size
        # Never sample fewer rows than the configured minimum.
        self.size = max(requested_size, floor_size)
        self.sample_method = BernoulliSampleMethod(self.size, units='rows')
def test_sample_statement():
    """Check the sample statement generated for a relation.

    The adapter is expected to select everything from the fully qualified
    ``database.schema.table`` name followed by ``SAMPLE BERNOULLI (10)``.
    """
    adapter = SnowflakeAdapter()
    # Three independent random identifiers, generated in this order.
    DATABASE, SCHEMA, TABLE = (rand_string(10) for _ in range(3))

    # NOTE(review): ``materialization`` receives the random table name here;
    # confirm whether a materialization constant was intended instead.
    relation = Relation(
        database=DATABASE,
        schema=SCHEMA,
        name=TABLE,
        materialization=TABLE,
        attributes=[],
    )
    sample_method = BernoulliSampleMethod(10, units="probability")
    sample = adapter.sample_statement_from_relation(relation, sample_method)

    expected = f"""
SELECT
    *
FROM 
    {DATABASE}.{SCHEMA}.{TABLE}
    SAMPLE BERNOULLI (10)
"""
    assert query_equalize(sample) == query_equalize(expected)
# Example #4
0
def stub_out_sampling(rel: Relation) -> Relation:
    """Attach a fixed sampling configuration to ``rel`` and return it.

    The relation is mutated in place: its ``sampling`` attribute becomes a
    fresh ``DefaultSampling`` whose ``sample_method`` is a
    ``BernoulliSampleMethod`` of 1500 rows.

    Args:
        rel: The relation to stub sampling on.

    Returns:
        The same ``rel`` object that was passed in.
    """
    rel.sampling = DefaultSampling()
    fixed_row_sampler = BernoulliSampleMethod(1500, units='rows')
    rel.sampling.sample_method = fixed_row_sampler
    return rel