def test_directionally_wrap_statement_directional():
    """Check the SQL produced by ``directionally_wrap_statement``.

    A Bernoulli sample method built with ``50`` / ``units='probability'`` is
    passed in, and the generated statement is expected to wrap the query in
    a ``SNOWSHU_FINAL_SAMPLE`` CTE, sample it with ``SAMPLE BERNOULLI (50)``
    inside a ``SNOWSHU_DIRECTIONAL_SAMPLE`` CTE, and select from the latter.
    Both sides go through ``query_equalize`` before being compared.
    """
    adapter = SnowflakeAdapter()
    sample_method = BernoulliSampleMethod(50, units='probability')
    query = "SELECT * FROM highly_conditional_query"

    # The mocked relation returns CTE names unchanged, so the expected SQL
    # below can be spelled with the raw names.
    relmock = mock.MagicMock()
    relmock.scoped_cte = lambda x: x
    final_cte = relmock.scoped_cte('SNOWSHU_FINAL_SAMPLE')
    directional_cte = relmock.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')

    actual_sql = adapter.directionally_wrap_statement(
        query, relmock, sample_method)
    expected_sql = (
        f" WITH {final_cte} AS ( {query} )"
        f" ,{directional_cte} AS ("
        f" SELECT * FROM {final_cte} SAMPLE BERNOULLI (50) )"
        f" SELECT * FROM {directional_cte} "
    )

    assert query_equalize(actual_sql) == query_equalize(expected_sql)
def prepare(self, relation: 'Relation', source_adapter: 'source_adapter') -> None:
    """Run all necessary pre-activities and instantiate the sample method.

    Prepare is called before primary query compile time, so it can be used
    to do any necessary pre-compile activities (such as collecting a
    histogram from the relation).

    This implementation stores in ``self.size`` the larger of the size
    returned by ``self.sample_size_method`` for the relation's population
    and ``self.min_sample_size``, then sets ``self.sample_method`` to a
    :class:`BernoulliSampleMethod` of that size with ``units='rows'``.

    Args:
        relation: The :class:`Relation <snowshu.core.models.relation.Relation>`
            object to prepare. Only its ``population_size`` is read here.
        source_adapter: The :class:`source adapter
            <snowshu.adapters.source_adapters.base_source_adapter.BaseSourceAdapter>`
            instance to use for executing prepare queries. Not used by this
            implementation.
    """
    computed_size = self.sample_size_method.size(relation.population_size)
    minimum_size = self.min_sample_size
    # Never sample fewer rows than the configured minimum.
    self.size = max(computed_size, minimum_size)
    self.sample_method = BernoulliSampleMethod(self.size, units='rows')
def test_sample_statement():
    """Check the SQL produced by ``sample_statement_from_relation``.

    A relation with random database, schema and table names is sampled with
    a Bernoulli method built with ``10`` / ``units="probability"``; the
    statement is expected to select from the fully qualified relation with
    ``SAMPLE BERNOULLI (10)``. Both sides go through ``query_equalize``
    before being compared.
    """
    adapter = SnowflakeAdapter()
    database, schema, table = (rand_string(10) for _ in range(3))

    # NOTE(review): the random table name is also passed as the
    # materialization, exactly as the original test did -- confirm whether
    # a real materialization value was intended.
    relation = Relation(
        database=database,
        schema=schema,
        name=table,
        materialization=table,
        attributes=[],
    )
    sample_method = BernoulliSampleMethod(10, units="probability")

    actual_sql = adapter.sample_statement_from_relation(
        relation, sample_method)
    expected_sql = (
        f" SELECT * FROM {database}.{schema}.{table}"
        " SAMPLE BERNOULLI (10) "
    )

    assert query_equalize(actual_sql) == query_equalize(expected_sql)
def stub_out_sampling(rel: Relation) -> Relation:
    """Attach a fixed sampling configuration to ``rel`` and return it.

    The relation is mutated in place: its ``sampling`` attribute becomes a
    new ``DefaultSampling`` whose ``sample_method`` is a
    ``BernoulliSampleMethod`` built with ``1500`` and ``units='rows'``.

    Args:
        rel: The relation to stub.

    Returns:
        The same ``rel`` object that was passed in.
    """
    stub_sampling = DefaultSampling()
    rel.sampling = stub_sampling
    stub_sampling.sample_method = BernoulliSampleMethod(1500, units='rows')
    return rel