def generate_block(*args, overlap):
    ''' Generate a pair of conjunctions over attributes x0, x1, ... whose
    per-attribute constraints relate to each other according to overlap.

    Each positional argument names the kind of constraint built for one
    attribute (attribute i is named 'x<i>'):
      'num' - integer bounds [start, start + 1000], start drawn from 2100-3000.
      'dt'  - the same bounds converted with datetime.fromordinal.
      'arg' - a membership test on four values drawn from 0-10.

    overlap is keyword-only and must be in {'partial', 'none', 'superset'}.
    It controls how the second expression is derived from the first:
      'partial'  - range shifted up by 200-500 at both ends; set keeps the
                   first two original values plus two values from 20-30.
      'none'     - range shifted up by 2000-2500 at both ends; set replaced
                   by four values from 20-30.
      'superset' - range start lowered by 200-500 and end raised by 200-500;
                   set extended with two values from 20-30.

    Returns the tuple (And(clauses1), And(clauses2)).
    Raises ValueError for an unknown overlap or an unknown argument kind.
    '''
    # Fail fast: an unknown overlap would otherwise leave the shifts (or the
    # second value set) as None and surface as an obscure TypeError later on.
    if overlap not in ('partial', 'none', 'superset'):
        raise ValueError(overlap)

    clauses1 = []
    clauses2 = []
    for i, arg in enumerate(args):
        attr = Attribute('x{}'.format(i))
        if arg == 'dt' or arg == 'num':
            start_dt = random.randint(2100, 3000)
            end_dt = start_dt + 1000
            # The start shift is always drawn before the end shift so that a
            # seeded random generator yields a stable sequence.
            if overlap == 'partial':
                start_shift = random.randint(200, 500)
                end_shift = random.randint(200, 500)
            elif overlap == 'none':
                # Shifts exceed the range width (1000), so ranges are disjoint.
                start_shift = random.randint(2000, 2500)
                end_shift = random.randint(2000, 2500)
            else:
                # 'superset': widen the range at both ends.
                start_shift = random.randint(-500, -200)
                end_shift = random.randint(200, 500)
            first = (start_dt, end_dt)
            second = (start_dt + start_shift, end_dt + end_shift)
            if arg == 'dt':
                first = tuple(map(datetime.fromordinal, first))
                second = tuple(map(datetime.fromordinal, second))
            clauses1.extend([Ge(attr, first[0]), Le(attr, first[1])])
            clauses2.extend([Ge(attr, second[0]), Le(attr, second[1])])
        elif arg == 'arg':
            valueset = [random.randint(0, 10) for _ in range(4)]
            clauses1.append(In(attr, valueset))
            # Values from 20-30 can never collide with the 0-10 originals.
            if overlap == 'partial':
                other_values = (
                    valueset[:2] + [random.randint(20, 30) for _ in range(2)])
            elif overlap == 'none':
                other_values = [random.randint(20, 30) for _ in range(4)]
            else:
                # 'superset': every original value plus two new ones.
                other_values = (
                    valueset + [random.randint(20, 30) for _ in range(2)])
            clauses2.append(In(attr, other_values))
        else:
            raise ValueError(arg)
    return And(clauses1), And(clauses2)
* Don't use pandas - check data in python so this can go in the expressions test package. * This isn't hitting the truth table expansion function!! ''' import itertools import pandas as pd import pytest from hypothesis import assume, event, given from split_query.engine import query_df from split_query.core import Attribute, to_dnf_simplified from split_query.core.logic import get_variables from .core.strategies import continuous_numeric_relation, expression_trees x, y, z = [Attribute(n) for n in 'xyz'] dtx = Attribute('dtx') point = Attribute('point') _data = itertools.product(range(-10, 11, 4), repeat=3) _func = lambda entry: pd.Series( dict(entry, point='{x}:{y}:{z}'.format(**entry))) SOURCE_3D = pd.DataFrame(columns=['x', 'y', 'z'], data=list(_data)).apply(_func, axis='columns') @given( expression_trees(continuous_numeric_relation('x') | continuous_numeric_relation('y'), max_depth=2, min_width=1,
import tempfile

import mock
import pandas as pd
import pytest

from split_query.cache import minimal_cache_inmemory, minimal_cache_persistent
from split_query.core import And, Or, Not, Le, Lt, Ge, Gt, Attribute
from split_query.engine import query_df

# NOTE(review): itertools is used below but its import is not visible in this
# excerpt - confirm it is imported earlier in the file.

# 2D grid source data
# Every (x, y) combination with x and y in range(5).
_data = itertools.product(range(5), repeat=2)
# Extends a row with a 'point' entry formatted as '<x>:<y>'.
_func = lambda entry: pd.Series(dict(entry, point='{x}:{y}'.format(**entry)))
SOURCE_2D = pd.DataFrame(columns=['x', 'y'], data=list(_data)).apply(_func, axis='columns')
X = Attribute('x')
Y = Attribute('y')


def source_query(expression):
    ''' Runs a query on the source data using pandas engine. '''
    return query_df(SOURCE_2D, expression)


def remote_iter(query):
    ''' Yield (expression, data) pairs for the two halves x <= 2 and x > 2 of
    the source data. The query argument is not used by this function. '''
    yield Le(X, 2), source_query(Le(X, 2))
    yield Gt(X, 2), source_query(Gt(X, 2))


# Each testcase is a sequence of query pairs: the query passed to the cache,
# and the expected query the cache should run on the remote given the queries
def test_filter_between(dataset):
    ''' Pair a between(1, 3) filter on x with its expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    in_range = dataset.x.between(1, 3)
    filtered = dataset[in_range]
    lower = Ge(Attribute('x'), 1)
    upper = Le(Attribute('x'), 3)
    return filtered, And([lower, upper])
def test_filter_not(dataset):
    ''' Pair a negated `y <= 2` filter with its expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    at_most_two = dataset.y <= 2
    filtered = dataset[~at_most_two]
    expected = Not(Le(Attribute('y'), 2))
    return filtered, expected
def test_filter_isin(dataset):
    ''' Pair an isin filter on s with its expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    membership = dataset.s.isin(['a', 'b', 'c'])
    filtered = dataset[membership]
    expected = In(Attribute('s'), ['a', 'b', 'c'])
    return filtered, expected
def test_filter_and(dataset):
    ''' Pair an `&` combination of two filters with its expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    y_below = dataset.y < 2
    x_above = dataset.x > 5
    filtered = dataset[y_below & x_above]
    expected = And([Lt(Attribute('y'), 2), Gt(Attribute('x'), 5)])
    return filtered, expected
def test_filter_or(dataset):
    ''' Pair an `|` combination of two filters with its expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    y_equal = dataset.y == 2
    z_below = dataset.z < 1
    filtered = dataset[y_equal | z_below]
    expected = Or([Eq(Attribute('y'), 2), Lt(Attribute('z'), 1)])
    return filtered, expected
def test_filter_chained(dataset):
    ''' Pair two successively applied filters with their expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    # Apply the x filter first, then index the result with the z filter.
    after_x = dataset[dataset.x <= 1]
    filtered = after_x[dataset.z >= 0]
    expected = And([Le(Attribute('x'), 1), Ge(Attribute('z'), 0)])
    return filtered, expected
def test_filter_eq_getitem(dataset):
    ''' Pair an equality filter, using item access for x, with its expected
    expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    x_is_four = dataset['x'] == 4
    filtered = dataset[x_is_four]
    expected = Eq(Attribute('x'), 4)
    return filtered, expected
def test_filter_eq(dataset):
    ''' Pair an equality filter, using attribute access for x, with its
    expected expression.

    Returns a (filtered dataset, expression) tuple.
    '''
    x_is_four = dataset.x == 4
    filtered = dataset[x_is_four]
    expected = Eq(Attribute('x'), 4)
    return filtered, expected
import pytest from hypothesis import event, given, strategies as st from split_query.core import Attribute, And, Not from split_query.core.domain import simplify_flat_and from split_query.core.wrappers import AttributeContainer, ExpressionContainer from .strategies import mixed_numeric_relation x = AttributeContainer(Attribute('x')) y = AttributeContainer(Attribute('y')) other = ExpressionContainer('exp1') TESTCASES = [ # No redundancy. ((x > 1) & (x < 2), (x > 1) & (x < 2)), # Redundant bounds. ((x < 1) & (x < 0), (x < 0)), ((x > 1) & (x < 2) & (x < 3), (x < 2) & (x > 1)), ((x > 1) & (x < 2) & (x > 0), (x > 1) & (x < 2)), # Strictly lt/gt bounds are tighter. ((x > 1) & (x >= 1), (x > 1)), ((x < 1) & (x <= 1), (x < 1)), # Multivariate. (((y > 1) & (y > 2) & (y > 3) & (x <= 1) & (x >= 1) & (x < 3)), ((y > 3) & (x == 1))), # Conflict cases. ((x > 2) & (x < 1), False), ((x > 1) & (x < 1), False), ((x >= 1) & (x < 1), False), ((x > 1) & (x <= 1), False),
import pytest from split_query.core import Attribute, And, Or, Not, In, Ge, Le, Lt, Gt, Eq from split_query.extract import extract_parameters, split_parameters XVAR = Attribute('x') YVAR = Attribute('y') TESTCASES_EXTRACT = [ (In(XVAR, [1, 2, 3]), [dict(attr='x', type='tag', key='xtags', single=False)], [(In(XVAR, [1, 2, 3]), dict(xtags={1, 2, 3}))]), (And([In(XVAR, [1, 2, 3]), Ge(YVAR, 2), Le(YVAR, 4)]), [ dict(attr='x', type='tag', key='xtags', single=False), dict(attr='y', type='range', key_lower='from_y', key_upper='to_y') ], [(And([In(XVAR, [1, 2, 3]), Ge(YVAR, 2), Le(YVAR, 4)]), dict(xtags={1, 2, 3}, from_y=2, to_y=4))]), (And([In(XVAR, [1, 2, 3]), In(YVAR, [4, 5, 6])]), [ dict(attr='x', type='tag', key='xtag', single=True), dict(attr='y', type='tag', key='ytags', single=False) ], [ (And([In(XVAR, [1]), In(YVAR, [4, 5, 6])]), dict(xtag=1, ytags={4, 5, 6})), (And([In(XVAR, [2]), In(YVAR, [4, 5, 6])]), dict(xtag=2, ytags={4, 5, 6})),