Ejemplo n.º 1
0
def generate_block(*args, overlap):
    ''' Build two random conjunctions whose per-attribute clauses overlap.

    Each positional argument adds clauses on its own attribute ``x<i>``:
    'num' (integer bounds), 'dt' (the same bounds converted with
    ``datetime.fromordinal``) or 'arg' (an ``In`` value set). The keyword-only
    ``overlap`` ('partial', 'none' or 'superset') picks how the clauses of the
    second expression are placed relative to the first; any other value leaves
    the shifts / second value set as None.

    Returns the pair ``(And(first clauses), And(second clauses))`` and raises
    ValueError for an unrecognised positional argument.
    '''
    first = []
    second = []
    for index, kind in enumerate(args):
        attribute = Attribute('x{}'.format(index))
        if kind in ('dt', 'num'):
            # First range is [lower, lower + 1000].
            lower = random.randint(2100, 3000)
            upper = lower + 1000

            # Shift of the second range's lower bound: a small move up
            # (partial), a move past the first range (none), or a move
            # down (superset).
            if overlap == 'partial':
                lower_shift = random.randint(200, 500)
            elif overlap == 'none':
                lower_shift = random.randint(2000, 2500)
            elif overlap == 'superset':
                lower_shift = random.randint(-500, -200)
            else:
                lower_shift = None

            # Shift of the second range's upper bound: always upwards.
            if overlap in ('partial', 'superset'):
                upper_shift = random.randint(200, 500)
            elif overlap == 'none':
                upper_shift = random.randint(2000, 2500)
            else:
                upper_shift = None

            # 'num' uses the raw integers, 'dt' converts ordinals to datetimes.
            if kind == 'num':
                convert = lambda ordinal: ordinal
            else:
                convert = datetime.fromordinal

            first += [
                Ge(attribute, convert(lower)),
                Le(attribute, convert(upper)),
            ]
            second += [
                Ge(attribute, convert(lower + lower_shift)),
                Le(attribute, convert(upper + upper_shift)),
            ]
        elif kind == 'arg':
            base_values = [random.randint(0, 10) for _ in range(4)]
            first.append(In(attribute, base_values))

            # Fresh values are drawn from 20..30, outside the 0..10 range
            # used for the base set.
            if overlap == 'partial':
                other_values = base_values[:2] + [
                    random.randint(20, 30) for _ in range(2)]
            elif overlap == 'none':
                other_values = [random.randint(20, 30) for _ in range(4)]
            elif overlap == 'superset':
                other_values = base_values + [
                    random.randint(20, 30) for _ in range(2)]
            else:
                other_values = None
            second.append(In(attribute, other_values))
        else:
            raise ValueError(kind)

    return And(first), And(second)
Ejemplo n.º 2
0
* Don't use pandas - check data in python so this can go in the expressions test package.
* This isn't hitting the truth table expansion function!!
'''

import itertools

import pandas as pd
import pytest
from hypothesis import assume, event, given

from split_query.engine import query_df
from split_query.core import Attribute, to_dnf_simplified
from split_query.core.logic import get_variables
from .core.strategies import continuous_numeric_relation, expression_trees

# Attributes available to the tests in this module.
x, y, z = map(Attribute, 'xyz')
dtx, point = Attribute('dtx'), Attribute('point')

# 3D grid source data: every (x, y, z) combination over range(-10, 11, 4),
# each row extended with a 'point' label of the form "x:y:z".
_data = itertools.product(range(-10, 11, 4), repeat=3)


def _func(entry):
    ''' Return the row as a Series extended with its "x:y:z" point label. '''
    label = '{x}:{y}:{z}'.format(**entry)
    return pd.Series(dict(entry, point=label))


SOURCE_3D = pd.DataFrame(
    data=list(_data),
    columns=['x', 'y', 'z'],
).apply(_func, axis='columns')


@given(
    expression_trees(continuous_numeric_relation('x')
                     | continuous_numeric_relation('y'),
                     max_depth=2,
                     min_width=1,
Ejemplo n.º 3
0
import tempfile

import mock
import pandas as pd
import pytest

from split_query.cache import minimal_cache_inmemory, minimal_cache_persistent
from split_query.core import And, Or, Not, Le, Lt, Ge, Gt, Attribute
from split_query.engine import query_df


# 2D grid source data: every (x, y) pair over range(5), each row extended
# with a 'point' label of the form "x:y".
_data = itertools.product(range(5), repeat=2)


def _func(entry):
    ''' Return the row as a Series extended with its "x:y" point label. '''
    label = '{x}:{y}'.format(**entry)
    return pd.Series(dict(entry, point=label))


SOURCE_2D = pd.DataFrame(
    data=list(_data),
    columns=['x', 'y'],
).apply(_func, axis='columns')
# Attributes for the two grid columns.
X, Y = Attribute('x'), Attribute('y')


def source_query(expression):
    ''' Evaluate the expression against SOURCE_2D with query_df and return the result. '''
    result = query_df(SOURCE_2D, expression)
    return result


def remote_iter(query):
    ''' Yield (expression, data) pairs for the two partitions X <= 2 and X > 2.

    The ``query`` argument is not used: the same two partitions are yielded
    whatever is passed in.
    '''
    for relation in (Le, Gt):
        yield relation(X, 2), source_query(relation(X, 2))


# Each testcase is a sequence of query pairs: the query passed to the cache,
# and the expected query the cache should run on the remote given the queries
Ejemplo n.º 4
0
def test_filter_between(dataset):
    ''' Filter x with between(1, 3), paired with And([Ge(x, 1), Le(x, 3)]). '''
    filtered = dataset[dataset.x.between(1, 3)]
    lower_bound = Ge(Attribute('x'), 1)
    upper_bound = Le(Attribute('x'), 3)
    return filtered, And([lower_bound, upper_bound])
Ejemplo n.º 5
0
def test_filter_not(dataset):
    ''' Filter with a negated ``y <= 2`` mask, paired with Not(Le(y, 2)). '''
    mask = ~(dataset.y <= 2)
    filtered = dataset[mask]
    expression = Not(Le(Attribute('y'), 2))
    return filtered, expression
Ejemplo n.º 6
0
def test_filter_isin(dataset):
    ''' Filter s with isin(['a', 'b', 'c']), paired with the matching In expression. '''
    mask = dataset.s.isin(['a', 'b', 'c'])
    filtered = dataset[mask]
    expression = In(Attribute('s'), ['a', 'b', 'c'])
    return filtered, expression
Ejemplo n.º 7
0
def test_filter_and(dataset):
    ''' Filter with ``(y < 2) & (x > 5)``, paired with And([Lt(y, 2), Gt(x, 5)]). '''
    y_mask = dataset.y < 2
    x_mask = dataset.x > 5
    filtered = dataset[y_mask & x_mask]
    clauses = [Lt(Attribute('y'), 2), Gt(Attribute('x'), 5)]
    return filtered, And(clauses)
Ejemplo n.º 8
0
def test_filter_or(dataset):
    ''' Filter with ``(y == 2) | (z < 1)``, paired with Or([Eq(y, 2), Lt(z, 1)]). '''
    y_mask = dataset.y == 2
    z_mask = dataset.z < 1
    filtered = dataset[y_mask | z_mask]
    clauses = [Eq(Attribute('y'), 2), Lt(Attribute('z'), 1)]
    return filtered, Or(clauses)
Ejemplo n.º 9
0
def test_filter_chained(dataset):
    ''' Apply two filters in sequence, paired with And([Le(x, 1), Ge(z, 0)]).

    The second mask is computed from the original ``dataset``, not from the
    result of the first filter.
    '''
    first_pass = dataset[dataset.x <= 1]
    filtered = first_pass[dataset.z >= 0]
    clauses = [Le(Attribute('x'), 1), Ge(Attribute('z'), 0)]
    return filtered, And(clauses)
Ejemplo n.º 10
0
def test_filter_eq_getitem(dataset):
    ''' Filter on ``x == 4`` using item access (dataset['x']), paired with Eq(x, 4). '''
    mask = dataset['x'] == 4
    filtered = dataset[mask]
    expression = Eq(Attribute('x'), 4)
    return filtered, expression
Ejemplo n.º 11
0
def test_filter_eq(dataset):
    ''' Filter on ``x == 4`` using attribute access (dataset.x), paired with Eq(x, 4). '''
    mask = dataset.x == 4
    filtered = dataset[mask]
    expression = Eq(Attribute('x'), 4)
    return filtered, expression
Ejemplo n.º 12
0
import pytest

from hypothesis import event, given, strategies as st

from split_query.core import Attribute, And, Not
from split_query.core.domain import simplify_flat_and
from split_query.core.wrappers import AttributeContainer, ExpressionContainer
from .strategies import mixed_numeric_relation

# Wrapped attributes, so the testcases below can be written with Python
# comparison and boolean operators.
x, y = (AttributeContainer(Attribute(name)) for name in 'xy')
other = ExpressionContainer('exp1')

TESTCASES = [
    # No redundancy.
    ((x > 1) & (x < 2), (x > 1) & (x < 2)),
    # Redundant bounds.
    ((x < 1) & (x < 0), (x < 0)),
    ((x > 1) & (x < 2) & (x < 3), (x < 2) & (x > 1)),
    ((x > 1) & (x < 2) & (x > 0), (x > 1) & (x < 2)),
    # Strictly lt/gt bounds are tighter.
    ((x > 1) & (x >= 1), (x > 1)),
    ((x < 1) & (x <= 1), (x < 1)),
    # Multivariate.
    (((y > 1) & (y > 2) & (y > 3) & (x <= 1) & (x >= 1) & (x < 3)),
     ((y > 3) & (x == 1))),
    # Conflict cases.
    ((x > 2) & (x < 1), False),
    ((x > 1) & (x < 1), False),
    ((x >= 1) & (x < 1), False),
    ((x > 1) & (x <= 1), False),
Ejemplo n.º 13
0
import pytest

from split_query.core import Attribute, And, Or, Not, In, Ge, Le, Lt, Gt, Eq
from split_query.extract import extract_parameters, split_parameters

# Attributes referenced by the testcases below.
XVAR, YVAR = Attribute('x'), Attribute('y')

TESTCASES_EXTRACT = [
    (In(XVAR,
        [1, 2, 3]), [dict(attr='x', type='tag', key='xtags',
                          single=False)], [(In(XVAR, [1, 2, 3]),
                                            dict(xtags={1, 2, 3}))]),
    (And([In(XVAR, [1, 2, 3]), Ge(YVAR, 2),
          Le(YVAR, 4)]), [
              dict(attr='x', type='tag', key='xtags', single=False),
              dict(attr='y',
                   type='range',
                   key_lower='from_y',
                   key_upper='to_y')
          ], [(And([In(XVAR, [1, 2, 3]),
                    Ge(YVAR, 2),
                    Le(YVAR, 4)]), dict(xtags={1, 2, 3}, from_y=2, to_y=4))]),
    (And([In(XVAR, [1, 2, 3]), In(YVAR, [4, 5, 6])]), [
        dict(attr='x', type='tag', key='xtag', single=True),
        dict(attr='y', type='tag', key='ytags', single=False)
    ], [
        (And([In(XVAR, [1]), In(YVAR,
                                [4, 5, 6])]), dict(xtag=1, ytags={4, 5, 6})),
        (And([In(XVAR, [2]), In(YVAR,
                                [4, 5, 6])]), dict(xtag=2, ytags={4, 5, 6})),