Esempio n. 1
0
def generate_block(*args, overlap):
    ''' Generate a pair of And expressions with a controlled overlap.

    Each positional argument adds a constraint on its own attribute
    (``x0``, ``x1``, ...) to both expressions:

    - ``'num'``: an integer range, expressed as a Ge/Le pair.
    - ``'dt'``: the same range, with the bounds converted through
      ``datetime.fromordinal``.
    - ``'arg'``: an In clause over four random integers.

    ``overlap`` (keyword-only) selects how the second expression relates to
    the first:

    - ``'partial'``: ranges are shifted upwards by less than their width;
      sets keep the first two original values and add two new ones.
    - ``'none'``: ranges are shifted past the end of the original range;
      sets consist of four new values only.
    - ``'superset'``: ranges are widened at both ends; sets keep all of the
      original values and add two new ones.

    Returns the tuple ``(And(clauses1), And(clauses2))``.

    Raises ValueError for an unrecognised ``overlap`` (checked before any
    random number is drawn) or for an unrecognised entry in ``args``.
    '''
    # (start shift bounds, end shift bounds) applied to the second range.
    shift_bounds = {
        'partial': ((200, 500), (200, 500)),
        'none': ((2000, 2500), (2000, 2500)),
        'superset': ((-500, -200), (200, 500)),
    }
    if overlap not in shift_bounds:
        # Fail early with a clear message rather than letting a None shift
        # surface later as an unrelated TypeError.
        raise ValueError('unknown overlap: {!r}'.format(overlap))
    start_bounds, end_bounds = shift_bounds[overlap]

    def fresh_values(count):
        # New set members are drawn from 20..30, outside the 0..10 range
        # used for the original values.
        return [random.randint(20, 30) for _ in range(count)]

    clauses1 = []
    clauses2 = []
    for i, arg in enumerate(args):
        attr = Attribute('x{}'.format(i))
        if arg in ('dt', 'num'):
            # Draw order (start, start shift, end shift) is kept stable so
            # that seeded runs stay reproducible.
            start_dt = random.randint(2100, 3000)
            end_dt = start_dt + 1000
            start_shift = random.randint(*start_bounds)
            end_shift = random.randint(*end_bounds)
            bounds1 = [start_dt, end_dt]
            bounds2 = [start_dt + start_shift, end_dt + end_shift]
            if arg == 'dt':
                bounds1 = [datetime.fromordinal(value) for value in bounds1]
                bounds2 = [datetime.fromordinal(value) for value in bounds2]
            clauses1.extend([Ge(attr, bounds1[0]), Le(attr, bounds1[1])])
            clauses2.extend([Ge(attr, bounds2[0]), Le(attr, bounds2[1])])
        elif arg == 'arg':
            valueset = [random.randint(0, 10) for _ in range(4)]
            clauses1.append(In(attr, valueset))
            if overlap == 'partial':
                other_values = valueset[:2] + fresh_values(2)
            elif overlap == 'none':
                other_values = fresh_values(4)
            else:  # 'superset'
                other_values = valueset + fresh_values(2)
            clauses2.append(In(attr, other_values))
        else:
            raise ValueError(arg)

    return And(clauses1), And(clauses2)
Esempio n. 2
0
def bench(path, name):
    ''' Yield one timing record (a dict) per entry of path.

    Every entry is unpacked as a triple whose last two items are the current
    and the cached expression. For each entry two expressions are built,
    ``current AND cached`` and ``current AND NOT cached``, and a single call
    of expand_dnf_simplify on each is timed with timeit.

    Record keys: name (as passed in), ind (position in path), clauses
    (number of clauses reported by get_clauses for the intersection),
    intersection and difference (the measured times).
    '''
    for index, entry in enumerate(path):
        _, current, cached = entry
        both = And([current, cached])
        only_current = And([current, Not(cached)])

        record = dict(name=name, ind=index)
        record['clauses'] = len(get_clauses(both))
        # One repetition of one call each; min() merely unwraps the
        # single-element list returned by timeit.repeat.
        record['intersection'] = min(
            timeit.repeat(lambda: expand_dnf_simplify(both),
                          number=1, repeat=1))
        record['difference'] = min(
            timeit.repeat(lambda: expand_dnf_simplify(only_current),
                          number=1, repeat=1))
        yield record
Esempio n. 3
0
def test_simplify_flat_and_fuzz(clauses):
    ''' Check that simplifying And(clauses) never produces more clauses than
    were supplied. For now this is only a basic error check; it should
    really validate the algorithm's guarantees against a data set. '''
    simplified = simplify_flat_and(And(clauses))

    # Anything that is not exactly an And counts as a single clause.
    if type(simplified) is And:
        n_output = len(simplified.clauses)
    else:
        n_output = 1

    n_input = len(clauses)
    assert n_output <= n_input

    # Label the outcome through event().
    if simplified is False:
        event('Simplified False')
    elif n_output < n_input:
        event('Shortened')
Esempio n. 4
0
     ]),
 # Exact repetition.
 (None, [
     (Le(X, 3), Le(X, 3)),
     (Le(X, 3), None),
     ]),
 # Following queries are subsets.
 (None, [
     (Le(X, 3), Le(X, 3)),
     (Le(X, 2), None),
     (Le(X, 1), None),
     ]),
 # Following queries overlap partially.
 (None, [
     (Le(X, 1), Le(X, 1)),
     (Le(X, 3), And([Gt(X, 1), Le(X, 3)])),
     ]),
 (None, [
     (Le(X, 2), Le(X, 2)),
     (Ge(X, 1), Gt(X, 2)),
     ]),
 # Multiple cache records stored.
 (None, [
     (Le(X, 1), Le(X, 1)),
     (Ge(X, 3), Ge(X, 3)),
     (Le(X, 0), None),
     (Ge(X, 4), None),
     ]),
 # Assembling a result from partial cached queries.
 (None, [
     (Le(X, 2), Le(X, 2)),
Esempio n. 5
0
def remainder(expr1, expr2):
    ''' Return the simplified form of ``expr1 AND NOT expr2``. '''
    return simplify(And([expr1, Not(expr2)]))
Esempio n. 6
0
def remains(expr1, expr2):
    ''' Return False only when ``expr1 AND NOT expr2`` simplifies to the
    literal False; return True otherwise. '''
    leftover = simplify(And([expr1, Not(expr2)]))
    if leftover is False:
        return False
    return True
Esempio n. 7
0
def intersection(expr1, expr2):
    ''' Return the simplified form of ``expr1 AND expr2``. '''
    return simplify(And([expr1, expr2]))
Esempio n. 8
0
def intersects(expr1, expr2):
    ''' Return whether expr1 intersects with expr2, i.e. whether
    ``expr1 AND expr2`` simplifies to anything other than the literal
    False. '''
    conjunction = simplify(And([expr1, expr2]))
    if conjunction is False:
        return False
    return True
Esempio n. 9
0
'''
Profiling a large-ish DNF expansion shows a couple of things:
    - A few operations on boolean logic can be short-circuited.
    - There is a lot of hashing done. For example, any creation of an And/Or
    adds expressions to a set, and relations are stored in a dict to
    calculate the truth table. Both structures are hash-table backed, but the
    first is likely unnecessary and the second usually has very small tables.
    Hashing seems to be faster than comparison, so the structure is probably
    still the correct one; we just need to minimise the number of lookups.

To reproduce:
python -m cProfile -o profile.out testcase.py && cprofilev -f profile.out
'''

import json
from split_query.core import object_hook, And, Not
from split_query.core.expand import expand_dnf_simplify

# Load the stored records and take the entry at index 3; its last two items
# are the expressions used for the profiled expansion.
with open('path_persistent.json') as source:
    records = json.load(source, object_hook=object_hook)
_, e1, e2 = records[3]

# The expansion being profiled: e1 AND NOT e2.
expand_dnf_simplify(And([e1, Not(e2)]))
Esempio n. 10
0
def test_filter_between(dataset):
    ''' Return a pair: dataset filtered with ``x.between(1, 3)``, and the
    expression ``x >= 1 AND x <= 3`` it is paired with. '''
    filtered = dataset[dataset.x.between(1, 3)]
    lower_bound = Ge(Attribute('x'), 1)
    upper_bound = Le(Attribute('x'), 3)
    return filtered, And([lower_bound, upper_bound])
Esempio n. 11
0
def test_filter_and(dataset):
    ''' Return a pair: dataset filtered with ``(y < 2) & (x > 5)``, and the
    expression ``y < 2 AND x > 5`` it is paired with. '''
    filtered = dataset[(dataset.y < 2) & (dataset.x > 5)]
    y_clause = Lt(Attribute('y'), 2)
    x_clause = Gt(Attribute('x'), 5)
    return filtered, And([y_clause, x_clause])
Esempio n. 12
0
def test_filter_chained(dataset):
    ''' Return a pair: dataset filtered first with ``x <= 1`` and then with
    ``z >= 0``, and the expression ``x <= 1 AND z >= 0`` it is paired
    with. '''
    first_pass = dataset[dataset.x <= 1]
    filtered = first_pass[dataset.z >= 0]
    x_clause = Le(Attribute('x'), 1)
    z_clause = Ge(Attribute('z'), 0)
    return filtered, And([x_clause, z_clause])
Esempio n. 13
0
    (x.isin([1, 2, 3]) & x.isin([2, 3, 4]), x.isin([2, 3])),
    (~x.isin([1, 2, 3]) & ~x.isin([2, 3, 4]), ~x.isin([1, 2, 3, 4])),
    (x.isin([1, 2, 3]) & ~x.isin([2, 3, 4]), (x == 1)),
    (x.isin([1, 2, 3]) & x.isin([4, 5, 6]), False),
    (x.isin([1, 2, 3]) & ~x.isin([1, 2, 3, 4]), False),
    # Combined bounds + sets.
    ((x == 1) & (x >= 0), (x == 1)),
    ((x == 1) & (x == 2), False),
    (x.isin([0, 1, 2]) & (x < 2), x.isin([0, 1])),
    (x.isin([1, 2, 3]) & (x <= 2), x.isin([1, 2])),
    (x.isin([1, 2, 3]) & (x > 3), False),
    # Edge cases
    (~(x == 0) & x.isin([0]), False),
    (~(x == 0) & (x == 0), False),
    (~(~(x == 0)) & (y == 0), (x == 0) & (y == 0)),
    (And([True]), True),
    (And([False]), False),
    # Found in cache tests.
    ((x >= 2014) & (x < 2015) & ~(x == 2015), (x >= 2014) & (x < 2015)),
    ((x >= 2015) & (x <= 2015) & ~(x == 2015), False),
]


@pytest.mark.parametrize('expression, simplified', TESTCASES)
def test_simplify_flat_and(expression, simplified):
    ''' Cases with an obvious simplification, used to pin down the behaviour
    of the algorithm. Each should be reducible to a simpler set. '''
    # NOTE(review): no assertion is visible in this excerpt; presumably the
    # comparison of the simplified expression against `simplified` follows
    # the unwrapping below - confirm against the full file.

    def unwrap(value):
        # Either side may arrive as an ExpressionContainer; use what it wraps.
        return value.wrapped if type(value) is ExpressionContainer else value

    expression = unwrap(expression)
    simplified = unwrap(simplified)
Esempio n. 14
0
import pytest

from split_query.core import Attribute, And, Or, Not, In, Ge, Le, Lt, Gt, Eq
from split_query.extract import extract_parameters, split_parameters

# Attributes named 'x' and 'y', shared by the test cases defined below.
XVAR = Attribute('x')
YVAR = Attribute('y')

TESTCASES_EXTRACT = [
    (In(XVAR,
        [1, 2, 3]), [dict(attr='x', type='tag', key='xtags',
                          single=False)], [(In(XVAR, [1, 2, 3]),
                                            dict(xtags={1, 2, 3}))]),
    (And([In(XVAR, [1, 2, 3]), Ge(YVAR, 2),
          Le(YVAR, 4)]), [
              dict(attr='x', type='tag', key='xtags', single=False),
              dict(attr='y',
                   type='range',
                   key_lower='from_y',
                   key_upper='to_y')
          ], [(And([In(XVAR, [1, 2, 3]),
                    Ge(YVAR, 2),
                    Le(YVAR, 4)]), dict(xtags={1, 2, 3}, from_y=2, to_y=4))]),
    (And([In(XVAR, [1, 2, 3]), In(YVAR, [4, 5, 6])]), [
        dict(attr='x', type='tag', key='xtag', single=True),
        dict(attr='y', type='tag', key='ytags', single=False)
    ], [
        (And([In(XVAR, [1]), In(YVAR,
                                [4, 5, 6])]), dict(xtag=1, ytags={4, 5, 6})),
        (And([In(XVAR, [2]), In(YVAR,
                                [4, 5, 6])]), dict(xtag=2, ytags={4, 5, 6})),