def test_count_all(self):
    """
    Count every record produced by a plain scan of _RECORDS.
    """
    plan = tree([Count(), [MemScan(_RECORDS)]])
    rows = list(execute(plan))
    only_row = (10,)
    self.assertListEqual(rows, [only_row])
def test_distinct_unsorted(self):
    """
    Distinct is not expected to cope with unsorted input: a value that
    shows up again after a different value is emitted a second time.
    """
    unsorted_values = (0, 1, 1, 0, 0, 1, 1, 1)
    rows = [(value,) for value in unsorted_values]
    plan = tree([Distinct(), [MemScan(rows)]])
    output = list(execute(plan))
    # Only adjacent repeats are collapsed.
    self.assertListEqual(output, [(0,), (1,), (0,), (1,)])
def test_distinct_sorted(self):
    """
    Typical case: with sorted input, each value is emitted exactly once.
    """
    sorted_values = (0, 0, 1, 2, 2, 2, 3, 3)
    rows = [(value,) for value in sorted_values]
    plan = tree([Distinct(), [MemScan(rows)]])
    output = list(execute(plan))
    self.assertListEqual(output, [(0,), (1,), (2,), (3,)])
def test_average_all(self):
    """
    Average the first field of every record in _RECORDS; expect 4.5.
    """
    def first_field(r):
        return [r[0]]

    plan = tree([
        Average(),
        [Projection(first_field), [MemScan(_RECORDS)]],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [(4.5,)])
def test_count_selected(self):
    """
    Test counting the output of a selection, for every predicate in
    _PREDICATES.

    Each predicate runs inside its own subTest so that a failure reports
    which predicate (by position) broke, and the remaining predicates are
    still exercised instead of being skipped after the first failure.
    """
    for index, predicate in enumerate(_PREDICATES):
        with self.subTest(predicate_index=index):
            query = tree(
                [Count(), [Selection(predicate), [MemScan(_RECORDS)]]])
            result = list(execute(query))
            # Reference answer: count the matching records in plain Python.
            matching = sum(1 for r in _RECORDS if predicate(r))
            self.assertListEqual(result, [(matching,)])
def test_cartesian_product(self):
    """
    A theta function that always returns True turns the join into a
    cross product of its two inputs.
    """
    def always_true(r, s):
        return True

    plan = tree([
        NestedLoopsJoin(always_true),
        [MemScan(self.R_RECORDS)],
        [MemScan(self.S_RECORDS)],
    ])
    rows = list(execute(plan))

    # Every R record paired with every S record, R varying slowest.
    cross_product = []
    for left in self.R_RECORDS:
        for right in self.S_RECORDS:
            cross_product.append(left + right)
    self.assertListEqual(rows, cross_product)
def test_self_join(self):
    """
    A table may be joined to itself; here R is crossed with R.
    """
    def always_true(r, s):
        return True

    plan = tree([
        NestedLoopsJoin(always_true),
        [MemScan(self.R_RECORDS)],
        [MemScan(self.R_RECORDS)],
    ])
    rows = list(execute(plan))

    # Every R record paired with every R record, outer side varying slowest.
    pairs = []
    for outer in self.R_RECORDS:
        for inner in self.R_RECORDS:
            pairs.append(outer + inner)
    self.assertListEqual(rows, pairs)
def test_project_various(self):
    """
    Projection should apply an arbitrary mapping function to each record.

    Each mapping runs inside its own subTest so that a failure reports
    which mapping (by position) broke, and the remaining mappings are
    still exercised instead of being skipped after the first failure.
    """
    mapping_functions = [
        lambda r: [r[1]],  # project to a single column
        lambda r: [r[1], r[0]],  # switch columns
        lambda r: [r[0] + r[1]],  # sums of fields
        lambda r: [r[1] > 8],  # results of boolean operations
    ]
    records = [(n, n * n) for n in range(10)]
    for index, mapping in enumerate(mapping_functions):
        with self.subTest(mapping_index=index):
            query = tree([Projection(mapping), [MemScan(records)]])
            result = list(execute(query))
            # Reference answer: apply the mapping directly in Python.
            expected = [tuple(mapping(r)) for r in records]
            self.assertListEqual(result, expected)
def test_select_by_id(self):
    """
    What is the name of the movie with id 5000?
    """
    def name_only(r):
        return (r.name,)

    def has_id_5000(r):
        return r.id == '5000'

    plan = tree([
        Projection(name_only),
        [Selection(has_id_5000), [FileScan(movies_file, MovieRecord)]],
    ])
    names = [row[0] for row in execute(plan)]
    self.assertListEqual(['Medium Cool (1969)'], names)
def test_select_various_predicates(self):
    """
    Selection should keep exactly the records for which the predicate holds.

    Each predicate runs inside its own subTest so that a failure reports
    which predicate (by position) broke, and the remaining predicates are
    still exercised instead of being skipped after the first failure.
    """
    predicates = [
        lambda r: r[0] % 2 == 1,  # odd values
        lambda r: r[1] % 2 == 1,  # odd squares
        lambda r: r[0] + r[1] == 30,  # just (5, 25)
        lambda r: r[1] == 'Reginald',  # no records
    ]
    records = [(n, n * n) for n in range(10)]
    for index, predicate in enumerate(predicates):
        with self.subTest(predicate_index=index):
            query = tree([Selection(predicate), [MemScan(records)]])
            result = list(execute(query))
            # Reference answer: filter the records directly in Python.
            expected = [r for r in records if predicate(r)]
            self.assertListEqual(result, expected)
def test_average_selected(self):
    """
    Average only the records of _RECORDS whose first field is odd;
    expect 5.
    """
    def is_odd(r):
        return r[0] % 2 == 1

    def first_field(r):
        return [r[0]]

    plan = tree([
        Average(),
        [
            Projection(first_field),
            [Selection(is_odd), [MemScan(_RECORDS)]],
        ],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [(5,)])
def test_equijoin(self):
    """
    The join condition can be field equality: R's second field must
    equal S's first field.
    """
    def fields_match(r, s):
        return r[1] == s[0]

    plan = tree([
        NestedLoopsJoin(fields_match),
        [MemScan(self.R_RECORDS)],
        [MemScan(self.S_RECORDS)],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        (0, 'a', 'a', 'apple'),
        (1, 'b', 'b', 'banana'),
        (1, 'b', 'b', 'burger'),
    ])
def test_equijoin(self):
    """
    A simple join by field value: R's second field against S's first.
    """
    def r_key(r):
        return r[1]

    def s_key(s):
        return s[0]

    plan = tree([
        MergeJoin(r_key, s_key),
        [MemScan(self.R_RECORDS)],
        [MemScan(self.S_RECORDS)],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        (0, 'a', 'a', 'apple'),
        (1, 'b', 'b', 'banana'),
        (1, 'b', 'b', 'burger'),
        (3, 'd', 'd', 'domato'),
    ])
def test_average_rating(self):
    """
    What is the average rating for the movie with id 5000?

    WARNING: super slow
    """
    def rating_as_float(r):
        return (float(r.rating),)

    def is_movie_5000(r):
        return r.movie_id == '5000'

    plan = tree([
        Average(),
        [
            Projection(rating_as_float),
            [
                Selection(is_movie_5000),
                [FileScan(ratings_file, RatingRecord)],
            ],
        ],
    ])
    # Only the first row produced by the plan is inspected.
    first_row = next(execute(plan))
    self.assertAlmostEqual(3.5, first_row[0], places=2)
def test_inequality(self):
    """
    The join condition can be an inequality: pair each R record with
    every R record whose first field is strictly smaller.
    """
    def left_is_greater(r, s):
        return r[0] > s[0]

    plan = tree([
        NestedLoopsJoin(left_is_greater),
        [MemScan(self.R_RECORDS)],
        [MemScan(self.R_RECORDS)],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        (1, 'b', 0, 'a'),
        (1, 'c', 0, 'a'),
        (2, 'c', 0, 'a'),
        (2, 'c', 1, 'b'),
        (2, 'c', 1, 'c'),
    ])
def test_self_join(self):
    """
    A table may be joined to itself; here S is merge-joined with S on
    its first field.
    """
    def left_key(r):
        return r[0]

    def right_key(s):
        return s[0]

    plan = tree([
        MergeJoin(left_key, right_key),
        [MemScan(self.S_RECORDS)],
        [MemScan(self.S_RECORDS)],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        ('a', 'apple', 'a', 'apple'),
        ('b', 'banana', 'b', 'banana'),
        ('b', 'banana', 'b', 'burger'),
        ('b', 'burger', 'b', 'banana'),
        ('b', 'burger', 'b', 'burger'),
        ('d', 'domato', 'd', 'domato'),
    ])
def test_three_way(self):
    """
    The input to a join can itself be a join: the R-with-R inequality
    join feeds an equality join against S.
    """
    def fourth_matches_first(r, s):
        return r[3] == s[0]

    def left_is_greater(r, s):
        return r[0] > s[0]

    plan = tree([
        NestedLoopsJoin(fourth_matches_first),
        [
            NestedLoopsJoin(left_is_greater),
            [MemScan(self.R_RECORDS)],
            [MemScan(self.R_RECORDS)],
        ],
        [MemScan(self.S_RECORDS)],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        (1, 'b', 0, 'a', 'a', 'apple'),
        (1, 'c', 0, 'a', 'a', 'apple'),
        (2, 'c', 0, 'a', 'a', 'apple'),
        (2, 'c', 1, 'b', 'b', 'banana'),
        (2, 'c', 1, 'b', 'b', 'burger'),
    ])
def test_count_rated_movies(self):
    """
    How many distinct movies have a rating?

    WARNING: super slow
    """
    def by_first_field(r):
        return r[0]

    def movie_id_only(r):
        return (r.movie_id,)

    plan = tree([
        Count(),
        [
            Distinct(),
            [
                Sort(by_first_field),
                [
                    Projection(movie_id_only),
                    [FileScan(ratings_file, RatingRecord)],
                ],
            ],
        ],
    ])
    # TODO: shouldn't this be 26744?
    distinct_movies = next(execute(plan))[0]
    self.assertEqual(26745, distinct_movies)
def test_three_way(self):
    """
    The input to a join can itself be a join: R is merge-joined against
    the result of merge-joining S with itself.
    """
    def r_second_field(r):
        return r[1]

    def first_field_left(r):
        return r[0]

    def first_field_right(s):
        return s[0]

    plan = tree([
        MergeJoin(r_second_field, first_field_right),
        [MemScan(self.R_RECORDS)],
        [
            MergeJoin(first_field_left, first_field_right),
            [MemScan(self.S_RECORDS)],
            [MemScan(self.S_RECORDS)],
        ],
    ])
    rows = list(execute(plan))
    self.assertListEqual(rows, [
        (0, 'a', 'a', 'apple', 'a', 'apple'),
        (1, 'b', 'b', 'banana', 'b', 'banana'),
        (1, 'b', 'b', 'banana', 'b', 'burger'),
        (1, 'b', 'b', 'burger', 'b', 'banana'),
        (1, 'b', 'b', 'burger', 'b', 'burger'),
        (3, 'd', 'd', 'domato', 'd', 'domato'),
    ])