def test_all_formulas(self):
    """get_all_formulas must return exactly the expected set of formulas.

    Two samples are checked: one with plain string values under the
    attributes ``x`` and ``y``, and one whose single attribute ``x`` holds
    brace-delimited values that are expected back as frozensets.  In both
    cases the expected set includes the empty formula.
    """
    scalar_formulas = get_all_formulas(read_sample("x,y\n1,2\n3,4"))
    scalar_expected = set(
        frozendict(formula)
        for formula in [
            {},
            {"x": "1"},
            {"y": "2"},
            {"x": "3"},
            {"y": "4"},
            {"y": "2", "x": "1"},
            {"y": "4", "x": "3"},
        ]
    )
    self.assertEqual(scalar_formulas, scalar_expected)

    set_valued_formulas = get_all_formulas(read_sample("x\n{1 2}\n{3 4}"))
    set_valued_expected = set(
        frozendict(formula)
        for formula in [
            {},
            {"x": frozenset(["1"])},
            {"x": frozenset(["4"])},
            {"x": frozenset(["3"])},
            {"x": frozenset(["2"])},
            {"x": frozenset(["1", "2"])},
            {"x": frozenset(["3", "4"])},
        ]
    )
    self.assertEqual(set_valued_formulas, set_valued_expected)
def test_all_formulas(self):
    """get_all_formulas must return exactly the expected set of formulas.

    Each formula is a frozenset of ``Cell`` objects.  Two samples are
    checked: one with the attributes ``x`` and ``y``, and one whose single
    attribute ``x`` holds brace-delimited values.
    """
    def cell(attribute, value):
        # Every expected cell in this test carries sequence 0.
        return Cell(attribute=attribute, sequence=0, value=value)

    scalar_formulas = get_all_formulas(read_sample('x,y\n1,2\n3,4'))
    scalar_expected = {
        frozenset([cell('x', '1')]),
        frozenset([cell('x', '3')]),
        frozenset([cell('y', '4')]),
        frozenset([cell('x', '1'), cell('y', '2')]),
        frozenset([cell('y', '2')]),
        frozenset([cell('y', '4'), cell('x', '3')]),
    }
    self.assertEqual(scalar_formulas, scalar_expected)

    braced_formulas = get_all_formulas(read_sample('x\n{1 2}\n{3 4}'))
    braced_expected = {
        frozenset([cell('x', '3'), cell('x', '4')]),
        frozenset([cell('x', '4')]),
        frozenset([cell('x', '1')]),
        frozenset([cell('x', '1'), cell('x', '2')]),
        frozenset([cell('x', '3')]),
        frozenset([cell('x', '2')]),
    }
    self.assertEqual(braced_formulas, braced_expected)
def test_relation(self):
    """read_sample(example) must yield the three expected rows, in order.

    The expected rows show the three value shapes under test: a frozenset
    under ``x``, a row with only ``y``, and a tuple under ``x``.
    """
    parsed_rows = read_sample(example)
    expected_rows = [
        frozendict({"y": "c", "x": frozenset(["a", "b"])}),
        frozendict({"y": "c"}),
        frozendict({"y": "c", "x": ("a", "d")}),
    ]
    self.assertEqual(parsed_rows, expected_rows)
def test_relation(self):
    """read_sample(example) must yield the three expected rows, in order.

    Each row is a frozenset of ``Cell`` objects; the last row expects the
    ``x`` values 'a' and 'd' at sequence 0 and 1 respectively.
    """
    def cell(attribute, sequence, value):
        # Shorthand so the expected rows below stay readable.
        return Cell(attribute=attribute, sequence=sequence, value=value)

    parsed_rows = read_sample(example)
    expected_rows = [
        frozenset([cell('x', 0, 'a'), cell('x', 0, 'b'), cell('y', 0, 'c')]),
        frozenset([cell('y', 0, 'c')]),
        frozenset([cell('x', 0, 'a'), cell('y', 0, 'c'), cell('x', 1, 'd')]),
    ]
    self.assertEqual(parsed_rows, expected_rows)
def test_cost(self):
    """cost() must return the expected value for three formula/relation pairs."""
    def check(formula_text, relation_text, expected):
        # Parse the formulas first, then the relation, and compare the cost.
        formulas = read_sample(formula_text)
        relation = read_sample(relation_text)
        self.assertEqual(cost(formulas=formulas, relation=relation), expected)

    # Plain values spread over two attributes.
    check('x, y\n1\n, 4', 'x, y\n1, 2\n1, 4\n1, 4', 3)
    # Brace-delimited values in a single attribute.
    check('x\n{1}\n{4}', 'x\n{1 2}\n{1 4}\n{1 4}', 3)
    # Three attributes, using the module-level three_attr_null sample.
    check('x,y,z\na,b,\n,,c', three_attr_null, 6)
def test_repr(self):
    """tuple_rep and relation_rep must render ``example`` as expected.

    tuple_rep is checked row by row; relation_rep is checked against the
    same three row strings joined by newlines.
    """
    # Build a real list: on Python 3 map() returns a lazy iterator, which
    # never compares equal to a list, so the assertion could not pass.
    row_strings = [tuple_rep(row) for row in read_sample(example)]
    self.assertEqual(
        row_strings,
        ['x[0]:a x[0]:b y[0]:c', 'y[0]:c', 'x[0]:a x[1]:d y[0]:c'],
    )

    self.assertEqual(
        relation_rep(read_sample(example)),
        'x[0]:a x[0]:b y[0]:c\ny[0]:c\nx[0]:a x[1]:d y[0]:c',
    )
def test_repr(self):
    """tuple_rep must render each row of ``example`` as expected.

    The expected strings show a set value as ``{a b}`` and a tuple value
    as ``[a d]``.
    """
    # Build a real list: on Python 3 map() returns a lazy iterator, which
    # never compares equal to a list, so the assertion could not pass.
    row_strings = [tuple_rep(row) for row in read_sample(example)]
    self.assertEqual(row_strings, ["x:{a b} y:c", "y:c", "x:[a d] y:c"])
def test_cost(self):
    """cost() must return the expected value for two formula/relation pairs."""
    def check(formula_text, relation_text, expected):
        # Parse the formulas first, then the relation, and compare the cost.
        formulas = read_sample(formula_text)
        relation = read_sample(relation_text)
        self.assertEqual(cost(formulas=formulas, relation=relation), expected)

    # Plain values spread over two attributes.
    check("x, y\n1\n, 4", "x, y\n1, 2\n1, 4\n1, 4", 3)
    # Brace-delimited values in a single attribute.
    check("x\n{1}\n{4}", "x\n{1 2}\n{1 4}\n{1 4}", 3)
def run():
    """Parse ``larger_example`` and pass it to find_incremental with 5.

    Returns whatever find_incremental returns for that relation.
    """
    return find_incremental(read_sample(larger_example), 5)
summary = improved_summary best_cost = improved_cost # resort cells all_cells = [x for x in all_cells if x[0] > 0] all_cells.sort() all_cells.reverse() return best_cost, summary if __name__ == '__main__': logger.setLevel(logging.INFO) relation = read_sample(three_attr_null) print relation_rep(relation) print best_cost, best = find_incremental(relation, 3) print('Best cost:', best_cost) print(relation_rep(best)) # speedtest logger.setLevel(logging.WARN) def run(): relation = read_sample(larger_example) return find_incremental(relation, 5)