コード例 #1
0
ファイル: tests.py プロジェクト: domoritz/summarization
    def test_all_formulas(self):
        # Case 1: two columns (x, y) and two rows of plain string values.
        # Expected: the empty formula, one formula per single cell, and one
        # formula per complete row.
        plain_formulas = get_all_formulas(read_sample("x,y\n1,2\n3,4"))
        expected_plain = {
            frozendict({}),
            frozendict({"x": "1"}),
            frozendict({"y": "2"}),
            frozendict({"x": "3"}),
            frozendict({"y": "4"}),
            frozendict({"y": "2", "x": "1"}),
            frozendict({"y": "4", "x": "3"}),
        }
        self.assertEqual(plain_formulas, expected_plain)

        # Case 2: a single column whose cells are written as "{a b}"
        # (presumably set-valued cells -- confirm against read_sample).
        # Expected: the empty formula, one formula per individual member,
        # and one formula per complete cell.
        set_formulas = get_all_formulas(read_sample("x\n{1 2}\n{3 4}"))
        expected_sets = {
            frozendict({}),
            frozendict({"x": frozenset(["1"])}),
            frozendict({"x": frozenset(["4"])}),
            frozendict({"x": frozenset(["3"])}),
            frozendict({"x": frozenset(["2"])}),
            frozendict({"x": frozenset(["1", "2"])}),
            frozendict({"x": frozenset(["3", "4"])}),
        }
        self.assertEqual(set_formulas, expected_sets)
コード例 #2
0
ファイル: tests.py プロジェクト: domoritz/summarization
    def test_all_formulas(self):
        def cell(attribute, value):
            # Every expected cell in this test uses sequence 0.
            return Cell(attribute=attribute, sequence=0, value=value)

        # Case 1: two columns (x, y) with two rows of plain values.
        # Expected: one formula per single cell plus one per complete row.
        plain_formulas = get_all_formulas(read_sample('x,y\n1,2\n3,4'))
        expected_plain = {
            frozenset([cell('x', '1')]),
            frozenset([cell('x', '3')]),
            frozenset([cell('y', '4')]),
            frozenset([cell('x', '1'), cell('y', '2')]),
            frozenset([cell('y', '2')]),
            frozenset([cell('y', '4'), cell('x', '3')]),
        }
        self.assertEqual(plain_formulas, expected_plain)

        # Case 2: a single column whose cells are written as "{a b}".
        # Expected: one formula per individual value plus one per pair of
        # values that share a cell.
        set_formulas = get_all_formulas(read_sample('x\n{1 2}\n{3 4}'))
        expected_sets = {
            frozenset([cell('x', '3'), cell('x', '4')]),
            frozenset([cell('x', '4')]),
            frozenset([cell('x', '1')]),
            frozenset([cell('x', '1'), cell('x', '2')]),
            frozenset([cell('x', '3')]),
            frozenset([cell('x', '2')]),
        }
        self.assertEqual(set_formulas, expected_sets)
コード例 #3
0
ファイル: optimal.py プロジェクト: domoritz/summarization
def find_optimum(relation, k):
    """Find the cheapest formula subsets for ``relation`` by exhaustive search.

    All formulas are generated with ``get_all_formulas``, every candidate
    produced by ``subsets(formulas, k)`` is scored with ``cost``, and the
    candidates sharing the minimum cost are returned. Every candidate is
    materialized and scored, so this can be very slow.

    Args:
        relation: The relation handed to ``get_all_formulas`` and ``cost``.
        k: Forwarded unchanged to ``subsets`` (presumably a bound on the
            subset size -- confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)`` where ``best`` is a list of
        ``(cost, subset)`` pairs whose cost equals ``best_cost``, in the
        order ``subsets`` produced them.

    Raises:
        IndexError: If ``subsets`` yields no candidates.
    """

    formulas = get_all_formulas(relation, True)

    logger.info("# formulas: %s", len(formulas))

    logger.debug("possible formulas: %s", relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))

    logger.info("# subsets: %s", len(all_subsets))

    # Pair each candidate with its cost. Sorting on the cost alone keeps the
    # (stable) generation order among ties and never compares the subsets.
    scored = sorted(
        ((cost(subset, relation), subset) for subset in all_subsets),
        key=lambda pair: pair[0],
    )

    best_cost = scored[0][0]

    # Build a real list: on Python 3 ``filter`` would hand callers a one-shot
    # iterator that supports neither ``len()`` nor indexing.
    best = [pair for pair in scored if pair[0] == best_cost]

    return best_cost, best
コード例 #4
0
ファイル: optimal.py プロジェクト: domoritz/summarization
def find_optimum(relation, k):
    """Find the optimum set(s) of formulas and the optimum cost.

    This method generates all formulas, all subsets of formulas (via
    ``subsets(formulas, k)``) and then calculates the cost for every one of
    them. This can be very slow.

    Args:
        relation: The relation handed to ``get_all_formulas`` and ``cost``.
        k: Forwarded unchanged to ``subsets`` (presumably a bound on the
            subset size -- confirm against ``subsets``).

    Returns:
        A tuple ``(best_cost, best)`` where ``best`` is a list of
        ``(cost, subset)`` pairs whose cost equals ``best_cost``, in the
        order ``subsets`` produced them.

    Raises:
        IndexError: If ``subsets`` yields no candidates.
    """

    formulas = get_all_formulas(relation, True)

    logger.info('# formulas: %s', len(formulas))

    logger.debug('possible formulas: %s', relation_rep(formulas))

    all_subsets = list(subsets(formulas, k))

    logger.info('# subsets: %s', len(all_subsets))

    # Pair each candidate with its cost. Sorting on the cost alone keeps the
    # (stable) generation order among ties and never compares the subsets.
    scored = sorted(
        ((cost(subset, relation), subset) for subset in all_subsets),
        key=lambda pair: pair[0],
    )

    best_cost = scored[0][0]

    # Build a real list: on Python 3 ``filter`` would hand callers a one-shot
    # iterator that supports neither ``len()`` nor indexing.
    best = [pair for pair in scored if pair[0] == best_cost]

    return best_cost, best