Ejemplo n.º 1
0
    def test_partition(self):
        # Exercises partition.partition and partition.make_labeled_partition against the
        # expected values stored in OperationsTests.test_set1, then verifies that both
        # functions raise TypeError when the classifying function yields an Undef().
        fixture = OperationsTests.test_set1

        def is_even(elem):
            return elem.left.value % 2 == 0

        def remainder_mod_three(elem):
            return elem.left.value % 3

        # Partition by parity of the left component.
        actual_even = partition.partition(fixture["input"], is_even)
        self.assertEqual(actual_even, fixture["even_part"])
        if OperationsTests._print_examples:
            print("Even partition Actual:  ", actual_even)
            print("Even partition Expected:", fixture["even_part"])

        # Partition by remainder modulo 3 of the left component.
        actual_thirds = partition.partition(fixture["input2"], remainder_mod_three)
        self.assertEqual(actual_thirds, fixture["thirds_part"])
        if OperationsTests._print_examples:
            print("Thirds partition Actual:  ", actual_thirds)
            print("Thirds partition Expected:", fixture["thirds_part"])

        # Labeled partition by parity.
        actual_labeled = partition.make_labeled_partition(fixture["input"], is_even)
        self.assertEqual(actual_labeled, fixture["even_part_equiv"])
        if OperationsTests._print_examples:
            print(actual_labeled)

        # Negative test: the classifier returns something that can not be put inside an atom.
        def even_or_undef(elem):
            if elem.left.value % 2 == 0:
                return "even"
            return Undef()

        with self.assertRaises(TypeError):
            partition.partition(fixture["input2"], even_or_undef)
        with self.assertRaises(TypeError):
            partition.make_labeled_partition(fixture["input2"], even_or_undef)
Ejemplo n.º 2
0
    def test_partition_multiset(self):
        # Same checks as the set-based partition test, but driven by the data in
        # OperationsTests.test_set2 (the "(msets)" fixture): partition by parity, partition by
        # remainder modulo 3, labeled partition, and the TypeError negative cases.
        fixture = OperationsTests.test_set2

        def is_even(elem):
            return elem.left.value % 2 == 0

        def remainder_mod_three(elem):
            return elem.left.value % 3

        # Partition by parity of the left component.
        actual_even = partition.partition(fixture["input"], is_even)
        self.assertEqual(actual_even, fixture["even_part"])
        if OperationsTests._print_examples:
            print("Even partition (msets) Actual:  ", actual_even)
            print("Even partition (msets) Expected:", fixture["even_part"])

        # Partition by remainder modulo 3 of the left component.
        actual_thirds = partition.partition(fixture["input2"], remainder_mod_three)
        self.assertEqual(actual_thirds, fixture["thirds_part"])
        if OperationsTests._print_examples:
            print("Thirds partition (msets) Actual:  ", actual_thirds)
            print("Thirds partition (msets) Expected:", fixture["thirds_part"])

        # Labeled partition by parity.
        actual_labeled = partition.make_labeled_partition(fixture["input"], is_even)
        self.assertEqual(actual_labeled, fixture["even_part_equiv"])
        if OperationsTests._print_examples:
            print(actual_labeled)

        # Negative test: the classifier returns something that can not be put inside an atom.
        def even_or_undef(elem):
            if elem.left.value % 2 == 0:
                return "even"
            return Undef()

        with self.assertRaises(TypeError):
            partition.partition(fixture["input2"], even_or_undef)
        with self.assertRaises(TypeError):
            partition.make_labeled_partition(fixture["input2"], even_or_undef)
Ejemplo n.º 3
0
def check_blocks(_board):
    """Try to place values block by block (a block is one 'band'/'stack' combination).

    The cells returned by ``get_filled_cells(_board)`` are grouped per block. For every block
    the candidate placements reported by ``get_block_candidates`` are examined:

    * no candidates: the block is skipped;
    * exactly one candidate: it is placed;
    * block clan of cardinality ``GRID_SIZE - 2``: both open cells are resolved with the help
      of the reported conflicts;
    * otherwise: every value that has exactly one candidate cell is placed.

    Returns the board after all placements (``_board`` itself when nothing was placed).
    """
    if VERBOSE:
        print("* check_blocks")

    # NOTE: the filled cells are computed once and are not refreshed while _board is updated.
    filled = get_filled_cells(_board)
    block_clans = partition.partition(filled, partial(by_keys, 'band', 'stack'))
    for block_clan in _SORT(block_clans, key=partial(by_clan_keys, 'band', 'stack')):
        open_options, conflict = get_block_candidates(block_clan, filled)

        if open_options.is_empty:
            continue

        if open_options.cardinality == 1:
            # A single remaining option can be placed right away.
            _board = get_new_board(_board, open_options)
        elif block_clan.cardinality == GRID_SIZE - 2:
            # A value in conflict can't go into its conflict cell, so it must go in the other.
            forced = clans.superstrict(open_options, project(conflict, 'value'))
            if forced.cardinality == 2:
                # Both values are determined.
                to_place = forced
            else:
                # Drop the forced option, then drop whatever else targets the forced cell;
                # what is left over is the placement for the second cell.
                leftover = sets.minus(open_options, forced)
                forced_cell = project(forced, 'row', 'col')
                second = sets.minus(leftover, clans.superstrict(leftover, forced_cell))
                to_place = sets.union(forced, second)
            _board = get_new_board(_board, to_place)
        else:
            # Group the options by value; a value that fits in only one cell is placed there.
            per_value = partition.partition(open_options, partial(by_key, 'value'))
            for value_options in _SORT(per_value, key=partial(by_clan_key, 'value')):
                if value_options.cardinality != 1:
                    continue
                # Keep only row/col/value (drops band/stack).
                _board = get_new_board(_board, project(value_options, 'row', 'col', 'value'))

    return _board
Ejemplo n.º 4
0
def check_blocks(_board):
    """Try to place values block by block (a block is one 'band'/'stack' combination).

    The cells returned by ``get_filled_cells(_board)`` are grouped per block. For every block
    the candidate placements reported by ``get_block_candidates`` are examined:

    * no candidates: the block is skipped;
    * exactly one candidate: it is placed;
    * block clan of cardinality ``GRID_SIZE - 2``: both open cells are resolved with the help
      of the reported conflicts;
    * otherwise: every value that has exactly one candidate cell is placed.

    Returns the board after all placements (``_board`` itself when nothing was placed).
    """
    if VERBOSE:
        print("* check_blocks")

    # NOTE: the filled cells are computed once and are not refreshed while _board is updated.
    filled = get_filled_cells(_board)
    block_clans = partition.partition(filled, partial(by_keys, 'band', 'stack'))
    for block_clan in _SORT(block_clans, key=partial(by_clan_keys, 'band', 'stack')):
        open_options, conflict = get_block_candidates(block_clan, filled)

        if open_options.is_empty:
            continue

        if open_options.cardinality == 1:
            # A single remaining option can be placed right away.
            _board = get_new_board(_board, open_options)
        elif block_clan.cardinality == GRID_SIZE - 2:
            # A value in conflict can't go into its conflict cell, so it must go in the other.
            forced = clans.superstrict(open_options, project(conflict, 'value'))
            if forced.cardinality == 2:
                # Both values are determined.
                to_place = forced
            else:
                # Drop the forced option, then drop whatever else targets the forced cell;
                # what is left over is the placement for the second cell.
                leftover = sets.minus(open_options, forced)
                forced_cell = project(forced, 'row', 'col')
                second = sets.minus(leftover, clans.superstrict(leftover, forced_cell))
                to_place = sets.union(forced, second)
            _board = get_new_board(_board, to_place)
        else:
            # Group the options by value; a value that fits in only one cell is placed there.
            per_value = partition.partition(open_options, partial(by_key, 'value'))
            for value_options in _SORT(per_value, key=partial(by_clan_key, 'value')):
                if value_options.cardinality != 1:
                    continue
                # Keep only row/col/value (drops band/stack).
                _board = get_new_board(_board, project(value_options, 'row', 'col', 'value'))

    return _board
Ejemplo n.º 5
0
def check_values(_board):
    """Fill in values that are missing from the board exactly once.

    The filled cells are grouped by 'value'. For a value whose clan has cardinality
    ``GRID_SIZE-1``, everything in ``_board`` that shares a row or a column with an existing
    occurrence is removed; what remains is combined with the value and queued for placement.

    Returns the result of ``get_new_board`` if anything was queued, otherwise ``_board``.
    """
    if verbose:
        print("* check_values")
    filled = get_filled_cells(_board)

    pending = Set()
    clans_by_value = partition.partition(filled, partial(by_key, 'value'))
    for value_clan in _sort(clans_by_value, key=partial(by_clan_key, 'value')):
        # Only values with exactly one occurrence missing are handled here.
        if value_clan.cardinality != GRID_SIZE-1:
            continue
        # Rows and columns that already contain the value.
        taken_lines = sets.union(project(value_clan, 'row'), project(value_clan, 'col'))
        # Strip every board entry on one of those rows/cols; the remainder is the target cell.
        free_cell = sets.minus(_board, clans.superstrict(_board, taken_lines))
        placement = clans.cross_union(free_cell, project(value_clan, 'value'))
        pending = sets.union(pending, placement)

    return get_new_board(_board, pending) if pending else _board
Ejemplo n.º 6
0
def check_values(_board):
    """Fill in values that are missing from the board exactly once.

    The filled cells are grouped by 'value'. For a value whose clan has cardinality
    ``GRID_SIZE - 1``, everything in ``_board`` that shares a row or a column with an existing
    occurrence is removed; what remains is combined with the value and queued for placement.

    Returns the result of ``get_new_board`` if anything was queued, otherwise ``_board``.
    """
    if VERBOSE:
        print("* check_values")
    filled = get_filled_cells(_board)

    pending = Set()
    clans_by_value = partition.partition(filled, partial(by_key, 'value'))
    for value_clan in _SORT(clans_by_value, key=partial(by_clan_key, 'value')):
        # Only values with exactly one occurrence missing are handled here.
        if value_clan.cardinality != GRID_SIZE - 1:
            continue
        # Rows and columns that already contain the value.
        taken_lines = sets.union(project(value_clan, 'row'), project(value_clan, 'col'))
        # Strip every board entry on one of those rows/cols; the remainder is the target cell.
        free_cell = sets.minus(_board, clans.superstrict(_board, taken_lines))
        placement = clans.cross_union(free_cell, project(value_clan, 'value'))
        pending = sets.union(pending, placement)

    return get_new_board(_board, pending) if pending else _board
Ejemplo n.º 7
0
def check_blocks(_board):
    """Try to place values block by block (a block is one 'band'/'stack' combination).

    For every block of filled cells the missing values and the missing row/col positions are
    determined, then:

    * block clan of cardinality ``GRID_SIZE-1``: the single missing value is placed directly;
    * otherwise the value/cell combinations that clash with filled cells on the same row or
      column are discarded, and
        - for a block clan of cardinality ``GRID_SIZE-2`` both open cells are resolved from
          the conflict information;
        - else every value left with exactly one candidate cell is placed.

    Returns the board after all placements (``_board`` itself when nothing was placed).
    """
    if verbose:
        print("* check_blocks")

    # NOTE: the filled cells are computed once and are not refreshed while _board is updated.
    filled = get_filled_cells(_board)
    block_clans = partition.partition(filled, partial(by_keys, 'band', 'stack'))
    for block_clan in _sort(block_clans, key=partial(by_clan_keys, 'band', 'stack')):
        # Values not yet present in this block.
        missing_values = get_missing_values(block_clan)
        # Row/col positions of this block that are still open.
        open_cells = get_missing_rowcols(block_clan)

        if block_clan.cardinality == GRID_SIZE-1:
            # One value and one cell are left: combine and place them.
            _board = get_new_board(_board, clans.cross_union(open_cells, missing_values))
            continue

        # Pair every missing value with every open row and with every open column.
        value_rows = clans.cross_union(missing_values, project(open_cells, 'row'))
        value_cols = clans.cross_union(missing_values, project(open_cells, 'col'))
        value_lines = sets.union(value_rows, value_cols)

        # Filled cells (value/row/col) that clash with one of those pairings.
        blockers = project(clans.superstrict(filled, value_lines), 'value', 'row', 'col')
        if blockers.is_empty:
            # Without conflicts nothing can be decided for this block.
            continue

        # NOTE(review): cache_is_left_functional(True) is applied to the clan and to each of
        # its relations -- presumably a hint that spares later recomputation; confirm.
        every_option = clans.cross_union(missing_values, open_cells).cache_is_left_functional(True)
        for rel in every_option:
            rel.cache_is_left_functional(True)

        # Options ruled out by a value/col clash or by a value/row clash.
        conflict = sets.union(
            clans.superstrict(every_option, project(blockers, 'value', 'col')),
            clans.superstrict(every_option, project(blockers, 'value', 'row')))
        open_options = sets.minus(every_option, conflict)

        if block_clan.cardinality == GRID_SIZE-2:
            # A value in conflict can't go into its conflict cell, so it must go in the other.
            forced = clans.superstrict(open_options, project(conflict, 'value'))
            if forced.cardinality == 2:
                # Both values are determined.
                to_place = forced
            else:
                # Drop the forced option, then drop whatever else targets the forced cell;
                # what is left over is the placement for the second cell.
                leftover = sets.minus(open_options, forced)
                forced_cell = project(forced, 'row', 'col')
                second = sets.minus(leftover, clans.superstrict(leftover, forced_cell))
                to_place = sets.union(forced, second)
            _board = get_new_board(_board, to_place)
            continue

        # Group the options by value; a value that fits in only one cell is placed there.
        per_value = partition.partition(open_options, partial(by_key, 'value'))
        for value_options in _sort(per_value, key=partial(by_clan_key, 'value')):
            if value_options.cardinality != 1:
                continue
            # Keep only row/col/value (drops band/stack).
            _board = get_new_board(_board, project(value_options, 'row', 'col', 'value'))
    return _board
Ejemplo n.º 8
0
def check_rows(_board):
    """Look for rows where there is only one missing value.  If any are found fill in the missing
    value.  Look for rows where there are two missing values.  If either missing value is blocked
    by the same value in the candidate row, col, or block then the other value can be placed in
    the blocked cell.  The other value can be placed in the other cell.  Look for rows with more
    than two missing values.  Check each empty cell to see only one of the missing values can be
    placed in it.  Check each value to see if there is only one cell where it can be placed.

    Returns the board after all placements made through get_new_board; if nothing could be
    placed, the _board argument is returned as passed in."""
    if verbose:
        print("* check_rows")
    # NOTE: `board` is computed once here and is not refreshed while `_board` is updated below.
    board = get_filled_cells(_board)

    # One clan of filled cells per row, processed in the order given by _sort.
    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _sort(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        # Everything in the (current) _board that belongs to this row.
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE-1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE-2:

            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire board
            # (in that case control falls through to the general checks below)
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Keep only the choices in the occupied col to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible, clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.functional_cross_union(new_targets, new_possible)
        # Plain alias of occupied_clan; no copy or projection is made here.
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Partition by row/col
        placed = 0
        candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
        # Sorted by 'col' only -- presumably sufficient because all candidates share this row.
        for candidate in _sort(candidates, key=partial(by_clan_key, 'col')):
            # If any row/col has only 1 candidate, place it
            if candidate.cardinality == 1:
                # Remove band/stack
                cell = project(candidate, 'row', 'col', 'value')
                _board = get_new_board(_board, cell)
                placed += 1

        # When a cell was placed above, the by-value pass is skipped for this row.
        if placed:
            continue

        # Partition by value
        candidates = partition.partition(new_possible4, partial(by_key, 'value'))
        for candidate in _sort(candidates, key=partial(by_clan_key, 'value')):
            # If any value fits in only 1 cell, place it
            if candidate.cardinality == 1:
                # Remove band/stack
                cell = project(candidate, 'row', 'col', 'value')
                _board = get_new_board(_board, cell)
    return _board
Ejemplo n.º 9
0
def query5():
    """Compute the revenue per nation, following the SQL statement reproduced below.

    Supplier, customer/nation, order and line-item data are joined step by step, a 'revenue'
    couplet is added to every joined relation, the result is grouped by 'nationname' and the
    revenue of every group is summed. Timings of the individual steps are reported through a
    FunctionTimer.

    Returns a Set with one two-couplet Set ('nationname', 'revenue') per group.

    NOTE(review): region_name, start_date and end_date are read from the enclosing scope.
    """
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    #
    # NOTE(review): the statement above references `supplier` without listing it in the FROM
    # clause and groups by `n_name` while selecting `nationname`; the code below joins supplier
    # data and groups on 'nationname'.
    timer = FunctionTimer()
    short_prints = True

    # Step 1: suppliers joined with the projected customers/nations (join on 'nationkey').
    suppliers_customers = clans.cross_functional_union(
        get_supplier_solutions(),
        get_customers_nations_projected(get_nations(region_name)))
    timer.lap('result1', short=short_prints)

    # Step 2: add the restricted, projected orders (join on 'custkey').
    with_orders = clans.cross_functional_union(
        suppliers_customers,
        get_orders_restricted_projected(start_date, end_date))
    timer.lap('result2', short=short_prints)

    # Step 3: add the line items (join on 'orderkey' and 'suppkey').
    lineitem_types = {
        'orderkey': int,
        'suppkey': int,
        'extendedprice': float,
        'discount': float,
        'partkey': int,
        'linenumber': int,
        'quantity': int,
        'tax': float,
    }
    lineitems = csv.import_csv('lineitem.csv', lineitem_types)
    with_lineitems = clans.cross_functional_union(with_orders, lineitems)
    timer.lap('result3', short=short_prints)

    # Step 4: extend every relation with its 'revenue' couplet.
    def with_revenue(rel):
        price = rel('extendedprice').value
        discount = rel('discount').value
        return relations.functional_add(rel, Couplet('revenue', price * (1 - discount)))

    priced = Set(with_revenue(rel) for rel in with_lineitems)
    timer.lap('result4', short=short_prints)

    # Keep only the columns needed for the aggregation, then group on 'nationname'.
    revenue_by_nations = clans.project(priced, 'revenue', 'nationname')
    revenue_grouped_by_nations = partition.partition(
        revenue_by_nations, lambda rel: rel('nationname'))
    timer.lap('revenue_grouped_by_nations', short=short_prints)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            # Fold all values of the clan, starting from the function's identity element.
            running = aggregate_func.identity
            for member in clan:
                running = aggregate_func(running, member(aggregation_left).value)
            # Any member can supply the group key; take the first one.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set([
            Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
            for group_key, total in totals.items()
        ])

    # Our aggregation function (adding two numbers, identity is 0).
    def aggregate_sum(total, term):
        return total + term

    aggregate_sum.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(
        revenue_grouped_by_nations, 'nationname', 'revenue', aggregate_sum)
    timer.end('query5_result')

    return query5_result
Ejemplo n.º 10
0
def check_rows(_board, try_harder=0):
    """Look for rows where there is only one missing value.  If any are found fill in the missing
    value.  Look for rows where there are two missing values.  If either missing value is blocked
    by the same value in the candidate row, col, or block then the other value can be placed in
    the blocked cell.  The other value can be placed in the other cell.  Look for rows with more
    than two missing values.  Check each empty cell to see only one of the missing values can be
    placed in it.  Check each value to see if there is only one cell where it can be placed.

    When try_harder is truthy, values that cannot be placed directly are additionally checked
    against their blocks (via get_block_candidates) to eliminate candidates, and the candidate
    passes are repeated while eliminations occur.  try_harder is reset to 0 locally as soon as
    any cell is placed.

    Returns the board after all placements made through get_new_board; if nothing could be
    placed, the _board argument is returned as passed in."""
    if VERBOSE:
        print("* check_rows")
    # NOTE: `board` is computed once here and is not refreshed while `_board` is updated below.
    board = get_filled_cells(_board)

    # One clan of filled cells per row, processed in the order given by _SORT.
    all_rows_clans = partition.partition(board, partial(by_key, 'row'))
    for row_clan in _SORT(all_rows_clans, key=partial(by_clan_key, 'row')):
        row = project(row_clan, 'row')
        # Everything in the (current) _board that belongs to this row.
        board_row = clans.superstrict(_board, row)
        values_clan = get_missing_values(row_clan)

        if row_clan.cardinality == GRID_SIZE - 1:
            # Row is missing only 1 value, remove row_clan from the board leaving target row_col
            row_col = sets.minus(board_row, row_clan)
            new_cells = clans.cross_union(row_col, values_clan)
            _board = get_new_board(_board, new_cells)
            try_harder = 0
            continue

        # Get the set of candidate col/value pairs
        row_possible = clans.cross_union(values_clan,
                                         project(sets.minus(board_row, row_clan), 'col'))

        if row_clan.cardinality == GRID_SIZE - 2:

            # The occupied_clan is the col/value pair that is a conflict for each col/value
            occupied_clan = project(clans.superstrict(board, row_possible), 'col', 'value')

            # If there are no conflicts neither value can be placed without checking entire board
            # (in that case control falls through to the general checks below)
            if not occupied_clan.is_empty:
                # ..remove occupied_clan col/value pairs from all possible
                new_possible = sets.minus(row_possible, occupied_clan)

                if new_possible.cardinality == 2:
                    # Of the 4 possibilities (2 values * 2 cols), 2 were removed, place remaining
                    new_cells = clans.cross_union(row, new_possible)
                    _board = get_new_board(_board, new_cells)
                    try_harder = 0
                    continue

                # 3 of the possibilities remain...
                occupied_col = project(occupied_clan, 'col')

                # Keep only the choices in the occupied col to get the first col/value pair
                col_value1 = clans.superstrict(new_possible, occupied_col)

                occupied_val = project(col_value1, 'value')

                # Remove the occupied_val choices to get the second col/value pair
                col_value2 = sets.minus(new_possible, clans.superstrict(new_possible, occupied_val))

                new_cells = clans.cross_union(row, col_value1)
                new_cells = sets.union(new_cells, clans.cross_union(row, col_value2))
                _board = get_new_board(_board, new_cells)
                try_harder = 0
                continue

        # The occupied_clan is the row/col/value set that could be a conflict for values
        occupied_clan = clans.superstrict(board, values_clan)

        # If there are no conflicts then no cells can be placed
        if occupied_clan.is_empty:
            continue

        # Add row to row_possible for remaining checks
        all_possible = clans.cross_union(row_possible, row)

        # Get the set of conflicts...conflicting row/value + col/value
        conflict = sets.union(
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'col')),
            clans.superstrict(all_possible,
                              project(occupied_clan, 'value', 'row')))

        # Remove the conflicts from all_possible
        new_possible = sets.minus(all_possible, conflict)

        if new_possible.is_empty:
            continue  # All possible may have been excluded due to row/col conflicts

        # Otherwise...need to check for block (band+stack) conflicts too!!
        # ...if value exists in same block as element of all_possible

        # Add band/stack
        new_targets = clans.superstrict(BANDS_STACKS, project(new_possible, 'row', 'col'))
        new_possible3 = clans.cross_functional_union(new_targets, new_possible)
        # Plain alias of occupied_clan; no copy or projection is made here.
        occupied_clan2 = occupied_clan

        # Remove block (band+stack) conflicts
        new_possible4a = sets.minus(project(new_possible3, 'value', 'band', 'stack'),
                                    project(occupied_clan2, 'value', 'band', 'stack'))
        new_possible4 = clans.superstrict(new_possible3, new_possible4a)

        # Repeat the candidate passes for this row; the loop is left as soon as a row/col pass
        # places a cell, or when a full pass removed no candidates (or try_harder is falsy).
        while True:
            candidates_updated = False
            # Partition by row/col
            placed = 0
            candidates = partition.partition(new_possible4, partial(by_keys, 'row', 'col'))
            # Sorted by 'col' only -- presumably sufficient because all candidates share this row.
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'col')):
                # If any row/col has only 1 candidate, place it
                if candidate.cardinality == 1:
                    # The candidate is passed as is (band/stack are not projected away here)
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                    placed += 1
            if placed:
                break

            # Partition by value
            candidates = partition.partition(new_possible4, partial(by_key, 'value'))
            for candidate in _SORT(candidates, key=partial(by_clan_key, 'value')):
                # If any value fits in only 1 cell, place it
                if candidate.cardinality == 1:
                    # The candidate is passed as is (band/stack are not projected away here)
                    _board = get_new_board(_board, candidate)
                    try_harder = 0
                else:  # If any value must be placed elsewhere, remove as candidate for this cell
                    if try_harder:
                        value = project(candidate, 'value')
                        # If this row of a sibling block must contain this value...
                        blocks = partition.partition(candidate, partial(by_keys, 'band', 'stack'))
                        # Only relevant when the value's candidates span more than one block
                        if blocks.cardinality > 1:
                            for block_clan in _SORT(blocks,
                                                    key=partial(by_clan_keys, 'band', 'stack')):
                                block = project(block_clan, 'band', 'stack')
                                board_block = clans.superstrict(board, block)
                                if board_block.is_empty:
                                    continue

                                # NOTE: rebinds new_possible and conflict from the enclosing row
                                # loop; both are reassigned before their next use there.
                                new_possible, conflict = get_block_candidates(board_block, board)
                                new_possible_value = clans.superstrict(new_possible, value)

                                if new_possible_value['row'].cardinality == 1:
                                    # Value must be placed in this block
                                    # ...other block candidates can be removed
                                    remove = sets.minus(candidate, block_clan)
                                    new_possible4 = sets.minus(new_possible4, remove)
                                    candidates_updated = True
            if not candidates_updated or not try_harder:
                break
    return _board
Ejemplo n.º 11
0
def query5():
    """Compute the revenue per nation, following the SQL statement reproduced below.

    Supplier, customer/nation, order and line-item data are joined step by step, a 'revenue'
    couplet is added to every joined relation, the result is grouped by 'nationname' and the
    revenue of every group is summed. Timings of the individual steps are reported through a
    FunctionTimer.

    Returns a Set with one two-couplet Set ('nationname', 'revenue') per group.

    NOTE(review): region_name, start_date and end_date are read from the enclosing scope.
    """
    # select
    #     nationname,
    #     sum(lineitem.extendedprice * (1 - lineitem.discount)) as revenue
    # from
    #     customer, orders, lineitem,   -- Loaded from CSV
    #     nation, region                -- Loaded from XML
    # where
    #     customer.custkey = orders.custkey
    #     and lineitem.orderkey = orders.orderkey
    #     and customer.nationkey = nation.nationkey
    #     and supplier.nationkey = nation.nationkey
    #     and nation.regionkey = region.regionkey
    #     and region.name = 'AMERICA'
    #     and orders.orderdate >= date '1996-01-01'
    #     and orders.orderdate < date '1996-01-01' + interval '1' year
    # group by
    #     n_name
    #
    # NOTE(review): the statement above references `supplier` without listing it in the FROM
    # clause and groups by `n_name` while selecting `nationname`; the code below joins supplier
    # data and groups on 'nationname'.
    timer = FunctionTimer()
    short_prints = True

    # Step 1: suppliers joined with the projected customers/nations (join on 'nationkey').
    suppliers_customers = clans.functional_cross_union(
        get_supplier_solutions(),
        get_customers_nations_projected(get_nations(region_name)),
    )
    timer.lap("result1", short=short_prints)

    # Step 2: add the restricted, projected orders (join on 'custkey').
    with_orders = clans.functional_cross_union(
        suppliers_customers,
        get_orders_restricted_projected(start_date, end_date),
    )
    timer.lap("result2", short=short_prints)

    # Step 3: add the line items (join on 'orderkey' and 'suppkey').
    lineitem_types = {
        "orderkey": int,
        "suppkey": int,
        "extendedprice": float,
        "discount": float,
        "partkey": int,
        "linenumber": int,
        "quantity": int,
        "tax": float,
    }
    lineitems = csv.import_csv("lineitem.csv", lineitem_types)
    with_lineitems = clans.functional_cross_union(with_orders, lineitems)
    timer.lap("result3", short=short_prints)

    # Step 4: extend every relation with its 'revenue' couplet.
    def with_revenue(rel):
        price = rel("extendedprice").value
        discount = rel("discount").value
        return relations.functional_add(rel, Couplet("revenue", price * (1 - discount)))

    priced = Set(with_revenue(rel) for rel in with_lineitems)
    timer.lap("result4", short=short_prints)

    # Keep only the columns needed for the aggregation, then group on 'nationname'.
    revenue_by_nations = clans.project(priced, "revenue", "nationname")
    revenue_grouped_by_nations = partition.partition(
        revenue_by_nations, lambda rel: rel("nationname")
    )
    timer.lap("revenue_grouped_by_nations", short=short_prints)

    # Almost generic aggregation function. (Handles 'normal' cases, but not all edge cases.)
    def aggregate(horde, group_left, aggregation_left, aggregate_func):
        totals = {}
        for clan in horde:
            # Fold all values of the clan, starting from the function's identity element.
            running = aggregate_func.identity
            for member in clan:
                running = aggregate_func(running, member(aggregation_left).value)
            # Any member can supply the group key; take the first one.
            group_key = next(iter(clan))(group_left)
            totals[group_key] = running
        return Set(
            [
                Set(Couplet(group_left, group_key), Couplet(aggregation_left, total))
                for group_key, total in totals.items()
            ]
        )

    # Our aggregation function (adding two numbers, identity is 0).
    def aggregate_sum(total, term):
        return total + term

    aggregate_sum.identity = 0

    # Calculate the aggregation result.
    # noinspection PyTypeChecker
    query5_result = aggregate(revenue_grouped_by_nations, "nationname", "revenue", aggregate_sum)
    timer.end("query5_result")

    return query5_result