Exemple #1
0
def multiply(input_op_node: saldag.OpNode, output_name: str, target_col_name: str, operands: list):
    """
    Define Multiply relation.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned Multiply node.
    :param target_col_name: Name of column that stores results of Multiply operation.
    If target_col_name refers to an already existing column in the relation, then that
    column should also be the first argument in the operands list. If target_col_name
    does not refer to an existing column, then the columns in the operands list will
    be multiplied together in order, and stored in a column named <target_col_name> and
    appended to the relation.
    :param operands: Non-empty list of operand columns (given by name) & scalars.
    :return: Multiply OpNode.
    """

    # Get input relation from input node
    in_rel = input_op_node.out_rel

    # Get relevant columns and create copies (taken before any reset below,
    # so the copies keep the input columns' current state)
    out_rel_cols = copy.deepcopy(in_rel.columns)

    # Replace all column names with corresponding columns; non-string
    # operands (scalars) are passed through untouched.
    operands = [
        utils.find(in_rel.columns, op) if isinstance(op, str) else op
        for op in operands
    ]
    # NOTE: resolved operands are the input relation's own column objects,
    # so this reset is applied to the input relation's columns.
    for operand in operands:
        if hasattr(operand, "coll_sets"):
            operand.coll_sets = set()

    # If target_col already exists, it will be at the 0th index of operands.
    # The first operand may also be a scalar, which has no ``name``; in that
    # case the target cannot be an existing operand column, so fall through
    # to creating a new column instead of failing on the attribute access.
    first_operand = operands[0]
    target_is_first_operand = (
        hasattr(first_operand, "name") and first_operand.name == target_col_name
    )

    if target_is_first_operand:
        target_column = utils.find(in_rel.columns, target_col_name)
        target_column.coll_sets = set()
    else:
        # TODO: figure out new column's coll_sets
        target_column = rel.Column(
            output_name, target_col_name, len(in_rel.columns), "INTEGER", set())
        out_rel_cols.append(target_column)

    # Create output relation
    out_rel = rel.Relation(output_name, out_rel_cols, copy.copy(in_rel.stored_with))
    out_rel.update_columns()

    # Create our operator node
    op = saldag.Multiply(out_rel, input_op_node, target_column, operands)

    # Add it as a child to input node
    input_op_node.children.add(op)

    return op
Exemple #2
0
def aggregate(input_op_node: cc_dag.OpNode, output_name: str,
              group_col_names: list, over_col_name: str, aggregator: str,
              agg_out_col_name: str):
    """
    Build an Aggregate node below the given parent node.

    The output relation consists of copies of the key columns (in the order
    given) followed by a copy of the aggregated column renamed to
    ``agg_out_col_name``. The operator node itself is handed the parent's
    original column objects.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned Aggregate node.
    :param group_col_names: List of column names to be used as key columns in the aggregation.
    :param over_col_name: Name of column that gets aggregated.
    :param aggregator: Aggregate function ('+', 'max', 'min', etc.)
    :param agg_out_col_name: Name of (optionally renamed) aggregate column for returned node.
    :return: Aggregate OpNode.
    """

    assert isinstance(group_col_names, list)

    parent_rel = input_op_node.out_rel
    parent_cols = parent_rel.columns

    # Resolve the key columns and the column being aggregated by name
    key_cols = [utils.find(parent_cols, name) for name in group_col_names]
    agg_src_col = utils.find(parent_cols, over_col_name)

    # Work on a renamed copy so the parent's column keeps its own name
    renamed_col = copy.deepcopy(agg_src_col)
    renamed_col.name = agg_out_col_name

    # Output layout: key columns first, aggregate column last. All of them
    # are copies so later changes don't leak back into the parent relation.
    result_cols = []
    for key_col in key_cols:
        result_cols.append(copy.deepcopy(key_col))
    result_cols.append(copy.deepcopy(renamed_col))

    result_rel = rel.Relation(
        output_name, result_cols, copy.copy(parent_rel.stored_with))
    result_rel.update_columns()

    # Wire the new operator into the DAG
    agg_node = cc_dag.Aggregate(
        result_rel, input_op_node, key_cols, agg_src_col, aggregator)
    input_op_node.children.add(agg_node)

    return agg_node
Exemple #3
0
def cc_filter(input_op_node: cc_dag.OpNode,
              output_name: str,
              filter_col_name: str,
              operator: str,
              other_col_name: str = None,
              scalar: int = None):
    """
    Build a Filter node below the given parent node.

    The output relation has copies of all of the parent's columns.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned Filter node.
    :param filter_col_name: Name of column that relation gets filtered over.
    :param operator: Comparison operator; must be "==" or "<".
    :param other_col_name: Name of column to compare to (possibly none).
    :param scalar: Scalar to compare to (possibly none).

    :return: Filter OpNode
    """

    # Only these two comparison operators are accepted
    assert operator in {"==", "<"}

    parent_rel = input_op_node.out_rel

    # Output columns are independent copies of the parent's columns
    result_cols = copy.deepcopy(parent_rel.columns)

    # Look up the column being filtered on
    filter_col = utils.find(parent_rel.columns, filter_col_name)

    # Look up the comparison column only when a name was supplied
    other_col = None
    if other_col_name:
        other_col = utils.find(parent_rel.columns, other_col_name)

    result_rel = rel.Relation(
        output_name, result_cols, copy.copy(parent_rel.stored_with))
    result_rel.update_columns()

    # Wire the new operator into the DAG
    filter_node = cc_dag.Filter(
        result_rel, input_op_node, filter_col, operator, other_col, scalar)
    input_op_node.children.add(filter_node)

    return filter_node
Exemple #4
0
def _comp_neighs(input_op_node: saldag.OpNode, output_name: str, comp_col_name: str):
    """
    Define CompNeighs relation.

    The output relation contains a single column: a deep copy of the
    comparison column taken from the parent's relation.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned CompNeighs node.
    :param comp_col_name: Name of column that keys comparison operation.
    :return: CompNeighs OpNode.
    """

    # Get input relation from input node
    in_rel = input_op_node.out_rel

    comp_col = utils.find(in_rel.columns, comp_col_name)
    # NOTE(review): this assigns ``stored_with`` on the parent's column object
    # (and hence on the copy made below). ``stored_with`` is otherwise used as
    # a relation-level attribute here; possibly ``coll_sets`` was intended.
    # Behavior is kept as-is until that is confirmed.
    comp_col.stored_with = set()

    # Create output relation; only the comparison column is carried over, so
    # no copy of the remaining input columns is needed.
    out_rel = rel.Relation(output_name, [copy.deepcopy(comp_col)], copy.copy(in_rel.stored_with))
    out_rel.update_columns()

    # Create our operator node
    op = saldag.CompNeighs(out_rel, input_op_node, comp_col)

    # Add it as a child to input node
    input_op_node.children.add(op)

    return op
Exemple #5
0
def filter(input_op_node: saldag.OpNode, output_name: str, filter_col_name: str, operator: str, filter_expr: str):
    # TODO: Not implemented in codegen as far as I can tell
    """
    Build a Filter node below the given parent node.

    The output relation has copies of all of the parent's columns. The
    parent's filter column has its ``coll_sets`` reset to an empty set.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned Filter node.
    :param filter_col_name: Name of column that relation gets filtered over.
    :param operator: # TODO not sure what the difference between operator and filter_expr is
    :param filter_expr:
    :return: Filter OpNode
    """

    parent_rel = input_op_node.out_rel

    # Copy the columns first: the reset below touches the parent's own
    # column object and must not be reflected in these copies.
    result_cols = copy.deepcopy(parent_rel.columns)

    # Look up the column being filtered on and clear its coll_sets
    filter_col = utils.find(parent_rel.columns, filter_col_name)
    filter_col.coll_sets = set()

    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, result_cols, stored_with)
    result_rel.update_columns()

    # Wire the new operator into the DAG
    filter_node = saldag.Filter(result_rel, input_op_node, filter_col, operator, filter_expr)
    input_op_node.children.add(filter_node)

    return filter_node
Exemple #6
0
def distinct(input_op_node: saldag.OpNode, output_name: str, selected_col_names: list):
    """
    Build a Distinct node below the given parent node.

    The output relation holds copies of the selected columns only, each with
    its ``coll_sets`` reset to an empty set.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned Distinct node.
    :param selected_col_names: List of column names the Distinct operation will key over.
    :return: Distinct OpNode.
    """

    parent_rel = input_op_node.out_rel

    # Resolve every selected column by name, preserving the given order
    selected_cols = []
    for col_name in selected_col_names:
        selected_cols.append(utils.find(parent_rel.columns, col_name))

    # Output columns are copies with cleared coll_sets
    result_cols = copy.deepcopy(selected_cols)
    for result_col in result_cols:
        result_col.coll_sets = set()

    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, result_cols, stored_with)
    result_rel.update_columns()

    # Wire the new operator into the DAG
    distinct_node = saldag.Distinct(result_rel, input_op_node, selected_cols)
    input_op_node.children.add(distinct_node)

    return distinct_node
Exemple #7
0
def sort_by(input_op_node: saldag.OpNode, output_name: str, sort_by_col_name: str):
    """
    Build a SortBy node below the given parent node.

    The output relation has copies of all of the parent's columns, each with
    its ``coll_sets`` reset to an empty set.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned SortBy node.
    :param sort_by_col_name: Name of column that keys sorting.
    :return: SortBy OpNode.
    """

    parent_rel = input_op_node.out_rel

    # Column the sort is keyed on (the parent's own column object)
    sort_by_col = utils.find(parent_rel.columns, sort_by_col_name)

    # Output columns are copies with cleared coll_sets
    result_cols = copy.deepcopy(parent_rel.columns)
    for result_col in result_cols:
        result_col.coll_sets = set()

    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, result_cols, stored_with)
    result_rel.update_columns()

    # Wire the new operator into the DAG
    sort_node = saldag.SortBy(result_rel, input_op_node, sort_by_col)
    input_op_node.children.add(sort_node)

    return sort_node
Exemple #8
0
def _pub_join(input_op_node: cc_dag.OpNode,
              output_name: str,
              key_col_name: str,
              host: str = "ca-spark-node-0",
              port: int = 8042,
              is_server: bool = True,
              other_op_node: cc_dag.OpNode = None):
    """
    Build a PubJoin node below the given parent node(s).

    The output relation has copies of all columns of the first parent and,
    if a second parent is given, copies of all of its columns except the
    first one. The key column must be the first column of the input relation.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned PubJoin node.
    :param key_col_name: Name of the key column; must have index 0.
    :param host: Passed through to the PubJoin node (presumably the host to
        connect to or serve on -- confirm against cc_dag.PubJoin).
    :param port: Passed through to the PubJoin node.
    :param is_server: Passed through to the PubJoin node.
    :param other_op_node: Optional second parent node.
    :return: PubJoin OpNode.
    """

    parent_rel = input_op_node.out_rel

    # Start from copies of the first parent's columns ...
    result_cols = copy.deepcopy(parent_rel.columns)
    # ... and, when present, add the second parent's columns minus its first
    if other_op_node:
        other_cols = other_op_node.out_rel.columns[1:]
        result_cols += copy.deepcopy(other_cols)

    # Look up the key column; it is required to be the leading column
    key_col = utils.find(parent_rel.columns, key_col_name)
    assert key_col.idx == 0
    # key_col.trust_set = set()

    result_rel = rel.Relation(
        output_name, result_cols, copy.copy(parent_rel.stored_with))
    result_rel.update_columns()

    join_node = cc_dag.PubJoin(
        result_rel, input_op_node, key_col, host, port, is_server,
        other_op_node)

    # Register the new node as a child of every parent involved
    input_op_node.children.add(join_node)
    if other_op_node:
        other_op_node.children.add(join_node)

    return join_node
Exemple #9
0
def distinct_count(input_op_node: cc_dag.OpNode, output_name: str,
                   selected_col_name: str):
    """
    Build a DistinctCount node below the given parent node.

    The output relation holds a single column: a copy of the selected column
    with its ``trust_set`` reset to an empty set.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned DistinctCount node.
    :param selected_col_name: Column name the DistinctCount operation will key over.
    :return: DistinctCount OpNode.
    """

    parent_rel = input_op_node.out_rel

    # Resolve the selected column by name
    selected_col = utils.find(parent_rel.columns, selected_col_name)

    # Single-column output: a copy with a cleared trust set
    result_cols = copy.deepcopy([selected_col])
    for result_col in result_cols:
        result_col.trust_set = set()

    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, result_cols, stored_with)
    result_rel.update_columns()

    # Wire the new operator into the DAG
    count_node = cc_dag.DistinctCount(result_rel, input_op_node, selected_col)
    input_op_node.children.add(count_node)

    return count_node
Exemple #10
0
def union(left_input_node: cc_dag.OpNode, right_input_node: cc_dag.OpNode,
          output_name: str, left_col_name: str, right_col_name: str):
    """
    Computes a single column relation containing the union of values of the two selected columns.

    The output column is named after the left column and typed "INTEGER";
    the output relation is stored with the union of both parents'
    ``stored_with`` sets.

    :param left_input_node: Left parent node.
    :param right_input_node: Right parent node.
    :param output_name: Name of returned Union node.
    :param left_col_name: Name of the selected column in the left relation.
    :param right_col_name: Name of the selected column in the right relation.
    :return: Union OpNode.
    """

    left_rel = left_input_node.out_rel
    right_rel = right_input_node.out_rel

    # Single fresh output column at index 0
    result_col = Column(output_name, left_col_name, 0, "INTEGER", set())
    stored_with = left_rel.stored_with.union(right_rel.stored_with)
    result_rel = rel.Relation(output_name, [result_col], stored_with)
    result_rel.update_columns()

    # Resolve the two columns being unioned
    left_col = utils.find(left_rel.columns, left_col_name)
    right_col = utils.find(right_rel.columns, right_col_name)

    union_node = cc_dag.Union(
        result_rel, left_input_node, right_input_node, left_col, right_col)

    # Register the new node as a child of both parents
    left_input_node.children.add(union_node)
    right_input_node.children.add(union_node)

    return union_node
Exemple #11
0
def filter_by(input_op_node: cc_dag.OpNode,
              output_name: str,
              filter_col_name: str,
              by_op: cc_dag.OpNode,
              use_not_in: bool = False):
    """
    Build a FilterBy node with two parents: the filtered node and the node filtered by.

    The output relation has copies of all columns of ``input_op_node``.

    :param input_op_node: Parent node for the node returned by this method.
    :param output_name: Name of returned FilterBy node.
    :param filter_col_name: Name of column that relation gets filtered over.
    :param by_op: Parent node to filter by.
    :param use_not_in: flag indicating whether to use not in instead of in
    :return: FilterBy OpNode
    """

    parent_rel = input_op_node.out_rel

    # Output columns are independent copies of the parent's columns
    result_cols = copy.deepcopy(parent_rel.columns)

    # Look up the column being filtered on
    filter_col = utils.find(parent_rel.columns, filter_col_name)

    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, result_cols, stored_with)
    result_rel.update_columns()

    filter_node = cc_dag.FilterBy(
        result_rel, input_op_node, by_op, filter_col, use_not_in)

    # Register the new node as a child of both parents
    input_op_node.children.add(filter_node)
    by_op.children.add(filter_node)

    return filter_node
Exemple #12
0
def _pub_intersect(input_node: cc_dag.OpNode,
                   output_name: str,
                   col_name: str,
                   host: str = "ca-spark-node-0",
                   port: int = 8042,
                   is_server: bool = True):
    """
    Computes a single column relation containing the intersection of values of the two selected columns.

    Only one parent node is supplied here; ``host``, ``port`` and
    ``is_server`` are passed through to the PubIntersect node (presumably
    identifying the other side -- confirm against cc_dag.PubIntersect).

    :param input_node: Parent node for the node returned by this method.
    :param output_name: Name of returned PubIntersect node.
    :param col_name: Name of the selected column; also names the output column.
    :param host: Passed through to the PubIntersect node.
    :param port: Passed through to the PubIntersect node.
    :param is_server: Passed through to the PubIntersect node.
    :return: PubIntersect OpNode.
    """

    parent_rel = input_node.out_rel

    # Single fresh "INTEGER" output column at index 0
    result_col = Column(output_name, col_name, 0, "INTEGER", set())
    stored_with = copy.copy(parent_rel.stored_with)
    result_rel = rel.Relation(output_name, [result_col], stored_with)
    result_rel.update_columns()

    # Resolve the column being intersected
    selected_col = utils.find(parent_rel.columns, col_name)

    intersect_node = cc_dag.PubIntersect(
        result_rel, input_node, selected_col, host, port, is_server)

    # Wire the new operator into the DAG
    input_node.children.add(intersect_node)

    return intersect_node
Exemple #13
0
def join(left_input_node: saldag.OpNode, right_input_node: saldag.OpNode, output_name: str,
         left_col_names: list, right_col_names: list):
    """
    Define Join relation.

    The output relation consists of the key columns (named and typed after
    the left join columns), followed by the non-key columns of the left
    relation and then the non-key columns of the right relation. It is
    stored with the union of both parents' ``stored_with`` sets.

    :param left_input_node: Left parent node for the node returned by this method.
    :param right_input_node: Right parent node for the node returned by this method.
    :param output_name: Name of returned Join node.
    :param left_col_names: List of join columns in left parent relation.
    :param right_col_names: List of join columns in right parent relation.
    :return: Join OpNode.
    """

    # Helper: build fresh output columns for every non-key column of
    # `relation`, in the order they appear on the input relation. Each new
    # column is created with an empty set as its last argument (no merging
    # of the input columns' sets is done here). The assigned index is the
    # column's position plus `start_idx` minus the number of key columns.
    def _cols_from_rel(start_idx: int, relation: rel.Relation, key_col_idxs: list):
        # Build the lookup set once rather than once per column
        key_idx_set = set(key_col_idxs)
        num_keys = len(key_col_idxs)
        return [
            rel.Column(
                output_name, col.get_name(), num + start_idx - num_keys, col.type_str, set())
            for num, col in enumerate(relation.columns)
            if col.idx not in key_idx_set
        ]

    assert isinstance(left_col_names, list)
    assert isinstance(right_col_names, list)

    # Get input relation from input nodes
    left_in_rel = left_input_node.out_rel
    right_in_rel = right_input_node.out_rel

    # Get columns from both relations
    left_cols = left_in_rel.columns
    right_cols = right_in_rel.columns

    # Get columns we will join on
    left_join_cols = [utils.find(left_cols, left_col_name) for left_col_name in left_col_names]
    right_join_cols = [utils.find(right_cols, right_col_name) for right_col_name in right_col_names]

    # Create new key columns, one per left join column, indexed from 0
    out_key_cols = [
        rel.Column(output_name, left_join_col.get_name(), i, left_join_col.type_str, set())
        for i, left_join_col in enumerate(left_join_cols)
    ]

    # Define output relation columns.
    # These will be the key columns followed
    # by all columns from left (other than join columns)
    # and right (again excluding join columns)

    start_idx = len(out_key_cols)
    # continue_idx will be (start_idx + len(left_in_rel.columns) - len(left_join_cols)),
    # which is just len(left_in_rel.columns)
    continue_idx = len(left_in_rel.columns)
    left_key_idxs = [left_join_col.idx for left_join_col in left_join_cols]
    right_key_idxs = [right_join_col.idx for right_join_col in right_join_cols]
    out_rel_cols = out_key_cols \
        + _cols_from_rel(start_idx, left_in_rel, left_key_idxs) \
        + _cols_from_rel(continue_idx, right_in_rel, right_key_idxs)

    # The result of the join will be stored with the union
    # of the parties storing left and right
    out_stored_with = left_in_rel.stored_with.union(right_in_rel.stored_with)

    # Create output relation
    out_rel = rel.Relation(output_name, out_rel_cols, out_stored_with)
    out_rel.update_columns()

    # Create join operator
    op = saldag.Join(
        out_rel,
        left_input_node,
        right_input_node,
        left_join_cols,
        right_join_cols
    )

    # Add it as a child to both input nodes
    left_input_node.children.add(op)
    right_input_node.children.add(op)

    return op