def update_out_rel_cols(self):
    """
    Rebuild the output relation's column list from the current inputs.

    The resulting order is: fresh key columns, then the non-key columns
    derived from the left input relation, then the non-key columns derived
    from the right input relation.
    """
    self.update_op_specific_cols()
    key_cols = self.update_out_key_cols()

    # Work on deep copies of the input relations' columns.
    left_cols = copy.deepcopy(self.get_left_in_rel().columns)
    right_cols = copy.deepcopy(self.get_right_in_rel().columns)

    # Offset handed to the helper for the left side: number of key columns.
    left_offset = len(key_cols)
    # Offset handed to the helper for the right side: total number of
    # columns in the left input relation.
    right_offset = len(left_cols)

    left_key_idxs = [col.idx for col in self.left_join_cols]
    left_non_key_cols = [
        Column(*fields)
        for fields in non_key_cols_from_rel(
            self.out_rel.name, left_offset, left_cols, left_key_idxs)
    ]

    right_key_idxs = [col.idx for col in self.right_join_cols]
    right_non_key_cols = [
        Column(*fields)
        for fields in non_key_cols_from_rel(
            self.out_rel.name, right_offset, right_cols, right_key_idxs)
    ]

    self.out_rel.columns = key_cols + left_non_key_cols + right_non_key_cols
    self.out_rel.update_columns()
def join(left_input_node: OpNode, right_input_node: OpNode, name: str,
         left_col_names: list, right_col_names: list):
    """
    Create a Join operator over the output relations of two input nodes.

    :param left_input_node: node whose output relation is the left input
    :param right_input_node: node whose output relation is the right input
    :param name: name given to the output relation and its columns
    :param left_col_names: names of the join columns in the left relation
    :param right_col_names: names of the join columns in the right relation
    :return: the new Join node, registered as a child of both input nodes
    :raises Exception: if the two name lists differ in length, or if a pair
        of join columns has different type strings
    """
    if len(left_col_names) != len(right_col_names):
        raise Exception(
            f"Unequal number of left and right join cols passed to join():"
            f"\nLeft cols: {len(left_col_names)}"
            f"\nRight cols: {len(right_col_names)}"
        )

    left_in_rel = left_input_node.out_rel
    right_in_rel = right_input_node.out_rel

    # Resolve the requested names to column objects on each side.
    left_join_cols = [
        find(left_in_rel.columns, col_name) for col_name in left_col_names
    ]
    check_cols_for_missing_entries(left_join_cols, left_in_rel.name)
    right_join_cols = [
        find(right_in_rel.columns, col_name) for col_name in right_col_names
    ]
    check_cols_for_missing_entries(right_join_cols, right_in_rel.name)

    # One output key column per (left, right) pair. It takes the left
    # column's name and type, and the intersection of both sides'
    # trust_with / plaintext sets.
    out_key_cols = []
    for key_idx, (left_key, right_key) in enumerate(
            zip(left_join_cols, right_join_cols)):
        left_copy = copy.copy(left_key)
        right_copy = copy.copy(right_key)
        shared_trust = left_copy.trust_with.intersection(
            right_copy.trust_with)
        shared_plaintext = left_copy.plaintext.intersection(
            right_copy.plaintext)
        if left_key.type_str != right_key.type_str:
            raise Exception(
                f"Can't do join between columns of different type:\n"
                f"LEFT COL: {left_key.type_str}\n"
                f"RIGHT COL: {right_key.type_str}\n")
        out_key_cols.append(
            Column(name, left_key.name, key_idx,
                   copy.copy(left_key.type_str),
                   shared_trust, shared_plaintext)
        )

    # Offsets passed to the non-key helper: the left side starts after the
    # key columns, the right side at the left relation's column count.
    left_offset = len(out_key_cols)
    right_offset = len(left_in_rel.columns)

    left_non_key_cols = [
        Column(*fields)
        for fields in non_key_cols_from_rel(
            name, left_offset, left_in_rel.columns,
            [lcol.idx for lcol in left_join_cols])
    ]
    right_non_key_cols = [
        Column(*fields)
        for fields in non_key_cols_from_rel(
            name, right_offset, right_in_rel.columns,
            [rcol.idx for rcol in right_join_cols])
    ]

    out_rel = Relation(
        name,
        out_key_cols + left_non_key_cols + right_non_key_cols,
        stored_with_from_rels([left_in_rel, right_in_rel]),
    )
    out_rel.update_columns()

    op = Join(out_rel, left_input_node, right_input_node,
              left_join_cols, right_join_cols)
    for parent in (left_input_node, right_input_node):
        parent.children.add(op)
    return op
def update_out_key_cols(self):
    """
    Build a new list of output key columns, one per left join column.

    Each column takes the left join column's name and type string, and the
    intersection of the left and right columns' trust_with and plaintext
    sets.

    :return: list of freshly constructed key columns
    :raises Exception: if a pair of join columns has different type strings
    """
    key_cols = []
    for pos, left_key in enumerate(self.left_join_cols):
        # Index the right side explicitly so a shorter right list still
        # fails with IndexError instead of being silently truncated.
        right_key = self.right_join_cols[pos]

        left_copy = copy.copy(left_key)
        right_copy = copy.copy(right_key)
        shared_trust = left_copy.trust_with.intersection(
            right_copy.trust_with)
        shared_plaintext = left_copy.plaintext.intersection(
            right_copy.plaintext)

        if left_key.type_str != right_key.type_str:
            raise Exception(
                f"Can't do join between columns of different type:\n"
                f"LEFT COL: {left_key.type_str}\n"
                f"RIGHT COL: {right_key.type_str}\n")

        key_cols.append(
            Column(copy.copy(self.out_rel.name),
                   copy.copy(left_key.name),
                   pos,
                   copy.copy(left_key.type_str),
                   shared_trust,
                   shared_plaintext))
    return key_cols
def update_squares_col(self):
    """
    Build a fresh "__SQUARES__" column based on the aggregation column.

    The new column is named after the input relation, sits at index
    len(group_cols) + 1, and carries copies of the aggregation column's
    type string, trust_with set and plaintext set.
    """
    agg = self.agg_col
    squares_trust = copy.deepcopy(agg.trust_with)
    squares_plaintext = copy.deepcopy(agg.plaintext)
    squares_type = copy.copy(agg.type_str)
    squares_idx = len(self.group_cols) + 1
    return Column(
        self.get_in_rel().name,
        "__SQUARES__",
        squares_idx,
        squares_type,
        squares_trust,
        squares_plaintext,
    )
def build_extra_cols(self, col_names):
    """
    Create one "INTEGER" column per entry of col_names.

    Columns are named after the output relation and indexed consecutively
    starting at len(group_cols). They all share the same trust and
    plaintext set objects, computed over the group columns plus the
    aggregation column.

    :param col_names: names for the extra columns, in output order
    :return: list of the newly built columns
    """
    shared_trust = min_trust_with_from_cols(self.group_cols + [self.agg_col])
    shared_pt = min_pt_set_from_cols(self.group_cols + [self.agg_col])

    extra_cols = []
    for offset, col_name in enumerate(col_names):
        extra_cols.append(
            Column(self.out_rel.name, col_name,
                   offset + len(self.group_cols),
                   "INTEGER", shared_trust, shared_pt))
    return extra_cols
def update_count_col(self):
    """
    Build a fresh count column; it is not taken from the input relation.

    The column keeps the existing count column's name and type string, is
    placed at index len(group_cols), and gets trust / plaintext sets
    computed from the group columns.
    """
    grouping = self.group_cols
    existing = self.count_col
    # NOTE(review): other code in this file calls min_trust_with_from_cols;
    # confirm that min_trust_with_from_columns is defined as well.
    trust_set = min_trust_with_from_columns(grouping)
    plaintext_set = min_pt_set_from_cols(grouping)
    return Column(
        self.get_in_rel().name,
        existing.name,
        len(grouping),
        existing.type_str,
        trust_set,
        plaintext_set,
    )
def update_out_rel_cols(self):
    """
    Refresh the output relation's columns from the group and agg columns.

    The output is the group columns followed by the aggregation column.
    When push_up_optimized is set, a "__MEAN_SQUARES__" column is appended
    that copies the aggregation column's type string, trust_with set and
    plaintext set. The output relation receives deep copies of all of them.
    """
    self.update_op_specific_cols()

    # Concatenation yields a new list, so appending below does not touch
    # self.group_cols.
    out_cols = self.group_cols + [self.agg_col]
    if self.push_up_optimized:
        agg = self.agg_col
        out_cols.append(
            Column(self.out_rel.name,
                   "__MEAN_SQUARES__",
                   2,
                   agg.type_str,
                   copy.deepcopy(agg.trust_with),
                   copy.deepcopy(agg.plaintext)))

    self.out_rel.columns = copy.deepcopy(out_cols)
    self.out_rel.update_columns()
def update_count_col(self):
    """
    Build a fresh "__COUNT__" INTEGER column at index len(group_cols) + 2.

    With group columns present, its trust and plaintext sets are computed
    from those columns. Without any, both are derived from the output
    relation's stored_with.
    """
    if not self.group_cols:
        # count col will just be the number of rows, which
        # all parties storing this data already know
        trust_set = max_set(self.out_rel.stored_with)
        plaintext_set = max_set(self.out_rel.stored_with)
    else:
        trust_set = min_trust_with_from_cols(self.group_cols)
        plaintext_set = min_pt_set_from_cols(self.group_cols)

    count_idx = len(self.group_cols) + 2
    return Column(self.get_in_rel().name, "__COUNT__", count_idx,
                  "INTEGER", trust_set, plaintext_set)
def update_out_rel_cols(self):
    """
    Replace the output relation's columns with a single INTEGER column.

    The max trust_with / plaintext sets over all input columns are used:
    anyone who holds, or is trusted with, even one input column could have
    counted its rows themselves.
    """
    in_cols = copy.deepcopy(self.get_in_rel().columns)
    trust_union = max_trust_with_from_columns(in_cols)
    plaintext_union = max_pt_set_from_cols(in_cols)

    self.out_rel.columns = [
        Column(self.out_rel.name, self.col_name, 0, "INTEGER",
               trust_union, plaintext_union)
    ]
    self.out_rel.update_columns()
def index(input_op_node: OpNode, name: str, idx_col_name: str = "index"):
    """
    Create an Index operator that prepends an INTEGER index column.

    The output relation consists of the new index column followed by deep
    copies of the input relation's columns, and is stored with a shallow
    copy of the input relation's stored_with.

    :param input_op_node: node whose output relation is indexed
    :param name: name of the output relation (and of the index column's
        relation)
    :param idx_col_name: name of the index column, "index" by default
    :return: the new Index node, registered as a child of input_op_node
    """
    in_rel = input_op_node.out_rel

    trust_set_union = max_trust_with_from_columns(in_rel.columns)
    # Fix: the plaintext set was previously computed with the trust-set
    # helper, so it was built from trust_with sets instead of plaintext
    # sets.
    pt_set_union = max_pt_set_from_cols(in_rel.columns)

    # NOTE(review): the column is created with idx len(in_rel.columns) but
    # placed first in the list; presumably update_columns() renumbers the
    # columns -- confirm.
    index_col = Column(name, idx_col_name, len(in_rel.columns), "INTEGER",
                       trust_set_union, pt_set_union)

    out_rel_cols = [index_col] + copy.deepcopy(in_rel.columns)
    out_rel = Relation(name, out_rel_cols, copy.copy(in_rel.stored_with))
    out_rel.update_columns()

    op = Index(out_rel, input_op_node, idx_col_name)
    input_op_node.children.add(op)
    return op
def column_union(left_input_node: OpNode, right_input_node: OpNode,
                 name: str, left_col_name: str, right_col_name: str):
    """
    Create a ColumnUnion operator over one column from each input node.

    The output relation has a single "INTEGER" column at index 0, named
    after the left column. Its trust and plaintext sets are computed from
    the two source columns, and its stored_with is the left relation's
    stored_with followed by the right relation's.

    :param left_input_node: node providing the left column
    :param right_input_node: node providing the right column
    :param name: name of the output relation
    :param left_col_name: column name to look up in the left relation
    :param right_col_name: column name to look up in the right relation
    :return: the new ColumnUnion node, registered as a child of both inputs
    :raises Exception: if either column is not found in its relation
    """
    left_in_rel = left_input_node.out_rel
    right_in_rel = right_input_node.out_rel

    left_col = find(left_in_rel.columns, left_col_name)
    if left_col is None:
        raise Exception(
            f"Column {left_col_name} not found in relation {left_in_rel.name}."
        )

    right_col = find(right_in_rel.columns, right_col_name)
    if right_col is None:
        raise Exception(
            f"Column {right_col_name} not found in relation {right_in_rel.name}."
        )

    source_cols = [left_col, right_col]
    union_trust = min_trust_with_from_cols(source_cols)
    union_pt = min_pt_set_from_cols([left_col, right_col])
    union_col = Column(name, left_col_name, 0, "INTEGER", union_trust,
                       plaintext=union_pt)

    left_stored = copy.copy(left_in_rel.stored_with)
    right_stored = copy.copy(right_in_rel.stored_with)
    out_rel = Relation(name, [union_col], left_stored + right_stored)
    out_rel.update_columns()

    op = ColumnUnion(out_rel, left_input_node, right_input_node,
                     left_col, right_col)
    for parent in (left_input_node, right_input_node):
        parent.children.add(op)
    return op