def _jfc_select_from_table(new_node, child, table_node, pk_list):
    """Copy one table's map output, pk list and map filter from child to new_node.

    Entries are looked up on ``child`` by the table's alias (or by its name
    when the alias is the empty string) and stored on ``new_node`` under the
    real table name.
    """
    tn = table_node.table_name
    tn_alias = table_node.table_alias if table_node.table_alias != "" else tn

    # Deep-copy so that re-tagging the expressions with the real table name
    # does not modify the list owned by ``child``.
    tmp_exp_list = copy.deepcopy(child.get_mapoutput(tn_alias))
    for exp in tmp_exp_list:
        exp.table_name = tn

    new_node.mapoutput[tn] = tmp_exp_list
    new_node.pk_dict[tn] = pk_list

    tmp_where = child.get_mapfilter(tn_alias)
    if tmp_where is not None:
        # __get_func_para__ fills col_list (presumably with the column
        # expressions referenced by the condition -- confirm in ystree).
        # NOTE: unlike the map output, these are re-tagged in place, so the
        # filter expression still owned by ``child`` is modified as well.
        col_list = []
        ystree.__get_func_para__(tmp_where.where_condition_exp, col_list)
        for col in col_list:
            col.table_name = tn
        new_node.mapfilter[tn] = [tmp_where.where_condition_exp]


def jfc_composite_select(new_node, child, pk_list):
    """Register the table inputs of ``child`` on the composite node ``new_node``.

    If ``child`` is a ``ystree.TwoJoinNode``, its left and then its right child
    are handled, each only when it is a ``ystree.TableNode``.  Otherwise
    ``child`` is treated as a group-by node and its single ``child.child`` is
    handled when it is a ``ystree.TableNode``.

    For every handled table, ``new_node.mapoutput``, ``new_node.pk_dict`` and
    (when ``child`` has a map filter for that table) ``new_node.mapfilter`` are
    set under the real table name; ``pk_list`` is stored as the pk entry.

    Returns None; the result is the mutation of ``new_node``.
    """
    if isinstance(child, ystree.TwoJoinNode):
        # Left before right: if both sides resolve to the same table name,
        # the right side's entries overwrite the left side's.
        if isinstance(child.left_child, ystree.TableNode):
            _jfc_select_from_table(new_node, child, child.left_child, pk_list)
        if isinstance(child.right_child, ystree.TableNode):
            _jfc_select_from_table(new_node, child, child.right_child, pk_list)
    else:
        ###### the child is a groupby node
        if isinstance(child.child, ystree.TableNode):
            _jfc_select_from_table(new_node, child, child.child, pk_list)
def _jfc_select_from_table(new_node, child, table_node, pk_list):
    """Store the select info that ``child`` holds for one table on ``new_node``.

    ``child`` is queried by the table's alias, falling back to the table name
    when the alias is the empty string; the results are stored on ``new_node``
    keyed by the real table name.
    """
    table_name = table_node.table_name
    lookup_name = table_node.table_alias
    if lookup_name == "":
        lookup_name = table_name

    # Work on a deep copy so the re-tagged expressions are private to new_node.
    output_exps = copy.deepcopy(child.get_mapoutput(lookup_name))
    for output_exp in output_exps:
        output_exp.table_name = table_name
    new_node.mapoutput[table_name] = output_exps
    new_node.pk_dict[table_name] = pk_list

    where = child.get_mapfilter(lookup_name)
    if where is None:
        return

    # __get_func_para__ populates ``columns`` (presumably the column
    # expressions used by the condition -- confirm in ystree).  They are
    # re-tagged in place, i.e. the condition object shared with ``child``
    # changes too.
    columns = []
    ystree.__get_func_para__(where.where_condition_exp, columns)
    for column in columns:
        column.table_name = table_name
    new_node.mapfilter[table_name] = [where.where_condition_exp]


def jfc_composite_select(new_node, child, pk_list):
    """Populate ``new_node`` with the table-level select info found on ``child``.

    A ``ystree.TwoJoinNode`` contributes its left and right children (in that
    order); any other node is treated as a group-by node and contributes its
    single ``child.child``.  Only candidates that are ``ystree.TableNode``
    instances are used.  For each of them ``new_node.mapoutput``,
    ``new_node.pk_dict`` (set to ``pk_list``) and, when a map filter exists,
    ``new_node.mapfilter`` are written under the real table name.

    Returns None; ``new_node`` is modified in place.
    """
    if isinstance(child, ystree.TwoJoinNode):
        candidates = (child.left_child, child.right_child)
    else:
        ###### the child is a groupby node
        candidates = (child.child,)

    # Order matters: a later candidate with the same table name overwrites
    # the entries written for an earlier one.
    for candidate in candidates:
        if isinstance(candidate, ystree.TableNode):
            _jfc_select_from_table(new_node, child, candidate, pk_list)
def input_transit_correlation(tree):
    """Build composite nodes for the correlated nodes found in ``tree``.

    Step 1 collects, per table name, the nodes reported by
    ``__get_input_correlation__`` and drops tables that have only one node.
    Step 2 asks ``__get_transit_correlation__`` for the node groups
    (``ret_dict``) and partition keys (``pk_dict``).  For each resulting table
    a ``ystree.CompositeNode`` is created, filled with map output columns and
    map filters from every node in the group, and attached to the tree via
    ``node.parent.set_composite``.

    Returns None; the tree is modified in place.
    """
    ### step1: look for input correlation
    ### the key of node_dict is the table name and the values are the
    ### nodes (GroupByNode, 2JoinNode)
    node_dict = {}
    __get_input_correlation__(tree, node_dict)

    # Iterate over a snapshot of the keys: deleting entries while iterating
    # the live key view raises RuntimeError on Python 3.
    for x in list(node_dict.keys()):
        if len(node_dict[x]) == 1:
            del node_dict[x]

    ### step2: look for transit correlation
    ret_dict = {}
    pk_dict = {}
    __get_transit_correlation__(node_dict, ret_dict, pk_dict)

    for table_name in ret_dict:
        new_node = ystree.CompositeNode()
        new_node.pk_dict[table_name] = pk_dict[table_name]

        for node in ret_dict[table_name]:
            table_list = []
            if isinstance(node, ystree.TwoJoinNode):
                # filter_name: real name of a table child that has no entry
                # in pk_dict; the right child wins if both qualify.
                filter_name = ""
                for side in (node.left_child, node.right_child):
                    if not isinstance(side, ystree.TableNode):
                        continue
                    if side.table_alias != "":
                        table_list.append(side.table_alias)
                    else:
                        table_list.append(side.table_name)
                    if side.table_name not in pk_dict:
                        filter_name = side.table_name

                pk_list = node.get_pk()
                for x in pk_list:
                    for y in x:
                        if y.table_name == filter_name:
                            new_node.pk_dict[filter_name] = [x]
                            # Leaves only the inner loop; later entries of
                            # pk_list are still examined.
                            break
            else:
                table_list.append(table_name)

            ### set compositeNode's map output list
            for x in table_list:
                tn = x
                tn_alias = x
                if x in node.table_alias_dict:
                    tn = node.table_alias_dict[x]

                tmp_exp_list = node.get_mapoutput(tn_alias)
                for y in tmp_exp_list:
                    new_exp = ystree.YRawColExp(tn, y.column_name)
                    new_exp.column_name = int(new_exp.column_name)
                    new_exp.column_type = y.column_type
                    if tn in new_node.mapoutput:
                        if ystree.list_contain_exp(new_node.mapoutput[tn], new_exp) is False:
                            new_node.mapoutput[tn].append(new_exp)
                    else:
                        new_node.mapoutput[tn] = [new_exp]

                ### set mapfilter to reduce the data shuffled
                tmp_where = node.get_mapfilter(tn_alias)
                if tmp_where is not None:
                    col_list = []
                    ystree.__get_func_para__(tmp_where.where_condition_exp, col_list)
                    # Rewrite aliases to real table names, in place.
                    for exp in col_list:
                        if exp.table_name in node.table_alias_dict:
                            exp.table_name = node.table_alias_dict[exp.table_name]
                    filter_exp = tmp_where.where_condition_exp
                else:
                    # No filter for this table on this node: record a
                    # constant TRUE so each node still contributes one entry.
                    filter_exp = ystree.YConsExp(True, "BOOLEAN")
                new_node.mapfilter.setdefault(tn, []).append(filter_exp)

            ### insert the composite node into the tree
            new_node.it_node_list.append(node)
            if node.parent not in new_node.it_node_list:
                node.parent.set_composite(new_node, node)

        for node in new_node.it_node_list:
            if isinstance(node, ystree.TwoJoinNode):
                if node.left_child in new_node.it_node_list:
                    continue
                if node.right_child in new_node.it_node_list:
                    continue
            # NOTE(review): nesting reconstructed from a flattened source;
            # confirm this call belongs at loop level rather than inside the
            # TwoJoinNode check above.
            gen_composite_child(new_node, node)

        for node in new_node.child_list:
            ### set pk_dict for each child node. The child node's parent
            ### must be a join node
            col_list = []
            if node.parent.join_explicit is True:
                join_exp = node.parent.join_condition.on_condition_exp
            else:
                join_exp = node.parent.join_condition.where_condition_exp

            if node == node.parent.left_child or node == node.parent.left_composite:
                __get_join_key__(join_exp, col_list, "LEFT")
            else:
                __get_join_key__(join_exp, col_list, "RIGHT")

            # Child entries are keyed by position in child_list, next to the
            # table-name keys set earlier.  index() (first equal element) is
            # kept on purpose instead of enumerate().
            index = new_node.child_list.index(node)
            new_node.pk_dict[index] = [col_list]
def input_transit_correlation(tree):
    """Detect input/transit correlation in ``tree`` and insert composite nodes.

    The function first gathers the nodes per table name through
    ``__get_input_correlation__`` and discards tables with a single node.
    ``__get_transit_correlation__`` then fills ``ret_dict`` (table name ->
    nodes) and ``pk_dict`` (table name -> partition keys).  One
    ``ystree.CompositeNode`` is created per table in ``ret_dict``; it receives
    the map output columns and map filters of each node in the group and is
    hooked into the tree with ``node.parent.set_composite``.

    Returns None; all results are mutations of the tree's nodes.
    """
    ### step1: look for input correlation
    ### the key of node_dict is the table name and the values are the
    ### nodes (GroupByNode, 2JoinNode)
    node_dict = {}
    __get_input_correlation__(tree, node_dict)

    # Collect the keys first and delete afterwards: removing entries while
    # iterating the dict itself raises RuntimeError on Python 3.
    single_node_tables = [name for name in node_dict if len(node_dict[name]) == 1]
    for name in single_node_tables:
        del node_dict[name]

    ### step2: look for transit correlation
    ret_dict = {}
    pk_dict = {}
    __get_transit_correlation__(node_dict, ret_dict, pk_dict)

    for table_name in ret_dict:
        new_node = ystree.CompositeNode()
        new_node.pk_dict[table_name] = pk_dict[table_name]

        for node in ret_dict[table_name]:
            table_list = []
            if isinstance(node, ystree.TwoJoinNode):
                filter_name = ""
                if isinstance(node.left_child, ystree.TableNode):
                    if node.left_child.table_alias != "":
                        tmp_name = node.left_child.table_alias
                    else:
                        tmp_name = node.left_child.table_name
                    table_list.append(tmp_name)
                    if node.left_child.table_name not in pk_dict:
                        filter_name = node.left_child.table_name

                if isinstance(node.right_child, ystree.TableNode):
                    if node.right_child.table_alias != "":
                        tmp_name = node.right_child.table_alias
                    else:
                        tmp_name = node.right_child.table_name
                    table_list.append(tmp_name)
                    # Overrides the left child's name when both lack a
                    # pk_dict entry.
                    if node.right_child.table_name not in pk_dict:
                        filter_name = node.right_child.table_name

                pk_list = node.get_pk()
                for x in pk_list:
                    for y in x:
                        if y.table_name == filter_name:
                            new_node.pk_dict[filter_name] = [x]
                            # Only the inner loop stops here.
                            break
            else:
                table_list.append(table_name)

            ### set compositeNode's map output list
            for x in table_list:
                tn_alias = x
                # Resolve an alias to the real table name when the node
                # knows one; otherwise the name is used as-is.
                if x in node.table_alias_dict:
                    tn = node.table_alias_dict[x]
                else:
                    tn = x

                tmp_exp_list = node.get_mapoutput(tn_alias)
                for y in tmp_exp_list:
                    new_exp = ystree.YRawColExp(tn, y.column_name)
                    new_exp.column_name = int(new_exp.column_name)
                    new_exp.column_type = y.column_type
                    if tn in new_node.mapoutput:
                        if ystree.list_contain_exp(new_node.mapoutput[tn], new_exp) is False:
                            new_node.mapoutput[tn].append(new_exp)
                    else:
                        new_node.mapoutput[tn] = [new_exp]

                ### set mapfilter to reduce the data shuffled
                tmp_where = node.get_mapfilter(tn_alias)
                if tmp_where is not None:
                    col_list = []
                    ystree.__get_func_para__(tmp_where.where_condition_exp, col_list)
                    # In-place rewrite of aliases to real table names.
                    for exp in col_list:
                        if exp.table_name in node.table_alias_dict:
                            exp.table_name = node.table_alias_dict[exp.table_name]

                    if tn in new_node.mapfilter:
                        new_node.mapfilter[tn].append(tmp_where.where_condition_exp)
                    else:
                        new_node.mapfilter[tn] = [tmp_where.where_condition_exp]
                else:
                    # The node has no filter for this table: add a constant
                    # TRUE expression in its place.
                    consExp = ystree.YConsExp(True, "BOOLEAN")
                    if tn in new_node.mapfilter:
                        new_node.mapfilter[tn].append(consExp)
                    else:
                        new_node.mapfilter[tn] = [consExp]

            ### insert the composite node into the tree
            new_node.it_node_list.append(node)
            if node.parent not in new_node.it_node_list:
                node.parent.set_composite(new_node, node)

        for node in new_node.it_node_list:
            if isinstance(node, ystree.TwoJoinNode):
                if node.left_child in new_node.it_node_list:
                    continue
                if node.right_child in new_node.it_node_list:
                    continue
            # NOTE(review): block nesting was reconstructed from a flattened
            # source; verify that this call is meant to run for non-join
            # nodes as well.
            gen_composite_child(new_node, node)

        for node in new_node.child_list:
            ### set pk_dict for each child node. The child node's parent
            ### must be a join node
            col_list = []
            if node.parent.join_explicit is True:
                join_exp = node.parent.join_condition.on_condition_exp
            else:
                join_exp = node.parent.join_condition.where_condition_exp

            if node == node.parent.left_child or node == node.parent.left_composite:
                __get_join_key__(join_exp, col_list, "LEFT")
            else:
                __get_join_key__(join_exp, col_list, "RIGHT")

            # pk_dict holds table-name keys (set above) and positional keys
            # for composite children; index() returns the first equal entry.
            index = new_node.child_list.index(node)
            new_node.pk_dict[index] = [col_list]