def entrance(in_tree, out_tree, twigs, l_in, l_out, node_set, q_in, q_out, q_in_menu, q_out_menu, fo):
    """Build the head mapping for ``twigs`` and then run the query kernel.

    The function first collects ``twig.head`` for every twig and hands the
    list to ``__get_head_map``. It then calls ``__naive_get_head_map`` (kept
    as a baseline for comparison; its result is not used here). If the head
    mapping is invalid the function stops; otherwise it seeds a work queue
    with ``common.QueryBox(0)`` and keeps feeding the boxes returned by
    ``__core`` back into the queue until it is empty.

    Side effects: sets the module-level ``END`` to ``len(twigs)`` and writes
    progress/timing messages to stdout and ``statics.f_console``.

    :param in_tree: forwarded to ``__get_head_map``
    :param out_tree: forwarded to ``__get_head_map``
    :param twigs: sized iterable of objects exposing a ``head`` attribute
    :param l_in: forwarded to ``__naive_get_head_map`` and ``__core``
    :param l_out: forwarded to ``__naive_get_head_map`` and ``__core``
    :param node_set: forwarded to ``__naive_get_head_map``
    :param q_in: forwarded to both head-map helpers
    :param q_out: forwarded to both head-map helpers
    :param q_in_menu: forwarded to ``__get_head_map``
    :param q_out_menu: forwarded to ``__get_head_map``
    :param fo: forwarded to ``__core``
    :return: ``common.INVALID_CANDIDATE`` when the head mapping is invalid,
        otherwise ``None`` once the work queue has drained
    """
    global head_map, END

    def _log(message):
        """Echo a progress message to stdout and to the console log file."""
        print(message)
        print(message, file=statics.f_console)

    END = len(twigs)
    heads = [twig.head for twig in twigs]

    _log("开始获取head映射...")
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    t1 = time.perf_counter()
    result = __get_head_map(in_tree, out_tree, heads, q_in, q_out, q_in_menu, q_out_menu)
    t2 = time.perf_counter()
    _log("获取head映射耗时 " + str(t2 - t1))

    # Baseline for comparison: the naive way of obtaining the heads.
    __naive_get_head_map(l_in, l_out, node_set, heads, q_in, q_out)

    if result == common.INVALID_CANDIDATE:
        return common.INVALID_CANDIDATE

    querybox_list = CQueue()
    querybox_list.put(common.QueryBox(0))

    _log("查询内核启动...")
    t3 = time.perf_counter()
    while not querybox_list.is_empty():
        x_box = querybox_list.get()
        # Every box produced by the kernel is queued for further processing.
        result = __core(l_in=l_in, l_out=l_out, box=x_box, twigs=twigs, fo=fo)
        for each in result:
            querybox_list.put(each)
    t4 = time.perf_counter()
    _log("查询内核运行耗时 " + str(t4 - t3))
def __build_tree(table, menu, node_set, neighbor):
    """Build the degree tree for ``node_set`` and post-process it.

    Every node is filed under a path of tree edges: for each label in
    ``menu[node]`` the path descends ``table[node][label]`` times along a
    child edge named after that label, creating children on demand. Nodes
    that have no entry (or an empty entry) in ``menu`` stay on the root.

    After construction the tree is walked breadth-first to call
    ``__intervein_node`` on each tree node's data and to sort its
    ``label_menu``; then ``__dfs_for_prophecy`` and ``dfs_for_depth`` are
    run from the root. ``statics.io_tree_max_dep`` is reset to ``-1`` before
    the depth pass. Progress and timing messages go to stdout and
    ``statics.f_console``.

    :param table: mapping ``node -> {label -> int}``; the int is used as a
        repeat count for the label along the node's path
    :param menu: mapping ``node -> iterable of labels`` giving label order
    :param node_set: iterable of nodes to place in the tree
    :param neighbor: forwarded to ``__intervein_node``
    :return: the root ``TreeNode``
    """

    def _log(message):
        """Echo a progress message to stdout and to the console log file."""
        print(message)
        print(message, file=statics.f_console)

    root = TreeNode(data=[])
    for node in node_set:
        # Nodes without any label information live directly on the root.
        if node not in menu or not menu[node]:
            root.data.append(node)
            continue
        pin = root
        for label in menu[node]:
            # One tree level per unit of table[node][label].
            for _ in range(table[node][label]):
                if label in pin.children:
                    pin = pin.children[label]
                else:
                    child = TreeNode(data=[])
                    pin.add_child(label, child)
                    pin = child
        pin.data.append(node)

    _log("开始调整出入度树以最大化差异...")
    # Tidy the label menus in the tree and, using the neighbour information,
    # space out the nodes that share a tree node (per the original author's
    # note: so that the novelty between them is maximised).
    q = CQueue()
    q.put(root)
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_intervein = time.perf_counter()
    while not q.is_empty():
        x_node = q.get()
        __intervein_node(x_node.data, neighbor)
        x_node.label_menu.sort()
        for edge in x_node.children:
            q.put(x_node.children[edge])
    t_end_intervein = time.perf_counter()
    _log("差异化出入度表耗时 " + str(t_end_intervein - t_start_intervein))

    _log("开始计算prophecy...")
    t_start_prophecy = time.perf_counter()
    __dfs_for_prophecy(root)
    t_end_prophecy = time.perf_counter()
    _log("计算prophecy耗时: " + str(t_end_prophecy - t_start_prophecy))

    _log("开始深先遍历出入度树...")
    # Reset before the depth pass; dfs_for_depth presumably raises it as it
    # descends (verify against its definition).
    statics.io_tree_max_dep = -1
    dfs_for_depth(root, 1)
    _log("得到最大深度:" + str(statics.io_tree_max_dep))
    return root
def filter(self):
    """
    Prune every candidate set by single-candidate propagation.

    Whenever a node has exactly one candidate left, that candidate is
    removed from the candidate collection of every other node. Nodes that
    drop to a single candidate as a result are queued and propagated in
    turn, until nothing changes.

    ``self.candidate`` is mutated in place, also when the result is invalid.

    :return: INVALID_CANDIDATE as soon as some node's candidates become
        empty (or a sole candidate is ``None``), otherwise VALID_CANDIDATE
    """
    pending = CQueue()

    def _propagate(owner):
        """Strip ``owner``'s sole candidate from all other nodes.

        Returns False when this leaves some node without candidates.
        """
        sole = None
        for each in self.candidate[owner]:
            sole = each
        if sole is None:
            return False
        for other in self.candidate:
            if other == owner:
                continue
            try:
                self.candidate[other].remove(sole)
            except (KeyError, ValueError):
                # `sole` was not among `other`'s candidates (KeyError for a
                # set, ValueError for a list); nothing to prune here.
                continue
            remaining = len(self.candidate[other])
            if remaining < 1:
                return False
            if remaining == 1:
                # `other` is now decided too; propagate its choice later.
                pending.put(other)
        return True

    # Seed pass: propagate from every node that is already decided.
    for node in self.candidate:
        if len(self.candidate[node]) == 1 and not _propagate(node):
            return INVALID_CANDIDATE

    # Drain the nodes that became decided as a consequence.
    while not pending.is_empty():
        if not _propagate(pending.get()):
            return INVALID_CANDIDATE

    return VALID_CANDIDATE
def load_map():
    """
    Load the edge list from ``ROOT_PATH + "data.txt"`` and derive lookups.

    Every non-blank line is parsed as three tab-separated integers:
    origin node, edge label, destination node. After loading, the raw
    tables are passed to ``__inner_sort`` and an in-edge breadth-first
    search is run from every node to fill a ``NeighborInfo`` per node.
    Out-edges are not included in the neighbour statistics.

    Progress and timing messages go to stdout and ``statics.f_console``.

    :return: tuple ``(l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor)``

        - ``l_in``: ``{des: {label: [ori, ...]}}``
        - ``l_out``: ``{ori: {label: [des, ...]}}``
        - ``l_in_menu`` / ``l_out_menu``: ``{node: [label, ...]}`` with the
          distinct labels seen on the node's in-/out-edges
        - ``node_set``: set of every node id seen in the file
        - ``neighbor``: ``{node: NeighborInfo}``
    """

    def _log(message):
        """Echo a progress message to stdout and to the console log file."""
        print(message)
        print(message, file=statics.f_console)

    _log("开始加载文件...")
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_load_file = time.perf_counter()  # timer
    l_in = {}
    l_out = {}
    l_in_menu = {}
    l_out_menu = {}
    node_set = set()
    # `with` guarantees the handle is closed even if a line fails to parse.
    with open(ROOT_PATH + "data.txt") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            tpl = line.split('\t')
            ori = int(tpl[0])
            edg = int(tpl[1])
            des = int(tpl[2])
            node_set.add(ori)
            node_set.add(des)

            out_edges = l_out.setdefault(ori, {})
            out_labels = l_out_menu.setdefault(ori, [])
            in_edges = l_in.setdefault(des, {})
            in_labels = l_in_menu.setdefault(des, [])

            # The menu lists exactly the keys of the matching edge dict, so
            # "first time this label is seen" is a dict lookup, not a list scan.
            if edg not in in_edges:
                in_edges[edg] = []
                in_labels.append(edg)
            in_edges[edg].append(ori)

            if edg not in out_edges:
                out_edges[edg] = []
                out_labels.append(edg)
            out_edges[edg].append(des)
    t_end_load_file = time.perf_counter()  # timer
    _log("结束加载文件...")
    _log('加载文件耗时 ' + str(t_end_load_file - t_start_load_file))

    t_start_sort_raw_data = time.perf_counter()  # timer
    __inner_sort(l_in, l_in_menu)
    __inner_sort(l_out, l_out_menu)
    t_end_sort_raw_data = time.perf_counter()  # timer
    _log("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data))

    _log("开始统计邻居信息")
    t_start_neighbor = time.perf_counter()  # timer
    # Collect neighbour information; in-edges first.
    neighbor = {}
    visited = set()
    # Two queues advanced in lockstep: a node and the depth it was reached at.
    q_node = CQueue()
    q_dep = CQueue()
    for node in node_set:
        visited.clear()
        visited.add(node)
        info = NeighborInfo()
        neighbor[node] = info
        q_node.clear()
        q_dep.clear()
        q_node.put(node)
        q_dep.put(0)
        while not q_node.is_empty():
            x_node = q_node.get()
            x_dep = q_dep.get()
            next_dep = x_dep + 1
            if x_node not in l_in:
                continue
            for label in l_in[x_node]:
                for ori in l_in[x_node][label]:
                    if ori in visited:
                        continue
                    info.safe_add(ori, statics.fade_factor_pow[next_dep], target='in')
                    visited.add(ori)
                    # Only expand further while below the depth threshold.
                    if next_dep < statics.neighbor_threshold:
                        q_node.put(ori)
                        q_dep.put(next_dep)
        info.cal_module(target='in')
    # Out-edges are deliberately not collected for now.
    t_end_neighbor = time.perf_counter()  # timer
    _log("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor))
    return l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor
def load_map():
    """
    Load the edge list from ``ROOT_PATH + "data.txt"`` and derive lookups.

    Every non-blank line is parsed as three tab-separated integers:
    origin node, edge label, destination node. After loading, the raw
    tables are passed to ``__inner_sort`` and an in-edge breadth-first
    search is run from every node to fill a ``NeighborInfo`` per node.
    Out-edges are not included in the neighbour statistics.

    Progress and timing messages go to stdout and ``statics.f_console``.

    :return: tuple ``(l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor)``

        - ``l_in``: ``{des: {label: [ori, ...]}}``
        - ``l_out``: ``{ori: {label: [des, ...]}}``
        - ``l_in_menu`` / ``l_out_menu``: ``{node: [label, ...]}`` with the
          distinct labels seen on the node's in-/out-edges
        - ``node_set``: set of every node id seen in the file
        - ``neighbor``: ``{node: NeighborInfo}``
    """

    def _log(message):
        """Echo a progress message to stdout and to the console log file."""
        print(message)
        print(message, file=statics.f_console)

    _log("开始加载文件...")
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_load_file = time.perf_counter()  # timer
    l_in = {}
    l_out = {}
    l_in_menu = {}
    l_out_menu = {}
    node_set = set()
    # `with` guarantees the handle is closed even if a line fails to parse.
    with open(ROOT_PATH + "data.txt") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            tpl = line.split('\t')
            ori = int(tpl[0])
            edg = int(tpl[1])
            des = int(tpl[2])
            node_set.add(ori)
            node_set.add(des)

            out_edges = l_out.setdefault(ori, {})
            out_labels = l_out_menu.setdefault(ori, [])
            in_edges = l_in.setdefault(des, {})
            in_labels = l_in_menu.setdefault(des, [])

            # The menu lists exactly the keys of the matching edge dict, so
            # "first time this label is seen" is a dict lookup, not a list scan.
            if edg not in in_edges:
                in_edges[edg] = []
                in_labels.append(edg)
            in_edges[edg].append(ori)

            if edg not in out_edges:
                out_edges[edg] = []
                out_labels.append(edg)
            out_edges[edg].append(des)
    t_end_load_file = time.perf_counter()  # timer
    _log("结束加载文件...")
    _log('加载文件耗时 ' + str(t_end_load_file - t_start_load_file))

    t_start_sort_raw_data = time.perf_counter()  # timer
    __inner_sort(l_in, l_in_menu)
    __inner_sort(l_out, l_out_menu)
    t_end_sort_raw_data = time.perf_counter()  # timer
    _log("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data))

    _log("开始统计邻居信息")
    t_start_neighbor = time.perf_counter()  # timer
    # Collect neighbour information; in-edges first.
    neighbor = {}
    visited = set()
    # Two queues advanced in lockstep: a node and the depth it was reached at.
    q_node = CQueue()
    q_dep = CQueue()
    for node in node_set:
        visited.clear()
        visited.add(node)
        info = NeighborInfo()
        neighbor[node] = info
        q_node.clear()
        q_dep.clear()
        q_node.put(node)
        q_dep.put(0)
        while not q_node.is_empty():
            x_node = q_node.get()
            x_dep = q_dep.get()
            next_dep = x_dep + 1
            if x_node not in l_in:
                continue
            for label in l_in[x_node]:
                for ori in l_in[x_node][label]:
                    if ori in visited:
                        continue
                    info.safe_add(ori, statics.fade_factor_pow[next_dep], target='in')
                    visited.add(ori)
                    # Only expand further while below the depth threshold.
                    if next_dep < statics.neighbor_threshold:
                        q_node.put(ori)
                        q_dep.put(next_dep)
        info.cal_module(target='in')
    # Out-edges are deliberately not collected for now.
    t_end_neighbor = time.perf_counter()  # timer
    _log("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor))
    return l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor