Exemple #1
0
def entrance(in_tree, out_tree, twigs, l_in, l_out, node_set, q_in, q_out,
             q_in_menu, q_out_menu, fo):
    """
    Run one query: build the head mapping, then drain the query-box queue.

    The heads of all twigs are handed to ``__get_head_map`` (timed) and, as a
    comparison baseline, to ``__naive_get_head_map``. If the head mapping is
    reported invalid the function stops there. Otherwise a queue seeded with
    ``common.QueryBox(0)`` is processed: every box is passed to ``__core``
    and each box ``__core`` returns is queued in turn, until the queue is
    empty. Progress and timings are printed to stdout and mirrored to
    ``statics.f_console``.

    Side effect: the module-level ``END`` is set to ``len(twigs)``.

    :param in_tree: forwarded to ``__get_head_map``
    :param out_tree: forwarded to ``__get_head_map``
    :param twigs: sized iterable of objects exposing a ``head`` attribute
    :param l_in: forwarded to ``__naive_get_head_map`` and ``__core``
    :param l_out: forwarded to ``__naive_get_head_map`` and ``__core``
    :param node_set: forwarded to ``__naive_get_head_map``
    :param q_in: forwarded to both head-mapping routines
    :param q_out: forwarded to both head-mapping routines
    :param q_in_menu: forwarded to ``__get_head_map``
    :param q_out_menu: forwarded to ``__get_head_map``
    :param fo: forwarded to ``__core``
    :return: ``common.INVALID_CANDIDATE`` when the head mapping is invalid,
        otherwise ``None``
    """
    global head_map, END

    END = len(twigs)
    heads = [twig.head for twig in twigs]

    print("开始获取head映射...")
    print("开始获取head映射...", file=statics.f_console)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    t1 = time.perf_counter()  # timer
    result = __get_head_map(in_tree, out_tree, heads, q_in, q_out, q_in_menu,
                            q_out_menu)
    t2 = time.perf_counter()  # timer
    print("获取head映射耗时 " + str(t2 - t1))
    print("获取head映射耗时 " + str(t2 - t1), file=statics.f_console)

    # Baseline for comparison: the naive way of obtaining the heads.
    # It runs even when the mapping above turned out to be invalid.
    __naive_get_head_map(l_in, l_out, node_set, heads, q_in, q_out)

    if result == common.INVALID_CANDIDATE:
        return common.INVALID_CANDIDATE

    querybox_list = CQueue()
    querybox_list.put(common.QueryBox(0))
    print("查询内核启动...")
    print("查询内核启动...", file=statics.f_console)
    t3 = time.perf_counter()  # timer
    while not querybox_list.is_empty():
        x_box = querybox_list.get()
        # Every box produced by the core is scheduled for processing too.
        for each in __core(l_in=l_in, l_out=l_out, box=x_box, twigs=twigs,
                           fo=fo):
            querybox_list.put(each)
    t4 = time.perf_counter()  # timer
    print("查询内核运行耗时 " + str(t4 - t3))
    print("查询内核运行耗时 " + str(t4 - t3), file=statics.f_console)
def entrance(in_tree, out_tree, twigs, l_in, l_out, node_set, q_in, q_out, q_in_menu, q_out_menu, fo):
    """
    Run one query: build the head mapping, then drain the query-box queue.

    The heads of all twigs are handed to ``__get_head_map`` (timed) and, as a
    comparison baseline, to ``__naive_get_head_map``. If the head mapping is
    reported invalid the function stops there. Otherwise a queue seeded with
    ``common.QueryBox(0)`` is processed: every box is passed to ``__core``
    and each box ``__core`` returns is queued in turn, until the queue is
    empty. Progress and timings are printed to stdout and mirrored to
    ``statics.f_console``.

    Side effect: the module-level ``END`` is set to ``len(twigs)``.

    :param in_tree: forwarded to ``__get_head_map``
    :param out_tree: forwarded to ``__get_head_map``
    :param twigs: sized iterable of objects exposing a ``head`` attribute
    :param l_in: forwarded to ``__naive_get_head_map`` and ``__core``
    :param l_out: forwarded to ``__naive_get_head_map`` and ``__core``
    :param node_set: forwarded to ``__naive_get_head_map``
    :param q_in: forwarded to both head-mapping routines
    :param q_out: forwarded to both head-mapping routines
    :param q_in_menu: forwarded to ``__get_head_map``
    :param q_out_menu: forwarded to ``__get_head_map``
    :param fo: forwarded to ``__core``
    :return: ``common.INVALID_CANDIDATE`` when the head mapping is invalid,
        otherwise ``None``
    """
    global head_map, END

    END = len(twigs)
    heads = [twig.head for twig in twigs]

    print("开始获取head映射...")
    print("开始获取head映射...", file=statics.f_console)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    t1 = time.perf_counter()  # timer
    result = __get_head_map(in_tree, out_tree, heads, q_in, q_out, q_in_menu, q_out_menu)
    t2 = time.perf_counter()  # timer
    print("获取head映射耗时 " + str(t2 - t1))
    print("获取head映射耗时 " + str(t2 - t1), file=statics.f_console)

    # Baseline for comparison: the naive way of obtaining the heads.
    # It runs even when the mapping above turned out to be invalid.
    __naive_get_head_map(l_in, l_out, node_set, heads, q_in, q_out)

    if result == common.INVALID_CANDIDATE:
        return common.INVALID_CANDIDATE

    querybox_list = CQueue()
    querybox_list.put(common.QueryBox(0))
    print("查询内核启动...")
    print("查询内核启动...", file=statics.f_console)
    t3 = time.perf_counter()  # timer
    while not querybox_list.is_empty():
        x_box = querybox_list.get()
        # Every box produced by the core is scheduled for processing too.
        for each in __core(l_in=l_in, l_out=l_out, box=x_box, twigs=twigs, fo=fo):
            querybox_list.put(each)
    t4 = time.perf_counter()  # timer
    print("查询内核运行耗时 " + str(t4 - t3))
    print("查询内核运行耗时 " + str(t4 - t3), file=statics.f_console)
Exemple #3
0
def __build_tree(table, menu, node_set, neighbor):
    """
    Build the degree tree for ``node_set`` and run the post-processing passes.

    For each node the tree is descended from the root: for every label in
    ``menu[node]``, ``table[node][label]`` consecutive edges carrying that
    label are followed (missing children are created on the way). The node
    is then appended to the ``data`` list of the tree node reached. Nodes
    that are absent from ``menu`` or whose menu entry is empty are stored
    directly on the root.

    Afterwards the tree is traversed breadth-first, calling
    ``__intervein_node`` on every tree node's ``data`` and sorting its
    ``label_menu``; then ``__dfs_for_prophecy`` and ``dfs_for_depth`` are run
    from the root. Progress and timings are printed to stdout and mirrored
    to ``statics.f_console``.

    Side effect: ``statics.io_tree_max_dep`` is reset to -1 before
    ``dfs_for_depth`` runs (presumably that routine records the maximum
    depth there -- confirm against its definition).

    :param table: mapping node -> label -> repeat count for that label
    :param menu: mapping node -> iterable of labels, in descent order
    :param node_set: iterable of nodes to place in the tree
    :param neighbor: forwarded to ``__intervein_node``
    :return: the root ``TreeNode``
    """
    root = TreeNode(data=[])
    for node in node_set:
        # Nodes without any label information stay on the root.
        if node not in menu or not menu[node]:
            root.data.append(node)
            continue

        pin = root
        for label in menu[node]:
            # One tree level per occurrence of the label.
            for _ in range(table[node][label]):
                if label in pin.children:
                    pin = pin.children[label]
                else:
                    child = TreeNode(data=[])
                    pin.add_child(label, child)
                    pin = child
        pin.data.append(node)

    print("开始调整出入度树以最大化差异...")
    print("开始调整出入度树以最大化差异...", file=statics.f_console)
    # Tidy up the label menus in the tree and, using the neighbor
    # information, spread the nodes stored on the same tree node so that
    # they are spaced by maximum novelty.
    q = CQueue()
    q.put(root)
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_intervein = time.perf_counter()
    while not q.is_empty():
        x_node = q.get()
        __intervein_node(x_node.data, neighbor)
        x_node.label_menu.sort()
        for edge in x_node.children:
            q.put(x_node.children[edge])
    t_end_intervein = time.perf_counter()
    print("差异化出入度表耗时 " + str(t_end_intervein - t_start_intervein))
    print("差异化出入度表耗时 " + str(t_end_intervein - t_start_intervein), file=statics.f_console)

    print("开始计算prophecy...")
    print("开始计算prophecy...", file=statics.f_console)
    t_start_prophecy = time.perf_counter()
    __dfs_for_prophecy(root)
    t_end_prophecy = time.perf_counter()
    print("计算prophecy耗时: " + str(t_end_prophecy - t_start_prophecy))
    print("计算prophecy耗时: " + str(t_end_prophecy - t_start_prophecy), file=statics.f_console)

    print("开始深先遍历出入度树...")
    print("开始深先遍历出入度树...", file=statics.f_console)
    statics.io_tree_max_dep = -1
    dfs_for_depth(root, 1)
    print("得到最大深度:" + str(statics.io_tree_max_dep))
    print("得到最大深度:" + str(statics.io_tree_max_dep), file=statics.f_console)
    return root
Exemple #4
0
def __build_tree(table, menu, node_set, neighbor):
    """
    Build the degree tree for ``node_set`` and run the post-processing passes.

    For each node the tree is descended from the root: for every label in
    ``menu[node]``, ``table[node][label]`` consecutive edges carrying that
    label are followed (missing children are created on the way). The node
    is then appended to the ``data`` list of the tree node reached. Nodes
    that are absent from ``menu`` or whose menu entry is empty are stored
    directly on the root.

    Afterwards the tree is traversed breadth-first, calling
    ``__intervein_node`` on every tree node's ``data`` and sorting its
    ``label_menu``; then ``__dfs_for_prophecy`` and ``dfs_for_depth`` are run
    from the root. Progress and timings are printed to stdout and mirrored
    to ``statics.f_console``.

    Side effect: ``statics.io_tree_max_dep`` is reset to -1 before
    ``dfs_for_depth`` runs (presumably that routine records the maximum
    depth there -- confirm against its definition).

    :param table: mapping node -> label -> repeat count for that label
    :param menu: mapping node -> iterable of labels, in descent order
    :param node_set: iterable of nodes to place in the tree
    :param neighbor: forwarded to ``__intervein_node``
    :return: the root ``TreeNode``
    """
    root = TreeNode(data=[])
    for node in node_set:
        # Nodes without any label information stay on the root.
        if node not in menu or not menu[node]:
            root.data.append(node)
            continue

        pin = root
        for label in menu[node]:
            # One tree level per occurrence of the label.
            for _ in range(table[node][label]):
                if label in pin.children:
                    pin = pin.children[label]
                else:
                    child = TreeNode(data=[])
                    pin.add_child(label, child)
                    pin = child
        pin.data.append(node)

    print("开始调整出入度树以最大化差异...")
    print("开始调整出入度树以最大化差异...", file=statics.f_console)
    # Tidy up the label menus in the tree and, using the neighbor
    # information, spread the nodes stored on the same tree node so that
    # they are spaced by maximum novelty.
    q = CQueue()
    q.put(root)
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_intervein = time.perf_counter()
    while not q.is_empty():
        x_node = q.get()
        __intervein_node(x_node.data, neighbor)
        x_node.label_menu.sort()
        for edge in x_node.children:
            q.put(x_node.children[edge])
    t_end_intervein = time.perf_counter()
    print("差异化出入度表耗时 " + str(t_end_intervein - t_start_intervein))
    print("差异化出入度表耗时 " + str(t_end_intervein - t_start_intervein), file=statics.f_console)

    print("开始计算prophecy...")
    print("开始计算prophecy...", file=statics.f_console)
    t_start_prophecy = time.perf_counter()
    __dfs_for_prophecy(root)
    t_end_prophecy = time.perf_counter()
    print("计算prophecy耗时: " + str(t_end_prophecy - t_start_prophecy))
    print("计算prophecy耗时: " + str(t_end_prophecy - t_start_prophecy), file=statics.f_console)

    print("开始深先遍历出入度树...")
    print("开始深先遍历出入度树...", file=statics.f_console)
    statics.io_tree_max_dep = -1
    dfs_for_depth(root, 1)
    print("得到最大深度:" + str(statics.io_tree_max_dep))
    print("得到最大深度:" + str(statics.io_tree_max_dep), file=statics.f_console)
    return root
Exemple #5
0
    def filter(self):
        """
        Prune the candidate collections by propagating single candidates.

        Whenever a node has exactly one candidate, that candidate is removed
        from the collection of every other node. Nodes reduced to a single
        candidate by such a removal are queued and handled the same way
        until the queue is empty.

        ``self.candidate`` is modified in place, also when the method bails
        out early with ``INVALID_CANDIDATE``.

        :return: ``INVALID_CANDIDATE`` if some node is left without any
            candidate (or its only candidate is ``None``), otherwise
            ``VALID_CANDIDATE``
        """
        pipe_line = CQueue()

        def eliminate(owner):
            """Strike owner's candidate elsewhere; False means dead end."""
            griddle = None
            for each in self.candidate[owner]:
                griddle = each
            if griddle is None:
                return False
            for other in self.candidate:
                if other == owner:
                    continue
                try:
                    self.candidate[other].remove(griddle)
                except (KeyError, ValueError):
                    # set.remove raises KeyError, list.remove ValueError:
                    # griddle was not a candidate of `other`, nothing to do.
                    continue
                new_len = len(self.candidate[other])
                if new_len < 1:
                    return False
                if new_len == 1:
                    # `other` is now determined as well; propagate it later.
                    pipe_line.put(other)
            return True

        # First pass: nodes that already have a single candidate.
        for node in self.candidate:
            if len(self.candidate[node]) == 1 and not eliminate(node):
                return INVALID_CANDIDATE

        # Second pass: nodes that became single-candidate along the way.
        while not pipe_line.is_empty():
            if not eliminate(pipe_line.get()):
                return INVALID_CANDIDATE

        return VALID_CANDIDATE
Exemple #6
0
    def filter(self):
        """
        Prune the candidate collections by propagating single candidates.

        Whenever a node has exactly one candidate, that candidate is removed
        from the collection of every other node. Nodes reduced to a single
        candidate by such a removal are queued and handled the same way
        until the queue is empty.

        ``self.candidate`` is modified in place, also when the method bails
        out early with ``INVALID_CANDIDATE``.

        :return: ``INVALID_CANDIDATE`` if some node is left without any
            candidate (or its only candidate is ``None``), otherwise
            ``VALID_CANDIDATE``
        """
        pipe_line = CQueue()

        def eliminate(owner):
            """Strike owner's candidate elsewhere; False means dead end."""
            griddle = None
            for each in self.candidate[owner]:
                griddle = each
            if griddle is None:
                return False
            for other in self.candidate:
                if other == owner:
                    continue
                try:
                    self.candidate[other].remove(griddle)
                except (KeyError, ValueError):
                    # set.remove raises KeyError, list.remove ValueError:
                    # griddle was not a candidate of `other`, nothing to do.
                    continue
                new_len = len(self.candidate[other])
                if new_len < 1:
                    return False
                if new_len == 1:
                    # `other` is now determined as well; propagate it later.
                    pipe_line.put(other)
            return True

        # First pass: nodes that already have a single candidate.
        for node in self.candidate:
            if len(self.candidate[node]) == 1 and not eliminate(node):
                return INVALID_CANDIDATE

        # Second pass: nodes that became single-candidate along the way.
        while not pipe_line.is_empty():
            if not eliminate(pipe_line.get()):
                return INVALID_CANDIDATE

        return VALID_CANDIDATE
Exemple #7
0
def intersect(a, b):
    """
    Return the intersection of two sorted, duplicate-free sequences.

    Marked for deletion by the original author: this is much slower than the
    builtin ``set.intersection``.

    The two sequences are scanned with alternating binary-search leaps:
    the cursor in ``b`` jumps to the first element not smaller than the
    current element of ``a``; on a miss the cursor in ``a`` jumps to the
    first element not smaller than that element of ``b``.

    :param a: sorted sequence without duplicates
    :param b: sorted sequence without duplicates
    :return: new list with the common elements, in ascending order
    """
    from bisect import bisect_left

    common = []
    ia, ib = 0, 0
    na, nb = len(a), len(b)
    while ia < na and ib < nb:
        # First position in b[ib:] holding a value >= a[ia].
        ib = bisect_left(b, a[ia], ib, nb)
        if ib == nb:
            break
        if b[ib] == a[ia]:
            common.append(a[ia])
            ia += 1
            ib += 1
        else:
            # b[ib] > a[ia]: leap in a to the first value >= b[ib]. That
            # element must stay under the cursor because it may still equal
            # b[ib] (stepping past it dropped matches before).
            ia = bisect_left(a, b[ib], ia + 1, na)
    return common
Exemple #8
0
def intersect(a, b):
    """
    Return the intersection of two sorted, duplicate-free sequences.

    Marked for deletion by the original author: this is much slower than the
    builtin ``set.intersection``.

    The two sequences are scanned with alternating binary-search leaps:
    the cursor in ``b`` jumps to the first element not smaller than the
    current element of ``a``; on a miss the cursor in ``a`` jumps to the
    first element not smaller than that element of ``b``.

    :param a: sorted sequence without duplicates
    :param b: sorted sequence without duplicates
    :return: new list with the common elements, in ascending order
    """
    from bisect import bisect_left

    common = []
    ia, ib = 0, 0
    na, nb = len(a), len(b)
    while ia < na and ib < nb:
        # First position in b[ib:] holding a value >= a[ia].
        ib = bisect_left(b, a[ia], ib, nb)
        if ib == nb:
            break
        if b[ib] == a[ia]:
            common.append(a[ia])
            ia += 1
            ib += 1
        else:
            # b[ib] > a[ia]: leap in a to the first value >= b[ib]. That
            # element must stay under the cursor because it may still equal
            # b[ib] (stepping past it dropped matches before).
            ia = bisect_left(a, b[ib], ia + 1, na)
    return common
Exemple #9
0
def load_map():
    """
    Load the labelled edge list from ``ROOT_PATH + "data.txt"`` and index it.

    Every non-blank line of the file must hold three tab-separated integers:
    origin node, edge label, destination node.

    After loading, ``__inner_sort`` is applied to the in- and out-indexes,
    and for every node a ``NeighborInfo`` is filled by a breadth-first walk
    along incoming edges: each newly reached node is registered with weight
    ``statics.fade_factor_pow[depth]``, and expansion stops once the depth
    reaches ``statics.neighbor_threshold``. Outgoing edges are not used for
    the neighbor statistics. Progress and timings are printed to stdout and
    mirrored to ``statics.f_console``.

    :return: tuple ``(l_in, l_out, l_in_menu, l_out_menu, node_set,
        neighbor)`` where ``l_in[des][label]`` lists the origins and
        ``l_out[ori][label]`` the destinations of the matching edges,
        ``l_in_menu[des]`` / ``l_out_menu[ori]`` list the labels seen for a
        node, ``node_set`` is the set of all nodes and ``neighbor`` maps
        each node to its ``NeighborInfo``
    :raises ValueError: if a non-blank line holds a non-integer field
    :raises IndexError: if a non-blank line has fewer than three fields
    """
    print("开始加载文件...")
    print("开始加载文件...", file=statics.f_console)
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_load_file = time.perf_counter()  # timer
    l_in = {}
    l_out = {}
    l_in_menu = {}
    l_out_menu = {}
    node_set = set()
    # The context manager closes the file even when a line fails to parse.
    with open(ROOT_PATH + "data.txt") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing one at end of file.
                continue
            tpl = line.split('\t')
            ori = int(tpl[0])
            edg = int(tpl[1])
            des = int(tpl[2])

            node_set.add(ori)
            node_set.add(des)

            out_edges = l_out.setdefault(ori, {})
            out_labels = l_out_menu.setdefault(ori, [])
            in_edges = l_in.setdefault(des, {})
            in_labels = l_in_menu.setdefault(des, [])

            # The menu of a node holds exactly the keys of its edge index,
            # so a label is new to the menu iff it is new to the index.
            if edg not in in_edges:
                in_edges[edg] = []
                in_labels.append(edg)
            in_edges[edg].append(ori)

            if edg not in out_edges:
                out_edges[edg] = []
                out_labels.append(edg)
            out_edges[edg].append(des)
    t_end_load_file = time.perf_counter()  # timer
    print("结束加载文件...")
    print("结束加载文件...", file=statics.f_console)
    print('加载文件耗时 ' + str(t_end_load_file - t_start_load_file))
    print('加载文件耗时 ' + str(t_end_load_file - t_start_load_file), file=statics.f_console)

    t_start_sort_raw_data = time.perf_counter()  # timer
    __inner_sort(l_in, l_in_menu)
    __inner_sort(l_out, l_out_menu)
    t_end_sort_raw_data = time.perf_counter()  # timer
    print("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data))
    print("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data), file=statics.f_console)

    print("开始统计邻居信息")
    print("开始统计邻居信息", file=statics.f_console)
    t_start_neighbor = time.perf_counter()  # timer
    # Collect the neighbor information of every node.
    neighbor = {}
    # Incoming direction first.
    visited = set()
    q_node = CQueue()
    q_dep = CQueue()  # depth of the entry at the same position in q_node
    for node in node_set:
        visited.clear()
        visited.add(node)
        neighbor[node] = NeighborInfo()
        q_node.clear()
        q_dep.clear()
        q_node.put(node)
        q_dep.put(0)
        while not q_node.is_empty():
            x_node = q_node.get()
            x_dep = q_dep.get()
            next_dep = x_dep + 1
            if x_node not in l_in:
                continue
            for label in l_in[x_node]:
                for ori in l_in[x_node][label]:
                    if ori not in visited:
                        neighbor[node].safe_add(ori, statics.fade_factor_pow[next_dep], target='in')
                        visited.add(ori)
                        # Only expand further while below the depth limit.
                        if next_dep < statics.neighbor_threshold:
                            q_node.put(ori)
                            q_dep.put(next_dep)
        neighbor[node].cal_module(target='in')
    # The outgoing direction is not collected for now.
    t_end_neighbor = time.perf_counter()  # timer
    print("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor))
    print("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor), file=statics.f_console)

    return l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor
Exemple #10
0
def load_map():
    """
    Load the labelled edge list from ``ROOT_PATH + "data.txt"`` and index it.

    Every non-blank line of the file must hold three tab-separated integers:
    origin node, edge label, destination node.

    After loading, ``__inner_sort`` is applied to the in- and out-indexes,
    and for every node a ``NeighborInfo`` is filled by a breadth-first walk
    along incoming edges: each newly reached node is registered with weight
    ``statics.fade_factor_pow[depth]``, and expansion stops once the depth
    reaches ``statics.neighbor_threshold``. Outgoing edges are not used for
    the neighbor statistics. Progress and timings are printed to stdout and
    mirrored to ``statics.f_console``.

    :return: tuple ``(l_in, l_out, l_in_menu, l_out_menu, node_set,
        neighbor)`` where ``l_in[des][label]`` lists the origins and
        ``l_out[ori][label]`` the destinations of the matching edges,
        ``l_in_menu[des]`` / ``l_out_menu[ori]`` list the labels seen for a
        node, ``node_set`` is the set of all nodes and ``neighbor`` maps
        each node to its ``NeighborInfo``
    :raises ValueError: if a non-blank line holds a non-integer field
    :raises IndexError: if a non-blank line has fewer than three fields
    """
    print("开始加载文件...")
    print("开始加载文件...", file=statics.f_console)
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    t_start_load_file = time.perf_counter()  # timer
    l_in = {}
    l_out = {}
    l_in_menu = {}
    l_out_menu = {}
    node_set = set()
    # The context manager closes the file even when a line fails to parse.
    with open(ROOT_PATH + "data.txt") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing one at end of file.
                continue
            tpl = line.split('\t')
            ori = int(tpl[0])
            edg = int(tpl[1])
            des = int(tpl[2])

            node_set.add(ori)
            node_set.add(des)

            out_edges = l_out.setdefault(ori, {})
            out_labels = l_out_menu.setdefault(ori, [])
            in_edges = l_in.setdefault(des, {})
            in_labels = l_in_menu.setdefault(des, [])

            # The menu of a node holds exactly the keys of its edge index,
            # so a label is new to the menu iff it is new to the index.
            if edg not in in_edges:
                in_edges[edg] = []
                in_labels.append(edg)
            in_edges[edg].append(ori)

            if edg not in out_edges:
                out_edges[edg] = []
                out_labels.append(edg)
            out_edges[edg].append(des)
    t_end_load_file = time.perf_counter()  # timer
    print("结束加载文件...")
    print("结束加载文件...", file=statics.f_console)
    print('加载文件耗时 ' + str(t_end_load_file - t_start_load_file))
    print('加载文件耗时 ' + str(t_end_load_file - t_start_load_file),
          file=statics.f_console)

    t_start_sort_raw_data = time.perf_counter()  # timer
    __inner_sort(l_in, l_in_menu)
    __inner_sort(l_out, l_out_menu)
    t_end_sort_raw_data = time.perf_counter()  # timer
    print("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data))
    print("原始数据内部排序耗时 " + str(t_end_sort_raw_data - t_start_sort_raw_data),
          file=statics.f_console)

    print("开始统计邻居信息")
    print("开始统计邻居信息", file=statics.f_console)
    t_start_neighbor = time.perf_counter()  # timer
    # Collect the neighbor information of every node.
    neighbor = {}
    # Incoming direction first.
    visited = set()
    q_node = CQueue()
    q_dep = CQueue()  # depth of the entry at the same position in q_node
    for node in node_set:
        visited.clear()
        visited.add(node)
        neighbor[node] = NeighborInfo()
        q_node.clear()
        q_dep.clear()
        q_node.put(node)
        q_dep.put(0)
        while not q_node.is_empty():
            x_node = q_node.get()
            x_dep = q_dep.get()
            next_dep = x_dep + 1
            if x_node not in l_in:
                continue
            for label in l_in[x_node]:
                for ori in l_in[x_node][label]:
                    if ori not in visited:
                        neighbor[node].safe_add(
                            ori,
                            statics.fade_factor_pow[next_dep],
                            target='in')
                        visited.add(ori)
                        # Only expand further while below the depth limit.
                        if next_dep < statics.neighbor_threshold:
                            q_node.put(ori)
                            q_dep.put(next_dep)
        neighbor[node].cal_module(target='in')
    # The outgoing direction is not collected for now.
    t_end_neighbor = time.perf_counter()  # timer
    print("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor))
    print("统计邻居信息耗时 " + str(t_end_neighbor - t_start_neighbor),
          file=statics.f_console)

    return l_in, l_out, l_in_menu, l_out_menu, node_set, neighbor
Exemple #11
0
def union(a, b):
    """
    Return the union of two sorted, duplicate-free sequences.

    Marked for deletion by the original author: this is much slower than the
    builtin ``set.union``.

    The sequences are merged with alternating binary-search leaps: the run
    of ``b`` below the current element of ``a`` is copied in one slice,
    then the roles are swapped. An element present in both inputs is
    emitted once, including when it sits at the last index of an input.

    :param a: sorted sequence without duplicates
    :param b: sorted sequence without duplicates
    :return: new list with all elements of both inputs, ascending; always a
        fresh list, never one of the arguments themselves
    """
    from bisect import bisect_left

    # Copy on the shortcuts too, so the result never aliases an argument.
    if len(a) == 0:
        return list(b)
    if len(b) == 0:
        return list(a)

    merged = []
    ia, ib = 0, 0
    na, nb = len(a), len(b)
    while ia < na and ib < nb:
        # Copy everything in b that is smaller than a[ia], then a[ia].
        pos = bisect_left(b, a[ia], ib, nb)
        merged.extend(b[ib:pos])
        merged.append(a[ia])
        if pos < nb and b[pos] == a[ia]:
            pos += 1  # same value in b: consume it without emitting again
        ia += 1
        ib = pos
        if ia >= na or ib >= nb:
            break

        # Mirror step: copy everything in a smaller than b[ib], then b[ib].
        pos = bisect_left(a, b[ib], ia, na)
        merged.extend(a[ia:pos])
        merged.append(b[ib])
        if pos < na and a[pos] == b[ib]:
            pos += 1
        ib += 1
        ia = pos

    # At most one of the two tails is non-empty here.
    merged.extend(a[ia:])
    merged.extend(b[ib:])
    return merged
Exemple #12
0
def union(a, b):
    """
    Return the union of two sorted, duplicate-free sequences.

    Marked for deletion by the original author: this is much slower than the
    builtin ``set.union``.

    The sequences are merged with alternating binary-search leaps: the run
    of ``b`` below the current element of ``a`` is copied in one slice,
    then the roles are swapped. An element present in both inputs is
    emitted once, including when it sits at the last index of an input.

    :param a: sorted sequence without duplicates
    :param b: sorted sequence without duplicates
    :return: new list with all elements of both inputs, ascending; always a
        fresh list, never one of the arguments themselves
    """
    from bisect import bisect_left

    # Copy on the shortcuts too, so the result never aliases an argument.
    if len(a) == 0:
        return list(b)
    if len(b) == 0:
        return list(a)

    merged = []
    ia, ib = 0, 0
    na, nb = len(a), len(b)
    while ia < na and ib < nb:
        # Copy everything in b that is smaller than a[ia], then a[ia].
        pos = bisect_left(b, a[ia], ib, nb)
        merged.extend(b[ib:pos])
        merged.append(a[ia])
        if pos < nb and b[pos] == a[ia]:
            pos += 1  # same value in b: consume it without emitting again
        ia += 1
        ib = pos
        if ia >= na or ib >= nb:
            break

        # Mirror step: copy everything in a smaller than b[ib], then b[ib].
        pos = bisect_left(a, b[ib], ia, na)
        merged.extend(a[ia:pos])
        merged.append(b[ib])
        if pos < na and a[pos] == b[ib]:
            pos += 1
        ib += 1
        ia = pos

    # At most one of the two tails is non-empty here.
    merged.extend(a[ia:])
    merged.extend(b[ib:])
    return merged