Example #1
0
    def parseData():
        """
        Group the entries of the georgetown flag file and print the counts.

        Every usable line of the flag file holds exactly two
        whitespace-separated tokens; the first token is collected under the
        second one.  Lines with any other number of tokens are ignored.  The
        entries collected under key '3' and the number of entries per key are
        printed.
        :return:
        """
        dataset = 'georgetown'
        flag_path = '../data/flag_' + dataset + '.txt'

        # The tree is still loaded here although none of the returned values
        # is used further down in this function.
        tree, total_level, all_leaves = etl.prepare_tree('../data/tree2_' +
                                                         dataset)

        members_by_flag = {}
        with open(flag_path, "r") as handle:
            for raw in handle:
                # split() without arguments ignores surrounding whitespace,
                # so blank lines produce an empty list and are skipped by the
                # same length check as malformed lines.
                tokens = raw.split()
                if len(tokens) != 2:
                    continue
                member, key = tokens
                members_by_flag.setdefault(key, []).append(member)
        print(members_by_flag['3'])
        counts = {
            key: len(members)
            for key, members in members_by_flag.items()
        }
        print(counts)
Example #2
0
def main(args):
    """
    training entrance

    Validates the arguments, loads the tree and the graph, fixes the root
    embedding, runs the layer-wise training followed by ``train_dfs`` and
    writes the resulting embedding matrix as JSON below ``args.res_path``.

    :param args: namespace that provides ``do_train``, ``hidden_dim``,
        ``usecuda``, ``data_path``, ``network_path``,
        ``single_circle_range`` and ``res_path``.  ``args.single_dim`` is set
        here to ``hidden_dim // 2``.
    :return: None
    :raises ValueError: if ``args.do_train`` is falsy or ``args.hidden_dim``
        is odd.
    """
    # Parameters verifying.
    if not args.do_train:
        raise ValueError('error.')
    if args.hidden_dim % 2 != 0:
        raise ValueError('hidden_error')
    args.single_dim = args.hidden_dim // 2

    # Select a device, cpu or gpu.
    # GPU 2 stays the preferred device, but only when it exists: on hosts
    # with fewer than three GPUs an unconditional "cuda:2" is an invalid
    # device ordinal, so fall back to the first GPU instead.
    device = "cpu"
    if args.usecuda and torch.cuda.is_available():
        preferred_gpu = 2
        if torch.cuda.device_count() > preferred_gpu:
            device = "cuda:" + str(preferred_gpu)
        else:
            device = "cuda:0"

    # Load the tree and some properties of the tree.
    tree, total_level, all_leaves = etl.prepare_tree(args.data_path)
    # load the graph
    graph = etl.prepare_graph(args.network_path)
    # Define the root node
    root = len(tree) - 1
    # Calc the graph similarity, i.e. the matrix \capA in paper.
    leaves_similarity = etl.get_leaves_similarity(graph)

    # Initialize the result and fix the root node's embedding: the first
    # single_dim entries are zeros, the remaining single_dim entries are
    # single_circle_range.
    root_embedding_lower = torch.zeros(1, args.single_dim)
    root_embedding_upper = args.single_circle_range * torch.ones(
        1, args.single_dim)
    root_embedding = torch.cat((root_embedding_lower, root_embedding_upper),
                               1)[0]
    res = torch.zeros(len(tree), args.hidden_dim)
    res[root] = root_embedding

    # Initialize the per-layer lists of node ids (node.level is 1-based).
    layerCounter = [[] for _ in range(total_level)]
    for node in tree:
        layerCounter[node.level - 1].append(node.id)

    # Train HASNE layer by layer.
    layerWiseTraining(0, res, args, tree, leaves_similarity, device,
                      layerCounter)

    # calc the graph similarity
    # NOTE(review): recomputed from the same graph as above; kept in case
    # layerWiseTraining modifies the matrix in place -- confirm.
    leaves_similarity = etl.get_leaves_similarity(graph)

    train_dfs(root, res, args, tree, leaves_similarity, device)

    # The output file name carries the current unix timestamp.
    res_output = os.path.join(args.res_path, "res_" + str(int(time.time())))

    write_to_file(res_output, json.dumps(res.numpy().tolist()))
Example #3
0
    def visualizationZoom(embedding):
        """
        Draw a reduced view of the embedding and store the figure as a PDF.

        Only the node with index 128 and the nodes whose tree path contains
        one of the selected display nodes are drawn.  For each drawn node the
        entries 0 and 1 of its embedding are used as start and end of a
        spiral segment.
        :param embedding: indexable collection of per-node embeddings
        :return:
        """
        pdf_output = 'path to the results'

        tree, total_level, all_leaves = etl.prepare_tree('path to tree')

        fig = plt.figure(figsize=(7, 7))
        ax = fig.add_subplot(1, 1, 1)

        # Which nodes we want to display.
        displayNode = [122, 123]
        # Increment of the curve parameter between two sampled points.
        step = math.pi * 0.001

        for index in range(len(embedding)):
            on_selected_path = any(
                disp in tree[index].path for disp in displayNode)
            # Index 128 is treated as the root and is always drawn.
            if index != 128 and not on_selected_path:
                continue

            segment = embedding[index]
            position = segment[0]
            stop = segment[1]

            xs = []
            ys = []
            while position < stop:
                xs.append(float(position * math.cos(position / 10)))
                ys.append(float(position * math.sin(position / 10)))
                position += step

            ax.plot(xs, ys, label='vis', ls='-', lw=10)
        plt.show()

        pages = PdfPages(pdf_output)
        pages.savefig(fig)
        pages.close()
Example #4
0
    def display(embedding):
        """
        Write the per-dimension interval widths of selected nodes to a file.

        The embedding is rounded to five decimals; the width is the
        difference between the columns from index 8 onwards and the first 8
        columns.  The row of node 2618 is written first, followed by the rows
        of its direct children, the direct children of the first of those,
        and the direct children of the first node of that level, with a
        blank separator between the groups.
        :param embedding: 2-D array-like of node embeddings
        :return:
        """
        embedding = np.around(embedding, decimals=5)
        widths = embedding[:, 8:] - embedding[:, :8]

        displayfile = '../res/metric_display.txt'

        tree, total_level, all_leaves = etl.prepare_tree(
            '../data/tree2_hamilton')

        # One preformatted output line per node id.
        rows = {}
        for node_id, row in enumerate(widths):
            formatted = ['%.03f' % value for value in row.tolist()]
            rows[node_id] = (str(node_id) + ' ' + str(json.dumps(formatted)) +
                             "\r\n")

        separator = "\r\n\r\n"
        parent = 2618
        write_to_file(displayfile, rows[parent])
        write_to_file(displayfile, separator)

        # Walk three levels down, always following the first child.
        for depth in range(3):
            children = tree[parent].direct_children
            for child in children:
                write_to_file(displayfile, rows[child])
            if depth < 2:
                write_to_file(displayfile, separator)
                parent = children[0]
Example #5
0
import json

import torch

import codes.utils.data_etl as etl

# number of nodes
nums = 4000

treePath = "path to the tree file"
tree, total_level, all_leaves = etl.prepare_tree(treePath)

# Read the GNE result inside a context manager so the file handle is closed
# as soon as the content has been read instead of staying open.
with open("path to the results of GNE", encoding='utf-8') as f:
    content = f.read()

# NOTE(review): the names `dict` and `len` below shadow the builtins for the
# rest of this module (after the last line `len(...)` can no longer be
# called).  They are kept unchanged because later parts of the script may
# refer to them -- consider renaming once all uses are visible.
dict = json.loads(content)
embeddings = dict['coordinates']
radii = dict['radius']
len = len(radii)

# We draw 10 layers
layerDict = {
    1: [],
    2: [],
    3: [],
    4: [],
    5: [],
    6: [],
    7: [],
    8: [],
    9: [],
    10: []
Example #6
0
    def reconstruction(embedding, target):
        """
        reconstruction for network

        Builds a pairwise distance matrix between leaves from the lower-bound
        half of the embedding and, for every node of the graph that has
        neighbors, accumulates rank statistics and average-precision scores
        of its neighbors under that distance.
        :param embedding: numpy array of node embeddings (converted with
            torch.from_numpy); assumed to have 2 * 32 columns so that it
            splits into exactly two halves -- TODO confirm
        :param target: dataset name used to build the file paths under
            ../data/
        :return: no return statement is visible in this snippet; the
            accumulators are only updated inside the final loop
        """

        # Width of one half (lower or upper bounds) of the embedding.
        singleDim = 32
        # NOTE(review): flag_file and tree_file are not referenced in the
        # visible part of this function.
        flag_file = f"../data/flag_{target}.txt"
        edge_file = f"../data/edges_{target}.txt"
        tree_file = f"../data/tree2_{target}"
        tree, total_level, all_leaves = etl.prepare_tree('../data/tree2_' +
                                                         target)
        adjM = etl.prepare_graph(edge_file)
        # Node ids of the graph, taken from the keys of its adjacency mapping.
        objects = np.array(list(adjM.adj.keys()))
        nodesNum = len(objects)

        def calcLowerBoundDist(lowerBoundEmbedding, singleDim, needSum=True):
            """
            Calculate the distance based on the lower bound among embeddings.

            For every pair of rows the absolute difference is taken per
            dimension; with needSum the differences are summed over the
            dimensions.
            :param lowerBoundEmbedding: 2-D tensor, assumed to be of shape
                (nodes, singleDim) -- TODO confirm
            :param singleDim: number of embedding dimensions, used as the
                leading size of the reshaped result
            :param needSum: if True, sum over the dimension axis
            :return: tensor of shape (nodes, nodes) when needSum is True,
                otherwise (singleDim, nodes, nodes)
            """
            nodesNum = len(lowerBoundEmbedding)
            # Column vector of all values of the transposed input, repeated
            # nodesNum times along the second axis.
            emb1 = torch.reshape(lowerBoundEmbedding.t(),
                                 (lowerBoundEmbedding.numel(), 1))
            emb1 = emb1.repeat(1, nodesNum)
            # Each row of the transposed input repeated nodesNum times, so
            # that emb1 and emb2 line up every pair of nodes per dimension.
            emb2 = torch.repeat_interleave(lowerBoundEmbedding.t(),
                                           repeats=nodesNum,
                                           dim=0)

            dimMixedDiff = torch.abs(emb1 - emb2)

            # Regroup the stacked differences into one matrix per dimension.
            lowerDist = torch.unsqueeze(dimMixedDiff,
                                        0).reshape(singleDim, nodesNum,
                                                   nodesNum)
            if needSum:
                lowerDist = torch.sum(lowerDist, dim=0)

            return lowerDist

        def calcDist(tree, embedding):
            """
            distance metric

            Sums, over all layers except the first one, the lower-bound
            distance between the entries of node.path of two leaves on that
            layer.
            :param tree: iterable of nodes exposing level, id and path
            :param embedding: numpy array of node embeddings
            :return: symmetric tensor of shape (leavesNum, leavesNum) with a
                zero diagonal
            """
            embeddingInTorch = torch.from_numpy(embedding)
            # Split the columns into chunks of singleDim; unpacking into two
            # names requires exactly two chunks.
            embeddingInTorchLower, embeddingInTorchHigher = torch.split(
                embeddingInTorch, singleDim, dim=1)
            # Per layer: the node ids on that layer and their lower-bound
            # embeddings (node.level is 1-based, the dict keys are 0-based).
            layerBasedIndex = {}
            layerBasedTorch = {}
            for i in range(total_level):
                layerTmp = []
                for node in tree:
                    if node.level - 1 == i:
                        layerTmp.append(node.id)

                layerBasedTorch[i] = torch.index_select(
                    embeddingInTorchLower, dim=0, index=torch.tensor(layerTmp))
                layerBasedIndex[i] = layerTmp

            # Pairwise distances between the nodes of each layer.
            layerBasedDist = {}
            for each in layerBasedTorch:
                layerBasedDist[each] = calcLowerBoundDist(
                    layerBasedTorch[each], singleDim)

            # The nodes of the last layer are treated as the leaves.
            leaves = layerBasedIndex[total_level - 1]
            leavesNum = len(leaves)
            leavesParentDict = {}
            for eachLayer in range(total_level):
                leavesParentDict[eachLayer] = []
            for node in tree:
                # NOTE(review): assumes the leaves carry the ids
                # 0..leavesNum-1 and that node.path[k] is the ancestor of
                # the node on layer k -- confirm against etl.prepare_tree.
                if node.id < leavesNum:
                    for eachLayer in range(total_level):
                        leavesParentDict[eachLayer].append(
                            node.path[eachLayer])
            pass
            # Translate the collected path entries into their positions
            # inside the per-layer id lists (list.index is a linear search).
            leavesParentIndexList = {}
            for layerConuter in range(total_level):
                leavesParentIndexList[layerConuter] = [
                    layerBasedIndex[layerConuter].index(x)
                    for x in leavesParentDict[layerConuter]
                ]

            finalDist = torch.zeros((leavesNum, leavesNum))
            for u in range(leavesNum):
                print('handling u:%d' % u)
                for v in range(u, leavesNum):
                    if u == v:
                        continue
                    for index in leavesParentIndexList:
                        # Layer 0 is skipped and contributes no distance.
                        if index == 0:
                            continue
                        indexu = leavesParentIndexList[index][u]
                        indexv = leavesParentIndexList[index][v]
                        added = finalDist[u][v] + layerBasedDist[index][
                            indexu][indexv]
                        # Keep the matrix symmetric.
                        finalDist[u][v] = finalDist[v][u] = added
            return finalDist

        # Accumulators over all evaluated nodes.
        ranksum = nranks = ap_scores = iters = 0
        labels = np.empty(nodesNum)
        distMatrix = calcDist(tree, embedding)
        for object in objects:
            # The label buffer is reused; reset it for the current node.
            labels.fill(0)
            neighbors = np.array(list(adjM.adj[object]))

            if (len(neighbors) == 0):
                continue

            objDist = distMatrix[object]
            # Give the node itself a large distance so it is sorted towards
            # the end.
            # NOTE(review): objDist looks like a view of distMatrix, so this
            # assignment probably also overwrites the diagonal entry of
            # distMatrix -- confirm.
            objDist[object] = 1e5
            sorted_dists, sorted_idx = objDist.sort()
            ranks, = np.where(
                np.in1d(sorted_idx.detach().cpu().numpy(), neighbors))
            # The above gives us the position of the neighbors in sorted order.  We
            # want to count the number of non-neighbors that occur before each neighbor
            ranks += 1
            N = ranks.shape[0]

            # To account for other positive nearer neighbors, we subtract (N*(N+1)/2)
            # As an example, assume the ranks of the neighbors are:
            # 0, 1, 4, 5, 6, 8
            # For each neighbor, we'd like to return the number of non-neighbors
            # that ranked higher than it.  In this case, we'd return 0+0+2+2+2+3=14
            # Another way of thinking about it is to return
            # 0 + 1 + 4 + 5 + 6 + 8 - (0 + 1 + 2 + 3 + 4 + 5)
            # (0 + 1 + 2 + ... + N) == (N * (N + 1) / 2)
            # Note that we include `N` to account for the source embedding itself
            # always being the nearest neighbor
            # NOTE(review): the code subtracts N * (N - 1) / 2 after adding 1
            # to every rank, whereas the comment above mentions
            # N * (N + 1) / 2 -- confirm which one is intended.
            ranksum += ranks.sum() - (N * (N - 1) / 2)
            nranks += ranks.shape[0]
            labels[neighbors] = 1
            # Negated distances serve as scores: smaller distance, higher
            # score.
            ap_scores += average_precision_score(
                labels, -objDist.detach().cpu().numpy())
            iters += 1
Example #7
0
    def drawGNE():
        """
        visualization of the GNE model

        Reads the node coordinates from the GNE result file (JSON key
        'coordinates'), draws one line per pair of consecutive entries found
        in the tree paths, draws all nodes as round markers, shows the figure
        and stores it in a PDF file.
        :return:
        """

        pdf_output = 'path of the result'

        # The context manager closes the result file right after reading.
        with open(
                'path of GNE results which can be trained by project on github',
                encoding='utf-8') as f:
            result = json.loads(f.read())
        embeddings = result['coordinates']

        tree, total_level, all_leaves = etl.prepare_tree(
            'path to the tree file')

        fig = plt.figure(figsize=(20, 20))
        ax = fig.add_subplot(1, 1, 1)
        plt.style.use('bmh')

        # Coordinates of all nodes, in index order.
        xx = [float(embeddings[index][0]) for index in range(len(embeddings))]
        yy = [float(embeddings[index][1]) for index in range(len(embeddings))]

        # Every pair of consecutive path entries is one edge; the set removes
        # the duplicates shared by several paths.
        edges_set = set()
        for node in tree:
            path = node.path
            edges_set.update(
                (path[i], path[i + 1]) for i in range(len(path) - 1))

        for source, destination in edges_set:
            edge_x = [
                float(embeddings[source][0]),
                float(embeddings[destination][0])
            ]
            edge_y = [
                float(embeddings[source][1]),
                float(embeddings[destination][1])
            ]
            ax.plot(edge_x,
                    edge_y,
                    '-',
                    label='debug',
                    linewidth=2,
                    color='#2F54EB')

        # The marker is given once through the format string; passing
        # marker='o' as well makes matplotlib warn about a redundant
        # definition.
        ax.plot(xx, yy, 'o', label='debug', markersize=20, color='#F64C4C')
        plt.show()

        # The context manager finalizes the PDF even if savefig raises.
        with PdfPages(pdf_output) as pp:
            pp.savefig(fig)
Example #8
0
    def drawPoincare():
        """
        draw poincare vis

        Loads the embeddings of a trained Poincare model, scales the first
        two coordinates by 1000, draws one line per pair of consecutive
        entries found in the tree paths, draws all nodes as round markers,
        shows the figure and stores it in a PDF file.
        :return:
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        model = torch.load(
            "path to poincare results, can be train by gensim or project on github"
        )
        embeddings = model['embeddings']
        pdf_output = 'path to the result'

        tree, total_level, all_leaves = etl.prepare_tree('path to tree file')
        fig = plt.figure(figsize=(15, 15))

        ax = fig.add_subplot(1, 1, 1)

        # Factor applied to every coordinate before plotting.
        scale = 1000

        # Coordinates of all nodes, in index order.
        xx = [
            float(embeddings[index][0] * scale)
            for index in range(len(embeddings))
        ]
        yy = [
            float(embeddings[index][1] * scale)
            for index in range(len(embeddings))
        ]

        # Every pair of consecutive path entries is one edge; the set removes
        # the duplicates shared by several paths.
        edges_set = set()
        for node in tree:
            path = node.path
            edges_set.update(
                (path[i], path[i + 1]) for i in range(len(path) - 1))

        for source, destination in edges_set:
            edge_x = [
                float(embeddings[source][0] * scale),
                float(embeddings[destination][0] * scale)
            ]
            edge_y = [
                float(embeddings[source][1] * scale),
                float(embeddings[destination][1] * scale)
            ]
            ax.plot(edge_x,
                    edge_y,
                    '-',
                    label='debug',
                    linewidth=2,
                    color='#2F54EB')

        # The marker is given once through the format string; passing
        # marker='o' as well makes matplotlib warn about a redundant
        # definition.
        ax.plot(xx, yy, 'o', label='debug', markersize=15, color='#F64C4C')

        plt.show()

        # The context manager finalizes the PDF even if savefig raises.
        with PdfPages(pdf_output) as pp:
            pp.savefig(fig)