Esempio n. 1
0
                and v has vertex betweenness greater than the max edge
                betweenness
        """
        bests = []
        for node in self.levels[level]:
            node.calculate_v_betweenness()
            if node.clone.viable_vertices:
                for v in node.clone.viable_vertices:
                    bests.append((node, v))
                node.clone.viable_vertices = []
        return bests

    def convert_to_circles(self):
        """
            Build a plain-data view of the circles at every splitting level.

            Returns:
            -------
            A dict mapping ints (levels) to a list of circles at that level of
                splitting; each circle is the list of ``uid`` values of the
                vertices keyed in that node's ``graph.adj_list``
        """
        # items() rather than iteritems(): iteritems() was removed in
        # Python 3, while items() is available on both major versions.
        return {
            level: [
                [node.graph.vertices[key].uid for key in node.graph.adj_list]
                for node in nodes
            ]
            for level, nodes in self.levels.items()
        }

    def __str__(self):
        """Return the string form of the mapping built by convert_to_circles()."""
        circles_by_level = self.convert_to_circles()
        return str(circles_by_level)

if __name__ == "__main__":
    # Script entry point: pass every command-line argument after the script
    # name to read_data, then build a Dendrogram for one hard-coded entry
    # (1310) of the first returned value.
    # NOTE(review): read_data and argv are not defined in the visible part of
    # this file -- presumably imported near the top; confirm.
    ego_net_lists, features, circles = read_data(*argv[1:])
    dendrogram = Dendrogram(ego_net_lists[1310])
Esempio n. 2
0
from __future__ import print_function
from data_in import read_data
from sys import argv
from dendrogram import Dendrogram
from modularity import find_best_splits

if __name__ == "__main__":
    """
        Finds the best clustering for each of the given ego network files.
        Stores output to file called submission.csv
    """
    # Load the ego networks from the path given as the first command-line
    # argument; the result is used below as a mapping of uid -> ego network.
    ego_nets = read_data(argv[1])

    # Good sets (small) to test on are 25708, and 1310
    # Change this variable to change the egonet that it starts reading from
    # start = 8338
    # index = [k for k, v in tup_ls].index(start)
    # NOTE(review): if the two lines above are re-enabled they must be moved
    # below the tup_ls assignment, which they depend on.
    index = 0  # use the line above instead if not running from start
    # Process the networks in ascending order of their .size attribute.
    # NOTE(review): dict.iteritems() exists only on Python 2.
    tup_ls = sorted(ego_nets.iteritems(), key=lambda t: t[1].size)
    out = open("submission.csv", "w")
    # out = open("out.txt", "a")  # if not running from start, use append instead
    # NOTE(review): no increment of index and no out.close() appear in the
    # visible part of this loop -- confirm both exist further down.
    while index < len(tup_ls):
        uid, ego_net = tup_ls[index]
        print("Analyzing ego network {0}".format(uid))
        dendrogram = Dendrogram(ego_net)
        size = ego_net.size
        # best_split is used as the key that selects one level of circles
        # from the mapping returned by convert_to_circles().
        best_split = find_best_splits(dendrogram.levels, size)
        circles = dendrogram.convert_to_circles()[best_split]
        # Output line layout: "<uid>,<number of circles>,<circles>", where
        # circles are separated by ';' and the ids inside a circle by ' '.
        circ_str = str(uid) + "," + str(len(circles)) + ","
        circ_str += ";".join([" ".join([str(fid) for fid in circle]) for circle in circles])
        print(circ_str, file=out)