and v has vertex betweenness greater than the max edge betweenness
        """
        # NOTE(review): this method's signature and the start of its docstring
        # are outside the visible chunk; layout below is reconstructed from a
        # whitespace-collapsed source.
        bests = []
        for node in self.levels[level]:
            # Called before viable_vertices is read; presumably this is what
            # populates node.clone.viable_vertices -- confirm in the node class.
            node.calculate_v_betweenness()
            if node.clone.viable_vertices:
                for v in node.clone.viable_vertices:
                    bests.append((node, v))
                # Clear the list once its entries have been collected.
                # NOTE(review): confirm this reset belongs inside the `if`.
                node.clone.viable_vertices = []
        return bests

    def convert_to_circles(self):
        """
        Convert every level of the dendrogram into plain lists of vertex uids.

        Returns:
        -------
        A dict with the same keys as self.levels (documented as ints, one per
        level of splitting). Each value is a list of circles, one per node at
        that level; a circle is the list of `uid` attributes of the vertices
        whose keys appear in that node's graph adjacency list.
        """
        out = {}
        # NOTE: dict.iteritems() exists only on Python 2.
        for k, v in self.levels.iteritems():
            # One circle per node: look each adjacency-list key up in
            # graph.vertices and keep the vertex's uid.
            out[k] = [[n.graph.vertices[uid].uid
                       for uid in n.graph.adj_list.keys()]
                      for n in v]
        return out

    def __str__(self):
        """Return the string form of the mapping built by convert_to_circles()."""
        return str(self.convert_to_circles())


if __name__ == "__main__":
    # Ad-hoc run: load the data named on the command line and build a
    # dendrogram for the ego network stored under key 1310.
    # read_data, argv and Dendrogram must be defined earlier in this file
    # (not visible in this chunk).
    ego_net_lists, features, circles = read_data(*argv[1:])
    dendrogram = Dendrogram(ego_net_lists[1310])
from __future__ import print_function from data_in import read_data from sys import argv from dendrogram import Dendrogram from modularity import find_best_splits if __name__ == "__main__": """ Finds the best clustering for each of the given ego network files. Stores output to file called submission.csv """ ego_nets = read_data(argv[1]) # Good sets (small) to test on are 25708, and 1310 # Change this variable to change the egonet that it starts reading from # start = 8338 # index = [k for k, v in tup_ls].index(start) index = 0 # use the line above instead if not running from start tup_ls = sorted(ego_nets.iteritems(), key=lambda t: t[1].size) out = open("submission.csv", "w") # out = open("out.txt", "a") # if not running from start, use append instead while index < len(tup_ls): uid, ego_net = tup_ls[index] print("Analyzing ego network {0}".format(uid)) dendrogram = Dendrogram(ego_net) size = ego_net.size best_split = find_best_splits(dendrogram.levels, size) circles = dendrogram.convert_to_circles()[best_split] circ_str = str(uid) + "," + str(len(circles)) + "," circ_str += ";".join([" ".join([str(fid) for fid in circle]) for circle in circles]) print(circ_str, file=out)