def mch(font_names):
    """Recursively split ``font_names`` into clusters using Markov clustering.

    Fonts are graph nodes; two fonts are joined by an edge when their
    distance is below a threshold taken as a percentile of all pairwise
    distances.  The percentile starts at 40 and is halved (40, 20, 10, 5,
    2.5) until ``networkx_mcl`` reports more than one cluster or the
    percentile has reached 2.5.

    Besides its argument, this function reads ``self.data`` (looked up as
    ``self.data[a]["distance_to"][b]``), ``numpy``, ``networkx`` and
    ``networkx_mcl`` from the enclosing scope.

    Args:
        font_names: sequence of keys into ``self.data``.

    Returns:
        ``[font_names]`` when there are no pairwise distances to work with
        (zero or one distinct name); the clustering object returned by
        ``networkx_mcl`` unchanged when it still holds a single cluster;
        otherwise a list with one entry per cluster, where every cluster
        of more than 5 members is replaced by the result of recursing
        into it.
    """
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("mch of %s %s" % (len(font_names), font_names))

    def distance(a, b):
        return self.data[a]["distance_to"][b]

    distances = numpy.array(
        [distance(i, j) for i in font_names for j in font_names if i != j]
    )

    # Without at least one pair of distinct names there is nothing to take
    # a percentile of (numpy.percentile fails on an empty array), and the
    # input cannot be split any further anyway.
    if distances.size == 0:
        return [font_names]

    def get_graph(et):
        """Build the graph whose edges link fonts closer than ``et``.

        ``et`` stands for Edge Threshold.
        """
        g = networkx.Graph()
        g.add_nodes_from(font_names)
        # ``i < j`` visits each unordered pair once.
        g.add_edges_from(
            (i, j)
            for i in font_names
            for j in font_names
            if i < j and distance(i, j) < et
        )
        return g

    solution = [font_names]  # start with one cluster and try to break it apart
    quantile = 80.0  # we want to start at 40, so double it to start
    while quantile > 2.5 and len(solution) == 1:
        quantile *= 0.5
        threshold = numpy.percentile(distances, quantile)
        # The tuning keywords the original comment listed (expand_factor,
        # inflate_factor, max_loop, mult_factor) are left at their defaults.
        _, solution = networkx_mcl(get_graph(threshold))

    if len(solution) == 1:
        return solution  # nothing more we can do

    # The clustering may be a mapping of cluster id -> members; iterating a
    # mapping directly would yield the ids, so take its values instead.
    if hasattr(solution, "values"):
        clusters = list(solution.values())
    else:
        clusters = list(solution)

    # NOTE(review): if networkx_mcl reports members as positional node
    # indices rather than the font names used as node labels, they must be
    # mapped back to names before recursing -- confirm against the library.

    # if a cluster has more than 5 members, recurse down
    return [c if len(c) <= 5 else mch(c) for c in clusters]
def mcl_cluster(G):
    """Run ``mcl_clustering.networkx_mcl`` on ``G`` and print the outcome.

    Prints the returned matrix and a "Found N clusters." message through
    ``pp.pprint``.  Nothing is returned.
    """
    matrix, found = mcl_clustering.networkx_mcl(G)
    # The result of this call is intentionally left unused, as before.
    get_clusters(matrix)
    pp.pprint(matrix)
    summary = "Found {} clusters.".format(len(found))
    pp.pprint(summary)
required=True, type=int) parser.add_argument('-c', '--clean', help='clean database before executing script', required=False, default=False, action='store_true') parser.set_defaults(verbose=False) if __name__ == '__main__': args = parser.parse_args() client, G, tagsToNotes = load_graph(**vars(args)) ideasCollection = client.notes.ideas M, clusters = networkx_mcl(G) print("Found {} clusters".format(len(clusters))) for clique in clusters: hashed = hash(frozenset(clique + [ args.weight, ])) correspondingNotes = list( set([note['_id'] for tag in clique for note in tagsToNotes[tag]])) idea = { 'name': None, 'tags': clique, 'size': len(clique), 'notes_ids': correspondingNotes, 'notes_ids_size': len(correspondingNotes), 'algorithm': 'mcl',