def vertex_partitioning(sc, nodes, edges, num_partition=4):
    """
    Implementation of vertex partitioning Kruskal's algorithm.

    The nodes are split into ``num_partition`` groups. For every pair of
    distinct groups a local MST is computed over the union of the two groups,
    the distinct edges of all local MSTs are collected, and a final Kruskal
    pass over all nodes reduces them to the global MST.

    :param sc: Spark context used to build the RDDs.
    :param nodes: nodes for input.
    :param edges: edges for input; every local Kruskal run receives the full
        edge list together with its own subset of nodes.
    :param num_partition: number of partitions.
    :return: the result of ``kruskal`` run on all nodes with the edges
        gathered from the local MSTs.
    """
    # Union each pair of vertex groups. The strict "<" keeps only one
    # orientation of every pair and drops the (group, group) self-pairs.
    def combine(iterator):
        for first, second in iterator:
            if first < second:
                yield first + second

    # NOTE(review): `shuffle` is assumed to RETURN the shuffled sequence
    # (unlike random.shuffle, which returns None) - confirm against its
    # definition.
    groups = sc.parallelize(shuffle(nodes), num_partition).glom()
    subsets = (
        groups.cartesian(groups)
        .mapPartitions(combine, preservesPartitioning=False)
        .collect()
    )

    # With a single group there is no pair of distinct groups, so no subset
    # was produced and the final pass used to receive no candidate edges at
    # all. Fall back to one subset holding every node.
    if not subsets and len(nodes) > 0:
        subsets = [list(nodes)]

    # Redistribute the vertex subsets over the requested number of partitions.
    vertices = sc.parallelize(subsets, num_partition)

    # Compute the local MST of every vertex subset in a partition.
    def local_kruskal(iterator):
        for subset in iterator:
            for edge in kruskal(nodes=set(subset), edges=edges):
                yield edge

    # The same edge can appear in several local MSTs, hence distinct().
    subtrees = vertices.mapPartitions(local_kruskal).distinct().collect()

    # Calculate the global MST from the candidate edges.
    return kruskal(nodes=nodes, edges=subtrees)
def edge_partitioning(sc, nodes, edges, num_partition=4):
    """
    Implementation of edge partitioning Kruskal's algorithm.

    The edges are spread over ``num_partition`` partitions, Kruskal is run on
    each partition's edges against the full node set, and the edges kept by
    those local runs are fed to one final Kruskal pass.

    :param sc: Spark context used to build the RDD.
    :param nodes: nodes for input.
    :param edges: edges for input.
    :param num_partition: number of partitions.
    :return: the result of ``kruskal`` on all nodes with the edges that
        survived the per-partition runs.
    """
    # Local MST of a single partition: all nodes, only this partition's edges.
    def partition_mst(edge_iter):
        local_tree = kruskal(nodes=nodes, edges=edge_iter)
        for tree_edge in local_tree:
            yield tree_edge

    # Distribute the edges.
    edge_rdd = sc.parallelize(shuffle(edges), num_partition)

    # Gather the edges of every local MST.
    candidate_edges = edge_rdd.mapPartitions(partition_mst).collect()

    # Global MST over the surviving candidates.
    return kruskal(nodes=nodes, edges=candidate_edges)
# Parse the graph file: the first line holds the graph metadata, every
# following line is one row of integers.
# NOTE(review): `f` is expected to be an already-open text file; it is opened
# outside this fragment.
graphdata = [int(token) for token in f.readline().split()]

Input = []
for line in f:
    fields = line.split()
    if not fields:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
    # Real lists (not lazy map objects) because the rows are indexed below.
    Input.append([int(token) for token in fields])

print(graphdata)
#T = prim(Input,1,graphdata)
uf = kruskal(Input, 1, graphdata, 4)
# Query the groups once and reuse the result everywhere below.
ufgroup = uf.groups()
print(ufgroup)

# Persist the groups; the context manager closes the handle, which was
# previously left open.
with open('output.txt', 'wb') as fn:
    pickle.dump(ufgroup, fn)
print(len(ufgroup))

# Smallest value at index 2 among rows that start in a group and end outside
# of it (presumably the edge cost - confirm against the input format).
minspace = float("inf")
# NOTE(review): only the first group is inspected (range(1)); confirm whether
# every group was meant to be scanned.
for i in range(1):
    members = set(ufgroup[i])
    group1 = [x for x in Input if x[0] in members]
    group2 = [x for x in group1 if x[1] not in members]
    print(group2)
    if not group2:
        # No row leaves this group; min() would raise on an empty list.
        continue
    minspacetemp = min(group2, key=itemgetter(2))
    # Compare the value at index 2, not the whole row: a list-vs-float
    # comparison never updates minspace on Python 2 and raises on Python 3.
    if minspacetemp[2] < minspace:
        minspace = minspacetemp[2]
def local_kruskal(iterator):
    """Yield the edges kruskal returns for each vertex subset in the iterator.

    Every subset is turned into a set of nodes and passed to ``kruskal``
    together with ``edges``, which is taken from the enclosing scope.
    """
    for vertex_group in iterator:
        group_tree = kruskal(nodes=set(vertex_group), edges=edges)
        for tree_edge in group_tree:
            yield tree_edge
def local_kruskal(iterator):
    """Yield the edges kruskal returns when given the iterator as its edges.

    ``nodes`` is taken from the enclosing scope; the iterator is handed to
    ``kruskal`` as-is.
    """
    selected = kruskal(nodes=nodes, edges=iterator)
    for picked in selected:
        yield picked
import pickle

# Parse the graph file: the first line holds the graph metadata, every
# following line is one row of integers. The context manager closes the
# input file, which was previously left open.
with open('clustering1.txt', 'r') as f:
    graphdata = [int(token) for token in f.readline().split()]

    Input = []
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # Real lists (not lazy map objects) because rows are indexed below.
        Input.append([int(token) for token in fields])

print(graphdata)
#T = prim(Input,1,graphdata)
uf = kruskal(Input, 1, graphdata, 4)
# Query the groups once and reuse the result everywhere below.
ufgroup = uf.groups()
print(ufgroup)

# Persist the groups; the context manager closes the handle, which was
# previously left open.
with open('output.txt', 'wb') as fn:
    pickle.dump(ufgroup, fn)
print(len(ufgroup))

# Smallest value at index 2 among rows that start in a group and end outside
# of it (presumably the edge cost - confirm against the input format).
minspace = float("inf")
# NOTE(review): only the first group is inspected (range(1)); confirm whether
# every group was meant to be scanned.
for i in range(1):
    members = set(ufgroup[i])
    group1 = [x for x in Input if x[0] in members]
    group2 = [x for x in group1 if x[1] not in members]
    print(group2)
    if not group2:
        # No row leaves this group; min() would raise on an empty list.
        continue
    minspacetemp = min(group2, key=itemgetter(2))
    # Compare the value at index 2, not the whole row: a list-vs-float
    # comparison never updates minspace on Python 2 and raises on Python 3.
    if minspacetemp[2] < minspace:
        minspace = minspacetemp[2]