def test_metis():
    if os.path.exists(METIS_ARGS_TEMPFILE):
        print('Loading metis args from %s' % METIS_ARGS_TEMPFILE)
        args = json_load(METIS_ARGS_TEMPFILE)
    else:
        print('Using simple metis args')
        args = {
            'nparts': 2,
            'adjacency': [[0, 2, 3], [1, 2], [0, 1, 2], [0, 3]],
            'eweights': [1073741824, 429496736, 357913952, 1073741824,
                         536870912, 429496736, 536870912, 1073741824,
                         357913952, 1073741824],
        }
    assert len(args['eweights']) == sum(map(len, args['adjacency']))
    print('Running unweighted metis...')
    unweighted = dict(args)
    del unweighted['eweights']
    edge_cut, partition = pymetis.part_graph(**unweighted)
    print('Finished unweighted metis')
    print('Running metis...')
    edge_cut, partition = pymetis.part_graph(**args)
    print('Finished metis')
def test_cliques():
    adjacency_list = [np.array([1, 2]),
                      np.array([0, 2]),
                      np.array([0, 1])]
    num_clusters = 2
    pymetis.part_graph(num_clusters, adjacency=adjacency_list)
def test_unconnected():
    adjacency_list = [np.array([2]),
                      np.array([]),
                      np.array([0])]
    num_clusters = 2
    pymetis.part_graph(num_clusters, adjacency=adjacency_list)
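# A hypothetical aside (not part of the original tests): pymetis accepts the
# same graph either as an adjacency list, as in test_cliques above, or in
# flat CSR form via the xadj/adjncy keyword arguments used by several
# snippets below.
import pymetis

def test_csr_equivalent():
    # the triangle graph from test_cliques, in CSR form:
    # the neighbors of vertex i are adjncy[xadj[i]:xadj[i+1]]
    xadj = [0, 2, 4, 6]
    adjncy = [1, 2, 0, 2, 0, 1]
    edge_cut, membership = pymetis.part_graph(2, xadj=xadj, adjncy=adjncy)
    assert len(membership) == 3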
def gen_social_sfn(filename, num_parts, min_edge=1, max_edge=10, alpha=0.5,
                   beta=0.45, gamma=0.05, delta_in=0.2, delta_out=0,
                   create_using=None, seed=None):
    f = open(filename, 'r')
    if f:
        neighbors = {}
        parts = {}
        G = nx.Graph()
        for line in f:
            elems = line.split(' ')
            i = int(elems[0].strip()) - 1
            j = int(elems[1].strip()) - 1
            G.add_edge(i, j)
        # nx.set_node_attributes(G, 'part', parts)
        # nx.set_node_attributes(G, 'neighbors', neighbors)
        adjacency = {}
        neighbors = {}
        for i in range(len(G)):
            adjacency[i] = []
            nbrs = {}
            for j in G.neighbors(i):
                adjacency[i].append(j)
                nbrs[j] = randint(min_edge, max_edge)
            neighbors[i] = nbrs
        cuts, part_vert = part_graph(num_parts, adjacency)
        nx.set_node_attributes(G, 'neighbors', neighbors)
        parts = {}
        for i in range(len(part_vert)):
            parts[i] = part_vert[i]
        nx.set_node_attributes(G, 'part', parts)
        f.close()
        return G
def distribute_mesh(self, mesh, partition=None):
    assert self.is_head_rank

    if partition is None:
        partition = len(self.ranks)

    # compute partition using Metis, if necessary
    if isinstance(partition, int):
        from pymetis import part_graph
        dummy, partition = part_graph(partition,
                                      mesh.element_adjacency_graph())

    from hedge.partition import partition_mesh
    from hedge.mesh import TAG_RANK_BOUNDARY

    for part_data in partition_mesh(
            mesh, partition, part_bdry_tag_factory=TAG_RANK_BOUNDARY):
        rank_data = RankData(
            mesh=part_data.mesh,
            global2local_elements=part_data.global2local_elements,
            global2local_vertex_indices=part_data.global2local_vertex_indices,
            neighbor_ranks=part_data.neighbor_parts,
            global_periodic_opposite_faces=part_data.global_periodic_opposite_faces,
            tag_to_elements=part_data.tag_to_elements)

        rank = part_data.part_nr
        if rank == self.head_rank:
            result = rank_data
        else:
            print("send rank", rank)
            self.communicator.send(rank_data, rank, 0)
            print("end send", rank)

    return result
def relocate(self, mapping_table: dict, util_number: int):
    if util_number % self.relocation_cycle == 0:
        # partition graph
        n_ag = self.context['account_group']
        vweights = list(np.sqrt(self.weight_vertex).astype(int))
        weight_edge = self.weight_edge.astype(int)
        eweights = []
        adjacency_list = []
        for i in range(n_ag):
            adj = []
            for j in range(n_ag):
                if i != j and weight_edge[i][j] != 0:
                    adj.append(j)
                    eweights.append(weight_edge[i][j])
            adjacency_list.append(np.array(adj))
        n_cuts, membership = pymetis.part_graph(
            self.context['number_of_shard'],
            adjacency=adjacency_list,
            vweights=vweights,
            eweights=eweights)
        shards = np.zeros(self.context['number_of_shard'])
        for i in range(n_ag):
            mapping_table[str(i)] = membership[i]
            shards[membership[i]] += vweights[i]
        return mapping_table
    else:
        return mapping_table
def main():
    import numpy as np
    from math import pi, cos, sin
    from meshpy.tet import MeshInfo, build
    from meshpy.geometry import \
        GeometryBuilder, generate_surface_of_revolution, EXT_CLOSED_IN_RZ

    big_r = 3
    little_r = 1.5

    points = 50
    dphi = 2*pi/points

    rz = np.array([[big_r+little_r*cos(i*dphi), little_r*sin(i*dphi)]
                   for i in range(points)])

    geo = GeometryBuilder()
    geo.add_geometry(*generate_surface_of_revolution(
        rz, closure=EXT_CLOSED_IN_RZ, radial_subdiv=20))

    mesh_info = MeshInfo()
    geo.set(mesh_info)

    mesh = build(mesh_info)

    def tet_face_vertices(vertices):
        return [(vertices[0], vertices[1], vertices[2]),
                (vertices[0], vertices[1], vertices[3]),
                (vertices[0], vertices[2], vertices[3]),
                (vertices[1], vertices[2], vertices[3]),
                ]

    face_map = {}
    for el_id, el in enumerate(mesh.elements):
        for fid, face_vertices in enumerate(tet_face_vertices(el)):
            face_map.setdefault(frozenset(face_vertices), []).append((el_id, fid))

    adjacency = {}
    for face_vertices, els_faces in face_map.items():
        if len(els_faces) == 2:
            (e1, f1), (e2, f2) = els_faces
            adjacency.setdefault(e1, []).append(e2)
            adjacency.setdefault(e2, []).append(e1)

    from pymetis import part_graph
    cuts, part_vert = part_graph(17, adjacency)

    try:
        import pyvtk
    except ImportError:
        print("Test succeeded, but could not import pyvtk to visualize result")
    else:
        vtkelements = pyvtk.VtkData(
            pyvtk.UnstructuredGrid(mesh.points, tetra=mesh.elements),
            "Mesh",
            pyvtk.CellData(pyvtk.Scalars(part_vert, name="partition")))
        vtkelements.tofile('split.vtk')
def test_tet_mesh(visualize=False):
    pytest.importorskip("meshpy")

    from math import pi, cos, sin
    from meshpy.tet import MeshInfo, build
    from meshpy.geometry import \
        GeometryBuilder, generate_surface_of_revolution, EXT_CLOSED_IN_RZ

    big_r = 3
    little_r = 1.5

    points = 50
    dphi = 2 * pi / points

    rz = np.array(
        [[big_r + little_r * cos(i * dphi), little_r * sin(i * dphi)]
         for i in range(points)])

    geo = GeometryBuilder()
    geo.add_geometry(*generate_surface_of_revolution(
        rz, closure=EXT_CLOSED_IN_RZ, radial_subdiv=20))

    mesh_info = MeshInfo()
    geo.set(mesh_info)

    mesh = build(mesh_info)

    def tet_face_vertices(vertices):
        return [
            (vertices[0], vertices[1], vertices[2]),
            (vertices[0], vertices[1], vertices[3]),
            (vertices[0], vertices[2], vertices[3]),
            (vertices[1], vertices[2], vertices[3]),
        ]

    face_map = {}
    for el_id, el in enumerate(mesh.elements):
        for fid, face_vertices in enumerate(tet_face_vertices(el)):
            face_map.setdefault(frozenset(face_vertices), []).append(
                (el_id, fid))

    adjacency = {}
    for face_vertices, els_faces in face_map.items():
        if len(els_faces) == 2:
            (e1, f1), (e2, f2) = els_faces
            adjacency.setdefault(e1, []).append(e2)
            adjacency.setdefault(e2, []).append(e1)

    cuts, part_vert = pymetis.part_graph(17, adjacency)

    if visualize:
        import pyvtk
        vtkelements = pyvtk.VtkData(
            pyvtk.UnstructuredGrid(mesh.points, tetra=mesh.elements),
            "Mesh",
            pyvtk.CellData(pyvtk.Scalars(part_vert, name="partition")))
        vtkelements.tofile('split.vtk')
def divide_cluster(self, adj_list, points):
    pprint("in divide cluster")
    reverse_point = dict()
    i = 0
    for point in points:
        reverse_point[(point.x, point.y)] = i
        i += 1
    (edgecuts, parts) = pymetis.part_graph(2, adj_list)
    left_num = 0
    right_num = 0
    point_list_left = []
    point_list_right = []
    left_graph = []
    right_graph = []
    left_dict = dict()
    right_dict = dict()
    for i in range(len(parts)):
        if parts[i] == 0:
            left_dict[(points[i].x, points[i].y)] = left_num
            left_num += 1
            point_list_left.append(points[i])
            left_graph.append([])
        else:
            right_dict[(points[i].x, points[i].y)] = right_num
            right_num += 1
            point_list_right.append(points[i])
            right_graph.append([])
    i = 0
    for point in point_list_left:
        temp_key = (point.x, point.y)
        if temp_key in left_dict:
            temp_list = left_graph[i]
            adj_index = reverse_point[temp_key]
            for mapped_index in adj_list[adj_index]:
                temp_temp_key = (points[mapped_index].x,
                                 points[mapped_index].y)
                if temp_temp_key in left_dict:
                    temp_list.append(left_dict[temp_temp_key])
        i += 1
    i = 0
    for point in point_list_right:
        temp_key = (point.x, point.y)
        if temp_key in right_dict:
            temp_list = right_graph[i]
            adj_index = reverse_point[temp_key]
            for mapped_index in adj_list[adj_index]:
                temp_temp_key = (points[mapped_index].x,
                                 points[mapped_index].y)
                if temp_temp_key in right_dict:
                    temp_list.append(right_dict[temp_temp_key])
        i += 1
    return (left_num, right_num, point_list_left, point_list_right,
            left_graph, right_graph)
def metis(A, parts):
    from collections import defaultdict
    from pymetis import part_graph
    adj = defaultdict(list)
    for i in range(A.shape[0]):
        adj[i] = list(A.indices[A.indptr[i]:A.indptr[i + 1]])
    return part_graph(parts, adj)
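# Hypothetical usage sketch for metis() above, assuming A is a symmetric
# scipy.sparse CSR matrix whose sparsity pattern encodes an undirected graph:
import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0, 1, 1, 0],
                         [1, 0, 0, 1],
                         [1, 0, 0, 1],
                         [0, 1, 1, 0]]))
cuts, membership = metis(A, parts=2)  # membership[i] is the part of vertex i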
def cover(socp_data, N):
    """stacks the socp data and partitions it into N
    local dicts describing constraints R <= s"""
    n = socp_data['c'].shape[0]

    # form the Laplacian and use pymetis to partition
    L = form_laplacian(socp_data)
    graph = nx.from_scipy_sparse_matrix(L)
    cuts, part_vert = pm.part_graph(N, graph)

    return part_vert[n:]
def makePartition(Nx, Ny, numParts):
    """
    Nx = number of board positions in the X-direction
    Ny = number of board positions in the Y-direction
    numParts = the number of partitions to make

    returns partList = list of which partition each board position will be in
    """
    adjDict = set_adjacency(Nx, Ny, ex, ey)
    cuts, partList = part_graph(numParts, adjDict)
    return partList
def mcla(labels, nclass, random_state):
    """Meta-CLustering Algorithm (MCLA).

    Parameters
    ----------
    labels: Labels generated by multiple clustering algorithms such as K-Means.
    nclass: Number of classes in a consensus clustering label.
    random_state: Used for reproducible results.

    Return
    ------
    label_ce: Consensus clustering label obtained from MCLA.
    """
    np.random.seed(random_state)

    # Construct Meta-graph
    H = create_hypergraph(labels)
    n_cols = H.shape[1]

    W = sparse.identity(n_cols, dtype=float, format="lil")
    for i in range(n_cols):
        hi = H.getcol(i)
        norm_hi = (hi.T * hi)[0, 0]
        for j in range(n_cols):
            if i < j:
                hj = H.getcol(j)
                norm_hj = (hj.T * hj)[0, 0]
                inner_prod = (hi.T * hj)[0, 0]
                W[i, j] = inner_prod / (norm_hi + norm_hj - inner_prod)
                W[j, i] = W[i, j]
    W *= 1e3
    W = W.astype(int)

    # Cluster Hyperedges
    xadj, adjncy, eweights = to_pymetis_format(W)
    membership = pymetis.part_graph(nparts=nclass, xadj=xadj,
                                    adjncy=adjncy, eweights=eweights)[1]

    # Collapse Meta-clusters
    meta_clusters = sparse.dok_matrix(
        (labels.shape[1], nclass), dtype=float).tolil()
    for i, v in enumerate(membership):
        meta_clusters[:, v] += H.getcol(i)

    # Compete for Objects
    label_ce = np.empty(labels.shape[1], dtype=int)
    for i, v in enumerate(meta_clusters):
        v = v.toarray()[0]
        label_ce[i] = np.random.choice(np.nonzero(v == np.max(v))[0])

    return label_ce
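# to_pymetis_format is not defined in this section; a plausible minimal
# sketch, consistent with how mcla above and cspa/hbgf below call it,
# converting a symmetric scipy.sparse weight matrix into pymetis's CSR
# inputs. The diagonal is dropped because METIS does not allow self-loops.
from scipy import sparse

def to_pymetis_format(W):
    W = sparse.csr_matrix(W)
    W.setdiag(0)          # METIS does not allow self-loops
    W.eliminate_zeros()
    xadj = W.indptr.tolist()
    adjncy = W.indices.tolist()
    eweights = W.data.astype(int).tolist()
    return xadj, adjncy, eweights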
def metis_clustering(self):
    """
    Clustering the graph with Metis.
    """
    # (st, parts) = metis.part_graph(self.graph, self.args.cluster_number)
    (st, parts) = pymetis.part_graph(self.args.cluster_number,
                                     adjacency=self.adj)
    self.clusters = list(set(parts))
    self.cluster_membership = {
        node: membership for node, membership in enumerate(parts)
    }
def partition(mat, n_parts):
    """
    Partition a directed graph described by a weighted connectivity matrix.

    Parameters
    ----------
    mat : numpy.ndarray
        Square weighted connectivity matrix for a directed graph.
    n_parts : int
        Number of partitions.

    Returns
    -------
    part_map : dict of list
        Dictionary of partitions. The dict keys are the partition identifiers,
        and the values are the lists of nodes in each partition.
    """

    # Combine weights of directed edges to obtain undirected graph:
    mat = mat + mat.T

    # Convert matrix into METIS-compatible form:
    g = nx.from_numpy_matrix(np.array(mat, dtype=[('weight', int)]))
    n = g.number_of_nodes()
    e = g.number_of_edges()
    xadj = np.empty(n + 1, int)
    adjncy = np.empty(2 * e, int)
    eweights = np.empty(2 * e, int)
    end_node = 0
    xadj[0] = 0
    for i in g.node:
        for j, a in g.edge[i].items():
            adjncy[end_node] = j
            eweights[end_node] = a['weight']
            end_node += 1
        xadj[i + 1] = end_node

    # Compute edge-cut partition:
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        cutcount, part_vert = pymetis.part_graph(n_parts, xadj=xadj,
                                                 adjncy=adjncy,
                                                 eweights=eweights)

    # Find nodes in each partition:
    part_map = {}
    for i, p in enumerate(set(part_vert)):
        ind = np.where(np.array(part_vert) == p)[0]
        part_map[p] = ind

    return part_map
def get_partition_by_pymetis(mesh, num_parts, *, connectivity="facial", **kwargs):
    """Return a mesh partition created by :mod:`pymetis`.

    :arg mesh: A :class:`meshmode.mesh.Mesh` instance
    :arg num_parts: the number of parts in the mesh partition
    :arg connectivity: the adjacency graph to be used for partitioning.
        Either ``"facial"`` or ``"nodal"`` (based on vertices).
    :arg kwargs: Passed unmodified to :func:`pymetis.part_graph`.
    :returns: a :class:`numpy.ndarray` with one entry per element indicating
        to which partition each element belongs, with entries between ``0`` and
        ``num_parts-1``.

    .. versionchanged:: 2020.2

        *connectivity* was added.
    """
    if connectivity == "facial":
        # shape: (2, n_el_pairs)
        neighbor_el_pairs = np.hstack([
            np.array([
                fagrp.elements + mesh.groups[fagrp.igroup].element_nr_base,
                fagrp.neighbors
                + mesh.groups[fagrp.ineighbor_group].element_nr_base
            ])
            for fadj in mesh.facial_adjacency_groups
            for to_grp, fagrp in fadj.items()
            if fagrp.ineighbor_group is not None
        ])
        sorted_neighbor_el_pairs = neighbor_el_pairs[
            :, np.argsort(neighbor_el_pairs[0])]
        xadj = np.searchsorted(sorted_neighbor_el_pairs[0],
                               np.arange(mesh.nelements + 1))
        adjncy = sorted_neighbor_el_pairs[1]
    elif connectivity == "nodal":
        xadj = mesh.nodal_adjacency.neighbors_starts.tolist()
        adjncy = mesh.nodal_adjacency.neighbors.tolist()
    else:
        raise ValueError("invalid value of connectivity")

    from pymetis import part_graph
    _, p = part_graph(num_parts, xadj=xadj, adjncy=adjncy, **kwargs)

    return np.array(p)
def run_pymetis(J, nparts):
    '''Partition the graph described by coupling matrix J into nparts parts.'''
    # print('running {0}-way partitioning...'.format(nparts))

    # run pymetis partitioning
    adj_list = nx.to_dict_of_lists(nx.Graph(J))
    adj_list = [adj_list[k] for k in range(len(adj_list))]
    ncuts, labels = part_graph(nparts, adjacency=adj_list)

    # get indices of each partition
    parts = [[] for _ in range(nparts)]
    for i, p in enumerate(labels):
        parts[p].append(i)

    return parts
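# Hypothetical usage of run_pymetis above: J may be anything nx.Graph
# accepts, e.g. a dense symmetric coupling matrix (assuming a networkx
# version that builds a graph from a 2D numpy array).
import numpy as np

J = np.array([[0, 1, 1, 0],
              [1, 0, 0, 1],
              [1, 0, 0, 1],
              [0, 1, 1, 0]])
parts = run_pymetis(J, nparts=2)  # e.g. [[0, 1], [2, 3]], up to relabeling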
def graph_partition(g: nx.DiGraph, n_part: int):
    edges = ((node, np.array(list(edges)))
             for node, edges in g.to_undirected().adjacency())
    nodes, adj_list = zip(*sorted(edges, key=lambda i: i[0]))
    _, membership = pymetis.part_graph(n_part, adj_list)
    partitions = [set() for _ in range(n_part)]
    for node, member in zip(nodes, membership):
        partitions[member].add(node)
    return list(map(sorted, partitions)), {
        node: member for node, member in zip(nodes, membership)
    }
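# Hypothetical usage of graph_partition above: nodes are sorted and then
# zipped with the adjacency list, so orderable integer node labels 0..n-1
# are assumed here.
import networkx as nx

g = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])
parts, membership = graph_partition(g, n_part=2)
# parts is a list of sorted node lists; membership maps node -> part index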
def signal_partition(signals, n_part=100, binarize_t=.5):
    signals = signal_concat(signals)
    print('Inside signal_partition signal concat shape is {}'.format(
        signals.shape))
    adj = adjacency_correlation(signals)
    badj = binarize(np.copy(adj), binarize_t)
    start = time.time()
    partition = pymetis.part_graph(n_part, adj2list(badj))[1]
    print('PyMetis Partition finished! in {} secs'.format(time.time() - start))
    node_splits = [[i for i, p in enumerate(partition) if p == val]
                   for val in range(n_part)]
    splits = [[signals[indices, :] for indices in node_splits]]
    return node_splits, splits
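# adj2list is not shown in this section; a minimal sketch consistent with
# the call above, assuming badj is a dense binary adjacency matrix:
import numpy as np

def adj2list(adj):
    # one array of neighbor indices per node -- the adjacency-list format
    # pymetis.part_graph accepts
    return [np.flatnonzero(row) for row in np.asarray(adj)]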
def get_partition_by_pymetis(mesh, num_parts, **kwargs):
    """Return a mesh partition created by :mod:`pymetis`.

    :arg mesh: A :class:`meshmode.mesh.Mesh` instance
    :arg num_parts: the number of parts in the mesh partition
    :arg kwargs: Passed unmodified to :func:`pymetis.part_graph`.
    :returns: a :class:`numpy.ndarray` with one entry per element indicating
        to which partition each element belongs, with entries between ``0`` and
        ``num_parts-1``.
    """
    from pymetis import part_graph
    _, p = part_graph(num_parts,
                      xadj=mesh.nodal_adjacency.neighbors_starts.tolist(),
                      adjncy=mesh.nodal_adjacency.neighbors.tolist(),
                      **kwargs)

    return np.array(p)
def part_graph(cluster_number, adjacency_matrix, log=False):
    adjacency_list = adjacency_matrix_to_adjacency_list(adjacency_matrix)
    cut_count, part_vert = pymetis.part_graph(cluster_number,
                                              adjacency=adjacency_list)

    # Find nodes in each partition:
    part_map = {}
    for i, p in enumerate(set(part_vert)):
        part_map[p] = np.argwhere(np.array(part_vert) == p).ravel()

    if log:
        for part, nodes in part_map.items():
            print('part %s, nodes quantity: %s' % (part, len(nodes)))
            print(nodes)

    return part_map, cut_count, part_vert
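# adjacency_matrix_to_adjacency_list is not defined above; a plausible
# minimal implementation, assuming a dense symmetric 0/1 matrix:
import numpy as np

def adjacency_matrix_to_adjacency_list(adjacency_matrix):
    matrix = np.asarray(adjacency_matrix)
    return [np.flatnonzero(row).tolist() for row in matrix]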
def __init__(self, Nx, Ny, Nz, numParts, adjList, numTrials=2000, style='1D'):
    """
    Nx - number of lattice points in the x-direction (int)
    Ny - number of lattice points in the y-direction (int)
    Nz - number of lattice points in the z-direction (int)
    numParts - number of partitions to form (int)
    adjList - adjacency list (dictionary)
    numTrials - number of attempts that the randomized partition advisor
                should use to find a good partitioning
    style - '1D', '3D', 'metis' partition style
    """
    self.Nx = Nx
    self.Ny = Ny
    self.Nz = Nz
    self.numParts = numParts
    self.numTrials = numTrials
    self.style = style
    self.adjList = adjList
    if style == '1D':
        self.px = 1
        self.py = 1
        self.pz = self.numParts
    elif style == '3D':
        [self.px, self.py, self.pz] = ps.part_advisor(
            self.Nx, self.Ny, self.Nz, self.numParts, self.numTrials)

    if style == '1D' or style == '3D':
        self.part_vert = pc.set_geometric_partition(
            self.Nx, self.Ny, self.Nz, self.px, self.py, self.pz)
    else:
        if NO_PYMETIS == 1:
            print("pymetis partitioning selected but not available")
            sys.exit()
        [cuts, self.part_vert] = part_graph(self.numParts, self.adjList)
def pymetis_partition(graph, node_list, limit):
    "Partition the graph using metis."
    node_set = set(node_list)
    node_map = dict((node, i) for i, node in enumerate(node_list))
    adj_lists = dict(
        (node_map[node],
         list(node_map[other_node]
              for other_node in node_set & set(graph[node].keys())))
        for node in node_list)
    from pymetis import part_graph
    num_part = int(math.ceil(float(len(node_list)) / limit))
    cuts, part_vert = part_graph(num_part, adj_lists)
    results = [[] for _ in range(max(part_vert) + 1)]
    node_map_rev = dict((v, k) for k, v in node_map.items())
    for node_idx, partition in enumerate(part_vert):
        results[partition].append(node_map_rev[node_idx])
    return results
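# Hypothetical usage of pymetis_partition above: only graph[node].keys() is
# used, so a dict-of-dicts (or an nx.Graph) works; limit caps the target
# number of nodes per part.
graph = {
    'a': {'b': 1, 'c': 1},
    'b': {'a': 1, 'c': 1},
    'c': {'a': 1, 'b': 1, 'd': 1},
    'd': {'c': 1},
}
groups = pymetis_partition(graph, node_list=list(graph), limit=2)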
def MutilevelKwayPartition(k, adj, xadj, w):
    (edgecuts, parts) = pymetis.part_graph(nparts=k, adjncy=adj,
                                           xadj=xadj, eweights=w)
    print(parts)
    for i in range(0, k):
        PartialHost = []
        Partialadjancy = []
        for j in range(0, len(parts)):
            if parts[j] == i:
                PartialHost.append(NetworkHost[j])
                Partialadjancy.append([j] + adjancy[j])
        PartialNetworkHosts.append(PartialHost)
        Partialadjancys.append(Partialadjancy)
    return Partialadjancys
def read_sfn_addparts(filename, num_parts, comm, g_algo=FixedProb()):
    f = open(filename, 'r')
    if f:
        neighbors = {}
        parts = {}
        adjacency = {}
        G = nx.Graph()
        for line in f:
            elems = line.split(':')
            # print(elems[0].strip())
            i = int(elems[0].strip())
            G.add_node(i)
            nbrs_str = elems[1].split(',')
            nbrs = {}
            adjacency[i] = []
            for nbr_str in nbrs_str:
                edge = nbr_str.split()
                if len(edge) == 2:
                    node = int(edge[0].strip())
                    wt = int(edge[1].strip())
                    adjacency[i].append(node)
                    nbrs[node] = wt
            neighbors[i] = nbrs
        f.close()
        cuts, part_vert = part_graph(num_parts, adjacency)
        for i in range(len(part_vert)):
            parts[i] = part_vert[i]
        nx.set_node_attributes(G, 'part', parts)
        nx.set_node_attributes(G, 'neighbors', neighbors)
        rank = comm.Get_rank()
        gnodes = []
        for i in range(len(G)):
            if G.node[i]['part'] == rank:
                gnode = GossipNode(i, G, g_algo)
                gnodes.append(gnode)
        return gnodes, G
def partition_metis(g: pp.Grid, num_part: int) -> np.ndarray:
    """
    Partition a grid using metis.

    This function requires that pymetis is installed, as can be done by

        pip install pymetis

    This will install metis itself in addition to the python bindings. There
    are other python bindings for metis as well, but pymetis has behaved well
    so far.

    Parameters:
        g: core.grids.grid: To be partitioned. Only the cell_face attribute is
            used
        num_part (int): Number of partitions.

    Returns:
        np.array (size:g.num_cells): Partition vector, one number in
            [0, num_part) for each cell.
    """
    try:
        import pymetis
    except ImportError:
        warnings.warn(
            "Could not import pymetis. Partitioning by metis will not work.")
        raise ImportError("Cannot partition by pymetis")

    # Connection map between cells
    c2c = g.cell_connection_map()

    # Convert the cells into the format required by pymetis
    adjacency_list = [list(c2c.getrow(i).indices) for i in range(c2c.shape[0])]
    # Call pymetis
    # It seems it is important that num_part is a Python int, not a numpy int.
    part = pymetis.part_graph(int(num_part), adjacency=adjacency_list)

    # The meaning of the first number returned by pymetis is not clear (poor
    # documentation), only return the partitioning.
    return np.array(part[1])
def divide_full_nodes(no_workers, grid):
    """Partition the grid into the given number of workers"""

    # Initialise a list of nodes
    node_list = list()

    # Initialise the dictionary to hold the node partitions
    full_partition = {}

    # Initialise the adjacency list used by metis to find the node partitions
    adjacency = {}

    # Build a list of nodes (this is primarily done to assign each node an
    # integer value)
    for node in node_iterator(grid):
        node_list.append(node.get_node_id())

    # Build the adjacency list

    # Loop over the nodes
    for node in node_iterator(grid):
        node_id = node.get_node_id()
        node_index = node_list.index(node_id)

        # Loop over the edges
        for endpt in endpt_iterator(grid, node_id):
            endpt_index = node_list.index(endpt)

            # Make a note of the relation between the two endpoints
            adjacency.setdefault(node_index, []).append(endpt_index)

    # Use metis to partition the grid
    c, partition = part_graph(no_workers, adjacency)

    # Make note of which node should go to which worker
    for i in range(len(partition)):
        full_partition.setdefault(partition[i], []).append(node_list[i])

    # Return the partitioning
    return full_partition
def partition_mesh(mesh, n_parts, use_metis=True, verbose=False):
    """
    Partition the mesh cells into `n_parts` subdomains, using metis, if
    available.
    """
    output('partitioning mesh into %d subdomains...' % n_parts,
           verbose=verbose)
    timer = Timer(start=True)

    if use_metis:
        try:
            from pymetis import part_graph
        except ImportError:
            output('pymetis is not available, using naive partitioning!')
            part_graph = None

    if use_metis and (part_graph is not None):
        cmesh = mesh.cmesh
        cmesh.setup_connectivity(cmesh.dim, cmesh.dim)
        graph = cmesh.get_conn(cmesh.dim, cmesh.dim)

        cuts, cell_tasks = part_graph(n_parts,
                                      xadj=graph.offsets.astype(int),
                                      adjncy=graph.indices.astype(int))
        cell_tasks = nm.array(cell_tasks, dtype=nm.int32)

    else:
        ii = nm.arange(n_parts)
        n_cell_parts = mesh.n_el // n_parts + ((mesh.n_el % n_parts) > ii)
        output('cell counts:', n_cell_parts, verbose=verbose)
        assert_(sum(n_cell_parts) == mesh.n_el)
        assert_(nm.all(n_cell_parts > 0))

        offs = nm.cumsum(nm.r_[0, n_cell_parts])
        cell_tasks = nm.digitize(nm.arange(offs[-1]), offs) - 1

    output('...done in', timer.stop(), verbose=verbose)

    return cell_tasks
def partition_multicomponent_graph(A_scipy, v_per_subdomain=1000):
    final_labels = -np.ones(A_scipy.shape[0], dtype=int)
    partition_ind_shift = 0
    n_components, labels_components = connected_components(A_scipy,
                                                           directed=False)
    for component_id in range(n_components):
        component_n = (labels_components == component_id).nonzero()[0]
        A_sub = A_scipy[component_n, :][:, component_n]
        num_subpartitions = max(len(component_n) // v_per_subdomain, 2)
        _, labels_subpartition = pymetis.part_graph(num_subpartitions,
                                                    get_adjlist(A_sub))
        assert max(labels_subpartition) + 1 == num_subpartitions
        assert np.all(final_labels[component_n] == -1)
        final_labels[component_n] = np.asarray(
            labels_subpartition) + partition_ind_shift
        partition_ind_shift += num_subpartitions
    assert np.all(final_labels != -1), len((final_labels == -1).nonzero()[0])
    return partition_ind_shift, final_labels
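# get_adjlist is not shown in this section; a minimal sketch consistent with
# the call above, for a symmetric scipy.sparse matrix:
def get_adjlist(A):
    A = A.tocsr()
    return [A.indices[A.indptr[i]:A.indptr[i + 1]]
            for i in range(A.shape[0])]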
def gen_sfn(n, num_parts=4, min_edge=1, max_edge=10, alpha=0.5, beta=0.45,
            gamma=0.05, delta_in=0.2, delta_out=0, create_using=None,
            seed=None):
    G = nx.scale_free_graph(n=n, alpha=alpha, beta=beta, gamma=gamma,
                            delta_in=delta_in, delta_out=delta_out,
                            create_using=create_using, seed=seed)
    adjacency = {}
    neighbors = {}
    for i in range(len(G)):
        adjacency[i] = []
        nbrs = {}
        for j in G.neighbors(i):
            adjacency[i].append(j)
            nbrs[j] = randint(min_edge, max_edge)
        neighbors[i] = nbrs
    cuts, part_vert = part_graph(num_parts, adjacency)
    nx.set_node_attributes(G, 'neighbors', neighbors)
    parts = {}
    for i in range(len(part_vert)):
        parts[i] = part_vert[i]
    nx.set_node_attributes(G, 'part', parts)
    return G
def graph_test():
    xy = np.array((tab['x'], tab['y'])).T
    d_mat = distance_matrix(xy, xy)
    d_mat[d_mat > 50] = 0
    graph = nx.from_numpy_matrix(d_mat)
    # S = [graph.subgraph(c).copy() for c in nx.connected_components(graph)]
    # biggest = S[0]

    def flatten(l: list[list[T]]) -> list[T]:
        return [entry for sublist in l for entry in sublist]

    def to_csr(adj_list):
        xadj = [0]
        adjncy = []
        for sublist in adj_list:
            xadj.append(xadj[-1] + len(sublist))
            adjncy += sublist
        return xadj, adjncy

    # ncuts, membership = pymetis.part_graph(50, adj_list)
    adj_list = [list(i) for i in graph.adj.values()]
    weights = [[int(1000 / i['weight']) for i in info.values()]
               for info in graph.adj.values()]
    xadj, adjncy = to_csr(adj_list)
    ncuts, membership = pymetis.part_graph(30, xadj=xadj, adjncy=adjncy,
                                           eweights=flatten(weights))

    print(nx.number_connected_components(graph))
    print(np.unique([len(i) for i in list(nx.connected_components(graph))]))

    layout = dict(zip(range(len(xy)), xy))
    nx.draw(graph, layout, node_color=membership, cmap='prism')
def cspa(labels, nclass):
    """Cluster-based Similarity Partitioning Algorithm (CSPA).

    Parameters
    ----------
    labels: Labels generated by multiple clustering algorithms such as K-Means.
    nclass: Number of classes in a consensus clustering label.

    Return
    ------
    label_ce: Consensus clustering label obtained from CSPA.
    """
    H = create_hypergraph(labels)
    S = H * H.T

    xadj, adjncy, eweights = to_pymetis_format(S)
    membership = pymetis.part_graph(nparts=nclass, xadj=xadj,
                                    adjncy=adjncy, eweights=eweights)[1]

    label_ce = np.array(membership)

    return label_ce
def hbgf(labels, nclass):
    """Hybrid Bipartite Graph Formulation (HBGF).

    Parameters
    ----------
    labels: Labels generated by multiple clustering algorithms such as K-Means.
    nclass: Number of classes in a consensus clustering label.

    Return
    ------
    label_ce: Consensus clustering label obtained from HBGF.
    """
    A = create_hypergraph(labels)
    n_rows, n_cols = A.shape

    W = sparse.bmat([[sparse.dok_matrix((n_cols, n_cols)), A.T],
                     [A, sparse.dok_matrix((n_rows, n_rows))]])

    xadj, adjncy, _ = to_pymetis_format(W)
    membership = pymetis.part_graph(nparts=nclass, xadj=xadj,
                                    adjncy=adjncy, eweights=None)[1]

    label_ce = np.array(membership[n_cols:])

    return label_ce
def partition_pymetis(graph, num_partitions=2):
    print('partition starts')
    # obtain an index from 0 onwards.
    adjacency = {}
    ele_to_index = {}
    index_to_ele = {}
    index = 0
    for n in graph.nodes():
        if n not in ele_to_index:
            ele_to_index[n] = index
            index_to_ele[index] = n
            index += 1
    print('index = ', index)
    print('which should be the same as num of nodes = ',
          graph.number_of_nodes())
    for n in graph.nodes():
        adjacency.setdefault(ele_to_index[n], [])
    for (m, n) in graph.edges():
        # METIS expects each undirected edge to appear in both adjacency lists
        adjacency[ele_to_index[m]].append(ele_to_index[n])
        adjacency[ele_to_index[n]].append(ele_to_index[m])
    cuts, part_vert = pymetis.part_graph(num_partitions, adjacency=adjacency)
    print('cuts = ', cuts)
    all_edges_to_remove = []
    for (n, m) in graph.edges():
        index_n = ele_to_index[n]
        index_m = ele_to_index[m]
        if part_vert[index_n] != part_vert[index_m]:
            # remove
            all_edges_to_remove.append((n, m))
    print('# edges removed: ', len(all_edges_to_remove))
    return all_edges_to_remove
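# Hypothetical usage of partition_pymetis above: cut a small graph in two
# and drop the crossing edges.
import networkx as nx

graph = nx.barbell_graph(5, 0)  # two 5-cliques joined by a single edge
cut_edges = partition_pymetis(graph, num_partitions=2)
graph.remove_edges_from(cut_edges)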
print('Total lattice points = %d.' % (Nx * Ny * Nz))
print('Setting adjacency list')
adjDict = set_adjacency(int(Nx), int(Ny), int(Nz), ex, ey, ez)
N_parts = 8
print('Nx = %d ' % Nx)
print('Ny = %d ' % Ny)
print('Nz = %d ' % Nz)

print('getting METIS partition')
if NO_PYMETIS == 1:
    print("pymetis is not available")
    sys.exit()
cuts, part_vert = part_graph(N_parts, adjDict)

print('getting part_advisor partition')
px, py, pz = part_advisor(Nx, Ny, Nz, N_parts)

# make sure all of these things are integers...
Nx = int(Nx)
Ny = int(Ny)
Nz = int(Nz)
px = int(px)
py = int(py)
pz = int(pz)

start1 = time.time()
print("set geometric partition 1")
part_vert_pa1 = set_geometric_partition(Nx, Ny, Nz, px, py, pz)
def split(self, random_split=False):
    objective_fn = self.objective.args[0]
    constraints = self.constraints
    nparts = self.num_procs
    if not isinstance(objective_fn, cvxpy.atoms.affine.add_expr.AddExpression):
        objective_fn += 0
    num_funcs = len(objective_fn.args)
    num_components = len(objective_fn.args) + len(constraints)

    # The functions are indexed from 0 to num_funcs-1
    var_sets = [frozenset(func.variables()) for func in objective_fn.args]
    var_sets += [frozenset(constraint.variables())
                 for constraint in constraints]
    all_vars = self.variables()

    # adj_list contains indices, not actual functions
    adj_list = [[] for _ in range(num_components)]
    funcs_per_var = {}
    for var in all_vars:
        # find all functions that contain this variable
        funcs_per_var[var] = [i for i in range(num_components)
                              if var in var_sets[i]]
        # add an edge between any two of them
        for pair in itertools.permutations(funcs_per_var[var], 2):
            adj_list[pair[0]].append(pair[1])

    if not random_split:
        partition_per_func = pymetis.part_graph(nparts, adjacency=adj_list)[1]
    else:
        partition_per_func = np.random.randint(0, nparts, size=num_components)

    public_vars = []
    public_vars_per_partition = [[] for _ in range(nparts)]
    for var in all_vars:
        partitions_per_var = list(set(
            [partition_per_func[i] for i in funcs_per_var[var]]))
        # If this is a public variable
        if len(partitions_per_var) > 1:
            public_vars.append(var)
            for partition in partitions_per_var:
                public_vars_per_partition[partition].append(var)

    # Index of functions that belong to each subproblem
    funcs_per_partition = [[] for _ in range(nparts)]
    for i in range(num_components):
        funcs_per_partition[partition_per_func[i]].append(i)

    subsystems = []
    for i in range(nparts):
        func_indices = [index for index in funcs_per_partition[i]
                        if index < num_funcs]
        constrs = [constraints[index - num_funcs]
                   for index in funcs_per_partition[i] if index >= num_funcs]
        sub_objective = sum([objective_fn.args[func_index]
                             for func_index in func_indices])
        params = []
        local_params = []
        for var in public_vars_per_partition[i]:
            param = cvxpy.Parameter(*var.size, sign=var.sign,
                                    value=np.zeros(var.size))
            local_param = cvxpy.Parameter(*var.size, sign=var.sign,
                                          value=np.zeros(var.size))
            params.append(param)
            local_params.append(local_param)
            # Add prox term
            sub_objective += self.rho / 2 * cvxpy.sum_squares(
                var - param + local_param)
        # TODO Only deals with minimization problem for now
        subsystems.append(Subsystem(cvxpy.Minimize(sub_objective),
                                    constraints=constrs,
                                    public_vars=public_vars_per_partition[i],
                                    local_params=local_params,
                                    params=params))

    return [partition_per_func, public_vars, subsystems]
import numpy as np
import matplotlib.pyplot as pt

from mesh import make_mesh

mesh = make_mesh()

# {{{ find connectivity

adjacency = {}

for a, b, c in mesh.elements:
    for v1, v2 in [(a, b), (b, c), (c, a)]:
        for x, y in [(v1, v2), (v2, v1)]:
            adjacency.setdefault(x, set()).add(y)

# }}}

from pymetis import part_graph

points = np.array(mesh.points)
elements = np.array(mesh.elements)

vweights = points[:, 0]**2

cuts, part_vert = part_graph(2, adjacency,
                             #vweights=[int(20*x) for x in vweights]
                             )

pt.triplot(points[:, 0], points[:, 1], elements, color="black", lw=0.1)
pt.tripcolor(points[:, 0], points[:, 1], elements, part_vert)
pt.tricontour(points[:, 0], points[:, 1], elements, part_vert,
              colors="black", levels=[0])
pt.show()
# import pymatlab
# from pymatlab import Session
# m = Session()
# http://surfer.nmr.mgh.harvard.edu/fswiki/CorticalParcellation
# m.run("[v,l,c] = read_annotation('/home/stephan/Dev/PyWorkspace/cmp/scratch/atlas_creation/cmp/rh.myaparc_33.annot');")

# data init
verts = np.array([
    [0, 1, 1],
    [2, 3, 2],
    [2, 1, 2],
    [2, 5, 4],
    [5, 4, 3],
])
faces = np.array([
    [0, 1, 2],
    [2, 1, 4],
    [3, 4, 2],
]).tolist()

# select one region, and extract it as subgraph
labels = np.array([0, 2, 2, 1, 1])

# create a graph from the mesh
h = nx.Graph()
for f in faces:
    # add three edges for each triangle
    a, b, c = f
    h.add_edges_from([(a, b), (b, c), (c, a)])

# print(h.adjacency_list())

# partition the graph
cuts, part_vert = part_graph(2, h.adjacency_list())

print("number of cuts", cuts)
print("partition", part_vert)

# visualize partition to control using partition as scalar value
x = numpy.random.randn(20, 20)
x[5] = 0.
x[:, 12] = 0.

ax1.spy(x, markersize=5)
ax2.spy(x, precision=0.1, markersize=5)

ax3.spy(x)
ax4.spy(x, precision=0.1)

#show()

"""
spy plot of a networkx graph obj
"""
import networkx as nx
G = nx.karate_club_graph()
A = nx.adjacency_matrix(G)

fig = figure()
ax1 = fig.add_subplot(111)
ax1.spy(A)
#show()

import pymetis
cuts, part_vert = pymetis.part_graph(6, G.adjacency_list())
print("number of cuts", cuts)
import crash_on_ipy
import pickle as pkl
import struct
import sys

import metis_graph as mg
from local_para_small import *
import pymetis

if __name__ == "__main__":
    import os
    os.chdir(dumpFilePath)
    _g = pkl.load(open("mgA.dump", "rb"))
    nGroup = 10
    cut, vers = pymetis.part_graph(nGroup, xadj=_g.xadj,
                                   adjncy=_g.adjncy, eweights=_g.eweights)

    f = open("cg-" + str(nGroup) + ".group", "wb")
    f.write(struct.pack("i", len(vers)))
    for i in vers:
        f.write(struct.pack("i", i))
    f.close()
ey = [0., 0., 0., 1., -1., 0., 0., 1., 1., -1., -1., 1., 1., -1., -1.]
ez = [0., 0., 0., 0., 0., 1., -1., 1., 1., 1., 1., -1., -1., -1., -1.]

print('Total lattice points = %d.' % (Nx*Ny*Nz))
print('Setting adjacency list')
adjDict = set_adjacency(int(Nx), int(Ny), int(Nz), ex, ey, ez)
N_parts = 24
print('Nx = %d ' % Nx)
print('Ny = %d ' % Ny)
print('Nz = %d ' % Nz)

print('getting METIS partition')
cuts, part_vert = part_graph(N_parts, adjDict)

print('getting part_advisor partition')
px, py, pz = part_advisor(Nx, Ny, Nz, N_parts)

# make sure all of these things are integers...
Nx = int(Nx)
Ny = int(Ny)
Nz = int(Nz)
px = int(px)
py = int(py)
pz = int(pz)

part_vert_pa = set_geometric_partition(Nx, Ny, Nz, px, py, pz)
part_vert1D = set_geometric_partition(Nx, Ny, Nz, 1, 1, N_parts)

cuts_metis = count_cuts(adjDict, part_vert)
cuts_pa = count_cuts(adjDict, part_vert_pa)
cuts_1D = count_cuts(adjDict, part_vert1D)
def __init__(self, mat, is_symmetric, dtype):
    from pycuda.tools import DeviceData
    devdata = DeviceData()

    # all row indices in the data structure generation code are
    # "unpermuted" unless otherwise specified
    self.dtype = np.dtype(dtype)
    self.index_dtype = np.int32
    self.packed_index_dtype = np.uint32
    self.threads_per_packet = devdata.max_threads

    h, w = self.shape = mat.shape
    if h != w:
        raise ValueError("only square matrices are supported")

    self.rows_per_packet = (devdata.shared_memory - 100) \
        // (2 * self.dtype.itemsize)

    self.block_count = (h + self.rows_per_packet - 1) // self.rows_per_packet

    # get metis partition -------------------------------------------------
    from scipy.sparse import csr_matrix
    csr_mat = csr_matrix(mat, dtype=self.dtype)

    from pymetis import part_graph
    if not is_symmetric:
        # make sure adjacency graph is undirected
        adj_mat = csr_mat + csr_mat.T
    else:
        adj_mat = csr_mat

    while True:
        cut_count, dof_to_packet_nr = part_graph(int(self.block_count),
                                                 xadj=adj_mat.indptr,
                                                 adjncy=adj_mat.indices)

        # build packet_nr_to_dofs
        packet_nr_to_dofs = {}
        for i, packet_nr in enumerate(dof_to_packet_nr):
            try:
                dof_packet = packet_nr_to_dofs[packet_nr]
            except KeyError:
                packet_nr_to_dofs[packet_nr] = dof_packet = []

            dof_packet.append(i)

        packet_nr_to_dofs = [
            packet_nr_to_dofs.get(i) for i in range(len(packet_nr_to_dofs))
        ]

        too_big = False
        for packet_dofs in packet_nr_to_dofs:
            if len(packet_dofs) >= self.rows_per_packet:
                too_big = True
                break

        if too_big:
            old_block_count = self.block_count
            self.block_count = int(2 + 1.05 * self.block_count)
            print(("Metis produced a big block at block count "
                   "%d--retrying with %d" % (old_block_count,
                                             self.block_count)))
            continue

        break

    assert len(packet_nr_to_dofs) == self.block_count

    # permutations, base rows ---------------------------------------------
    (
        new2old_fetch_indices,
        old2new_fetch_indices,
        packet_base_rows,
    ) = self.find_simple_index_stuff(packet_nr_to_dofs)

    # find local row cost and remaining_coo -------------------------------
    local_row_costs, remaining_coo = \
        self.find_local_row_costs_and_remaining_coo(
            csr_mat, dof_to_packet_nr, old2new_fetch_indices)
    local_nnz = np.sum(local_row_costs)
    assert remaining_coo.nnz == csr_mat.nnz - local_nnz

    # find thread assignment for each block -------------------------------
    thread_count = len(packet_nr_to_dofs) * self.threads_per_packet
    thread_assignments, thread_costs = self.find_thread_assignment(
        packet_nr_to_dofs, local_row_costs, thread_count)

    max_thread_costs = np.max(thread_costs)

    # build data structure ------------------------------------------------
    from .pkt_build import build_pkt_data_structure
    build_pkt_data_structure(
        self,
        packet_nr_to_dofs,
        max_thread_costs,
        old2new_fetch_indices,
        csr_mat,
        thread_count,
        thread_assignments,
        local_row_costs,
    )

    self.packet_base_rows = gpuarray.to_gpu(packet_base_rows)
    self.new2old_fetch_indices = gpuarray.to_gpu(new2old_fetch_indices)
    self.old2new_fetch_indices = gpuarray.to_gpu(old2new_fetch_indices)

    from .coordinate import CoordinateSpMV
    self.remaining_coo_gpu = CoordinateSpMV(remaining_coo, dtype)
def find_consensus_grouping(groupings, debug=False):
    '''
    This implements Strehl et al's Meta-Clustering Algorithm [1].

    Inputs:
        groupings - a list of lists of lists of object ids, for example

            [
                [  # sample 0
                    [0, 1, 2],  # sample 0, group 0
                    [3, 4],     # sample 0, group 1
                    [5]         # sample 0, group 2
                ],
                [  # sample 1
                    [0, 1],       # sample 1, group 0
                    [2, 3, 4, 5]  # sample 1, group 1
                ]
            ]

    Returns:
        a list of Row instances sorted by (- row.group_id, row.confidence)

    References:
    [1] Alexander Strehl, Joydeep Ghosh, Claire Cardie (2002)
        "Cluster Ensembles - A Knowledge Reuse Framework for Combining
        Multiple Partitions"
        Journal of Machine Learning Research
        http://jmlr.csail.mit.edu/papers/volume3/strehl02a/strehl02a.pdf
    '''
    if not groupings:
        raise LoomError('tried to find consensus among zero groupings')

    # ------------------------------------------------------------------------
    # Set up consensus grouping problem

    allgroups = sum(groupings, [])
    objects = list(set(sum(allgroups, [])))
    objects.sort()
    index = {item: i for i, item in enumerate(objects)}

    vertices = [numpy.array(list(map(index.__getitem__, g)), dtype=numpy.intp)
                for g in allgroups]

    contains = numpy.zeros((len(vertices), len(objects)), dtype=numpy.float32)
    for v, vertex in enumerate(vertices):
        contains[v, vertex] = 1  # i.e. for u in vertex: contains[v, u] = 1

    # We use the binary Jaccard measure for similarity
    overlap = numpy.dot(contains, contains.T)
    diag = overlap.diagonal()
    denom = (diag.reshape(len(vertices), 1)
             + diag.reshape(1, len(vertices)) - overlap)
    similarity = overlap / denom

    # ------------------------------------------------------------------------
    # Format for metis

    if not (similarity.max() <= 1):
        raise LoomError('similarity.max() = {}'.format(similarity.max()))
    similarity *= 2**16  # metis segfaults if this is too large
    int_similarity = numpy.zeros(similarity.shape, dtype=numpy.int32)
    int_similarity[:] = numpy.rint(similarity)

    edges = int_similarity.nonzero()
    edge_weights = list(map(int, int_similarity[edges]))
    edges = numpy.transpose(edges)

    adjacency = [[] for _ in vertices]
    for i, j in edges:
        adjacency[i].append(j)

    # FIXME is there a better way to choose the final group count?
    group_count = int(numpy.median(list(map(len, groupings))))

    metis_args = {
        'nparts': group_count,
        'adjacency': adjacency,
        'eweights': edge_weights,
    }

    if debug:
        json_dump(metis_args, METIS_ARGS_TEMPFILE, indent=4)

    edge_cut, partition = pymetis.part_graph(**metis_args)

    if debug:
        os.remove(METIS_ARGS_TEMPFILE)

    # ------------------------------------------------------------------------
    # Clean up solution

    parts = range(group_count)
    if len(partition) != len(vertices):
        raise LoomError('metis output vector has wrong length')

    represents = numpy.zeros((len(parts), len(vertices)))
    for v, p in enumerate(partition):
        represents[p, v] = 1
    contains = numpy.dot(represents, contains)
    represent_counts = represents.sum(axis=1)
    represent_counts[numpy.where(represent_counts == 0)] = 1  # avoid NANs
    contains /= represent_counts.reshape(group_count, 1)

    bestmatch = contains.argmax(axis=0)
    confidence = contains[bestmatch, range(len(bestmatch))]
    if not all(numpy.isfinite(confidence)):
        raise LoomError('confidence is nan')

    nonempty_groups = list(set(bestmatch))
    nonempty_groups.sort()
    reindex = {j: i for i, j in enumerate(nonempty_groups)}

    grouping = [
        Row(row_id=objects[i], group_id=reindex[g], confidence=c)
        for i, (g, c) in enumerate(zip(bestmatch, confidence))
    ]

    groups = collate((row.group_id, row) for row in grouping)
    groups.sort(key=len, reverse=True)
    grouping = [
        Row(row_id=row.row_id, group_id=group_id, confidence=row.confidence)
        for group_id, group in enumerate(groups)
        for row in group
    ]
    grouping.sort(key=lambda x: (x.group_id, -x.confidence, x.row_id))
    return grouping