def setGtThreads(threads):
    """Set the graph-tool OpenMP thread count, if OpenMP is available.

    When graph-tool reports that OpenMP is enabled, the thread count is set
    to ``threads`` and the count reported back by graph-tool is written to
    stderr. Otherwise nothing happens.

    Args:
        threads (int)
            Number of OpenMP threads to request from graph-tool
    """
    import graph_tool.all as gt

    # Nothing to configure (or report) without OpenMP support
    if not gt.openmp_enabled():
        return

    gt.openmp_set_num_threads(threads)
    active_threads = gt.openmp_get_num_threads()
    sys.stderr.write('\nGraph-tools OpenMP parallelisation enabled:')
    sys.stderr.write(' with {} threads\n'.format(active_threads))
def newNetwork2D(y_idx, sample_names, distMat, x_range, y_range, score_idx=0,
                 betweenness_sample = betweenness_sample_default, use_gpu = False):
    """Score networks along x_range for a single y-intercept.

    Wraps thresholdIterate2D and :func:`growNetwork`: the y-intercept is
    looked up as ``y_range[y_idx]``, edges are thresholded for every value in
    x_range, and the resulting networks are scored.

    Args:
        y_idx (int)
            Index into y_range of the maximum y-intercept of the boundary
        sample_names (list)
            Sample names corresponding to distMat (accessed by iterator)
        distMat (numpy.array or NumpyShared)
            Core and accessory distances or NumpyShared describing these in sharedmem
        x_range (list)
            Sorted list of x-intercepts to search
        y_range (list)
            Sorted list of y-intercepts to search
        score_idx (int)
            Index of score from :func:`~PopPUNK.network.networkSummary` to use
            [default = 0]
        betweenness_sample (int)
            Number of sequences per component used to estimate betweenness using
            a GPU. Smaller numbers are faster but less precise
            [default = betweenness_sample_default]
        use_gpu (bool)
            Whether to use cugraph for graph analysis

    Returns:
        scores (list)
            -1 * network score for each of x_range.
            Where network score is from :func:`~PopPUNK.network.networkSummary`.
            A list of zeros is returned when the boundary is skipped.
    """
    # Restrict graph-tool to a single OpenMP thread for this call
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(1)

    distances = distMat
    if isinstance(distMat, NumpyShared):
        # The handle stays bound to a local so the buffer backing `distances`
        # remains referenced for the rest of the function
        shm_handle = shared_memory.SharedMemory(name = distMat.name)
        distances = np.ndarray(distMat.shape,
                               dtype = distMat.dtype,
                               buffer = shm_handle.buf)

    boundary_y = y_range[y_idx]
    i_vec, j_vec, idx_vec = poppunk_refine.thresholdIterate2D(distances,
                                                              x_range,
                                                              boundary_y)

    # If everything is in the network, skip this boundary
    if len(idx_vec) == distances.shape[0]:
        return [0] * len(x_range)

    return growNetwork(sample_names, i_vec, j_vec, idx_vec, x_range,
                       score_idx, y_idx, betweenness_sample,
                       use_gpu = use_gpu)
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness and pass the ranking to createTable.

    Computes vertex betweenness on ``Graph.g`` with graph-tool, maps every
    vertex to its external id through ``Graph.indexIdDict``, prints the
    elapsed wall-clock time, and calls ``createTable`` with the ids sorted by
    descending betweenness.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (a graph-tool graph) and ``indexIdDict``
            (vertex -> id lookup).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection; confirm against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor; confirm against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    # gt.betweenness returns two property maps (vertex map, edge map);
    # [0] selects the vertex map
    vp = gt.betweenness(Graph.g)[0]
    values = vp.get_array()
    idBt = {Graph.indexIdDict[each]: values[each] for each in Graph.g.vertices()}
    # One pre-built string: valid syntax and identical output on Python 2 and
    # Python 3 (the old `print a, b` statement is a SyntaxError on Python 3).
    print("Total handling time is:  " + str(time.time() - before_time))
    slist = sorted(idBt, key=idBt.get, reverse=True)
    createTable(rankCommands, slist, idBt, conn, cur)
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness and pass the ranking to createTable.

    Computes vertex betweenness on ``Graph.g`` with graph-tool, maps every
    vertex to its external id through ``Graph.indexIdDict``, prints the
    elapsed wall-clock time, and calls ``createTable`` with the ids sorted by
    descending betweenness.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (a graph-tool graph) and ``indexIdDict``
            (vertex -> id lookup).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection; confirm against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor; confirm against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    # gt.betweenness returns two property maps (vertex map, edge map);
    # [0] selects the vertex map
    vp = gt.betweenness(Graph.g)[0]
    values = vp.get_array()
    idBt = {Graph.indexIdDict[each]: values[each] for each in Graph.g.vertices()}
    # One pre-built string: valid syntax and identical output on Python 2 and
    # Python 3 (the old `print a, b` statement is a SyntaxError on Python 3).
    print("Total handling time is:  " + str(time.time() - before_time))
    slist = sorted(idBt, key=idBt.get, reverse=True)
    createTable(rankCommands, slist, idBt, conn, cur)
def closeness(rankCommands, Graph, conn, cur):
    """Rank vertices by closeness and pass the ranking to createTable.

    Computes closeness on ``Graph.g`` with graph-tool, maps every vertex to
    its external id through ``Graph.indexIdDict`` (NaN closeness values are
    stored as 0.0), prints the elapsed wall-clock time, and calls
    ``createTable`` with the ids sorted by descending closeness.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (a graph-tool graph) and ``indexIdDict``
            (vertex -> id lookup).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection; confirm against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor; confirm against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    c = gt.closeness(Graph.g)
    values = c.get_array()
    idCl = dict()
    for each in Graph.g.vertices():
        score = values[each]
        # NaN is replaced by 0.0 so those vertices sort last in the ranking
        idCl[Graph.indexIdDict[each]] = 0.0 if numpy.isnan(score) else score
    # One pre-built string: valid syntax and identical output on Python 2 and
    # Python 3 (the old `print a, b` statement is a SyntaxError on Python 3).
    print("Total handling time is:  " + str(time.time() - before_time))
    slist = sorted(idCl, key=idCl.get, reverse=True)
    createTable(rankCommands, slist, idCl, conn, cur)
def closeness(rankCommands, Graph, conn, cur):
    """Rank vertices by closeness and pass the ranking to createTable.

    Computes closeness on ``Graph.g`` with graph-tool, maps every vertex to
    its external id through ``Graph.indexIdDict`` (NaN closeness values are
    stored as 0.0), prints the elapsed wall-clock time, and calls
    ``createTable`` with the ids sorted by descending closeness.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (a graph-tool graph) and ``indexIdDict``
            (vertex -> id lookup).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection; confirm against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor; confirm against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    c = gt.closeness(Graph.g)
    values = c.get_array()
    idCl = dict()
    for each in Graph.g.vertices():
        score = values[each]
        # NaN is replaced by 0.0 so those vertices sort last in the ranking
        idCl[Graph.indexIdDict[each]] = 0.0 if numpy.isnan(score) else score
    # One pre-built string: valid syntax and identical output on Python 2 and
    # Python 3 (the old `print a, b` statement is a SyntaxError on Python 3).
    print("Total handling time is:  " + str(time.time() - before_time))
    slist = sorted(idCl, key=idCl.get, reverse=True)
    createTable(rankCommands, slist, idCl, conn, cur)
def blockModel(clusterCommands, Graph, conn, cur):
    """Group node ids by inferred block and pass the groups to createTable.

    Fits a block model to ``Graph.g`` with ``gt.minimize_blockmodel_dl``,
    reads the per-vertex block labels, and builds a list in which entry ``k``
    holds the ids (looked up in ``Graph.indexIdDict`` by vertex position) of
    all vertices labelled ``k``.

    Args:
        clusterCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (a graph-tool graph) and ``indexIdDict``
            (vertex index -> id lookup).
        conn: forwarded unchanged to ``createTable``.
        cur: forwarded unchanged to ``createTable``.
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    partition = gt.minimize_blockmodel_dl(Graph.g)
    labels = partition.b.get_array()

    # Largest label present decides how many member lists are needed
    highest_label = sorted(labels[:])[-1]
    commDict = [[] for _ in range(highest_label + 1)]

    for position, label in enumerate(labels):
        member_id = Graph.indexIdDict[position]
        commDict[label].append(member_id)

    createTable(clusterCommands, commDict, conn, cur)
def refineFit(distMat, sample_names, mean0, mean1, scale,
              max_move, min_move, slope = 2, score_idx = 0,
              unconstrained = False, no_local = False, num_processes = 1,
              betweenness_sample = betweenness_sample_default, use_gpu = False):
    """Try to refine a fit by maximising a network score based on transitivity and density.

    Iteratively move the decision boundary to do this, using starting point from existing model.

    The search has two stages: a global grid search (2D over x/y intercepts
    when ``unconstrained`` is True, otherwise 1D along the line from mean0 to
    mean1), followed by an optional bounded scalar minimisation around the
    best grid point.

    Args:
        distMat (numpy.array)
            n x 2 array of core and accessory distances for n samples
        sample_names (list)
            List of query sequence labels
        mean0 (numpy.array)
            Start point to define search line
        mean1 (numpy.array)
            End point to define search line
        scale (numpy.array)
            Scaling factor of distMat
        max_move (float)
            Maximum distance to move away from start point
        min_move (float)
            Minimum distance to move away from start point
        slope (int)
            Set to 0 for a vertical line, 1 for a horizontal line, or
            2 to use a slope
        score_idx (int)
            Index of score from :func:`~PopPUNK.network.networkSummary` to use
            [default = 0]
        unconstrained (bool)
            If True, search in 2D and change the slope of the boundary
        no_local (bool)
            Turn off the local optimisation step.
            Quicker, but may be less well refined.
        num_processes (int)
            Number of threads to use in the global optimisation step.
            (default = 1)
        betweenness_sample (int)
            Number of sequences per component used to estimate betweenness using
            a GPU. Smaller numbers are faster but less precise [default = 100]
        use_gpu (bool)
            Whether to use cugraph for graph analyses

    Returns:
        optimal_x (float)
            x-coordinate of refined fit
        optimal_y (float)
            y-coordinate of refined fit

    Raises:
        RuntimeError
            If ``unconstrained`` is combined with ``slope != 2``; if the
            start of the search range gives a negative intercept; if the 1D
            search range includes all points; or if the final optimum has a
            negative coordinate.
    """
    # Optimize boundary - grid search for global minimum
    sys.stderr.write("Trying to optimise score globally\n")

    # load CUDA libraries
    # (use_gpu is rebound to whatever check_and_set_gpu returns)
    use_gpu = check_and_set_gpu(use_gpu, gpu_lib)

    # Boundary is left of line normal to this point and first line
    gradient = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])

    if unconstrained:
        if slope != 2:
            raise RuntimeError("Unconstrained optimization and indiv-refine incompatible")

        # 2D grid: global_grid_resolution x-intercepts by the same number of
        # y-intercepts, spanning the boundaries through mean0 and mean1
        global_grid_resolution = 20
        x_max_start, y_max_start = decisionBoundary(mean0, gradient)
        x_max_end, y_max_end = decisionBoundary(mean1, gradient)

        if x_max_start < 0 or y_max_start < 0:
            raise RuntimeError("Boundary range below zero")

        x_max = np.linspace(x_max_start, x_max_end, global_grid_resolution, dtype=np.float32)
        y_max = np.linspace(y_max_start, y_max_end, global_grid_resolution, dtype=np.float32)
        sys.stderr.write("Searching core intercept from " +
                         "{:.3f}".format(x_max_start * scale[0]) +
                         " to " +
                         "{:.3f}".format(x_max_end * scale[0]) + "\n")
        sys.stderr.write("Searching accessory intercept from " +
                         "{:.3f}".format(y_max_start * scale[1]) +
                         " to " +
                         "{:.3f}".format(y_max_end * scale[1]) + "\n")

        if use_gpu:
            # GPU path: built-in map in this process, no worker pool;
            # one newNetwork2D call per y index
            global_s = map(partial(newNetwork2D,
                                   sample_names = sample_names,
                                   distMat = distMat,
                                   x_range = x_max,
                                   y_range = y_max,
                                   score_idx = score_idx,
                                   betweenness_sample = betweenness_sample,
                                   use_gpu = True),
                           range(global_grid_resolution))
        else:
            # CPU path: graph-tool is set to one OpenMP thread while the
            # process pool provides the parallelism
            if gt.openmp_enabled():
                gt.openmp_set_num_threads(1)

            with SharedMemoryManager() as smm:
                # Copy the distances into shared memory; workers receive
                # only the NumpyShared descriptor (name, shape, dtype)
                shm_distMat = smm.SharedMemory(size = distMat.nbytes)
                distances_shared_array = np.ndarray(distMat.shape, dtype = distMat.dtype, buffer = shm_distMat.buf)
                distances_shared_array[:] = distMat[:]
                distances_shared = NumpyShared(name = shm_distMat.name, shape = distMat.shape, dtype = distMat.dtype)

                with Pool(processes = num_processes) as pool:
                    global_s = pool.map(partial(newNetwork2D,
                                                sample_names = sample_names,
                                                distMat = distances_shared,
                                                x_range = x_max,
                                                y_range = y_max,
                                                score_idx = score_idx,
                                                betweenness_sample = betweenness_sample,
                                                use_gpu = False),
                                        range(global_grid_resolution))

            # Pool finished: set graph-tool's thread count to num_processes
            if gt.openmp_enabled():
                gt.openmp_set_num_threads(num_processes)

        # Flatten the per-y-index results into one array. The index
        # arithmetic below treats it as y-major: flat index =
        # y index * resolution + x index. NaN scores are set to 1.
        global_s = np.array(list(chain.from_iterable(global_s)))
        global_s[np.isnan(global_s)] = 1
        min_idx = np.argmin(global_s)
        optimal_x = x_max[min_idx % global_grid_resolution]
        optimal_y = y_max[min_idx // global_grid_resolution]

        # Optimum not strictly inside the searched range: skip the local step
        if not (optimal_x > x_max_start and optimal_x < x_max_end and \
                optimal_y > y_max_start and optimal_y < y_max_end):
            no_local = True
        elif not no_local:
            # We have a fixed gradient and want to optimised the intercept
            # This parameterisation is a little awkward to match the 1D case:
            # Make two points along the right slope
            # NOTE(review): gradient and mean1 are rebound here but mean0 is
            # not; confirm mean0 is the intended origin for the local search
            gradient = optimal_x / optimal_y # of 1D search
            delta = x_max[1] - x_max[0]
            bounds = [-delta, delta]
            mean1 = (optimal_x + delta, delta * gradient)

    else:
        # Set the range of points to search
        # (max_move plus the Euclidean distance between mean0 and mean1)
        search_length = max_move + ((mean1[0] - mean0[0])**2 + (mean1[1] - mean0[1])**2)**0.5
        global_grid_resolution = 40 # Seems to work
        s_range = np.linspace(-min_move, search_length, num = global_grid_resolution)
        bottom_end = transformLine(s_range[0], mean0, mean1)
        top_end = transformLine(s_range[-1], mean0, mean1)
        min_x, min_y = decisionBoundary(bottom_end, gradient)
        max_x, max_y = decisionBoundary(top_end, gradient)

        if min_x < 0 or min_y < 0:
            raise RuntimeError("Boundary range below zero")

        sys.stderr.write("Search range (" +
                         ",".join(["{:.3f}".format(x) for x in bottom_end * scale]) +
                         ") to (" +
                         ",".join(["{:.3f}".format(x) for x in top_end * scale]) + ")\n")
        sys.stderr.write("Searching core intercept from " +
                         "{:.3f}".format(min_x * scale[0]) +
                         " to " +
                         "{:.3f}".format(max_x * scale[0]) + "\n")
        sys.stderr.write("Searching accessory intercept from " +
                         "{:.3f}".format(min_y * scale[1]) +
                         " to " +
                         "{:.3f}".format(max_y * scale[1]) + "\n")

        i_vec, j_vec, idx_vec = \
            poppunk_refine.thresholdIterate1D(distMat, s_range, slope,
                                              mean0[0], mean0[1],
                                              mean1[0], mean1[1],
                                              num_processes)
        if len(idx_vec) == distMat.shape[0]:
            raise RuntimeError("Boundary range includes all points")
        global_s = np.array(growNetwork(sample_names,
                                        i_vec,
                                        j_vec,
                                        idx_vec,
                                        s_range,
                                        score_idx,
                                        betweenness_sample = betweenness_sample,
                                        use_gpu = use_gpu))
        # NaN scores are set to 1 before taking the minimum
        global_s[np.isnan(global_s)] = 1
        min_idx = np.argmin(np.array(global_s))
        # Local search brackets the best grid point with its two neighbours;
        # a best point at either end of s_range disables the local step
        if min_idx > 0 and min_idx < len(s_range) - 1:
            bounds = [s_range[min_idx-1], s_range[min_idx+1]]
        else:
            no_local = True
        if no_local:
            optimised_s = s_range[min_idx]

    # Local optimisation around global optimum
    if not no_local:
        sys.stderr.write("Trying to optimise score locally\n")
        local_s = scipy.optimize.minimize_scalar(
                    newNetwork,
                    bounds = bounds,
                    method = 'Bounded', options={'disp': True},
                    args = (sample_names, distMat, mean0, mean1, gradient,
                            slope, score_idx, num_processes,
                            betweenness_sample, use_gpu)
                    )
        optimised_s = local_s.x

    # Convert to x_max, y_max if needed
    # (skipped only for an unconstrained search without a local step, where
    # optimal_x/optimal_y were already taken from the grid)
    if not unconstrained or not no_local:
        optimised_coor = transformLine(optimised_s, mean0, mean1)
        if slope == 2:
            optimal_x, optimal_y = decisionBoundary(optimised_coor, gradient)
        else:
            optimal_x = optimised_coor[0]
            optimal_y = optimised_coor[1]

    if optimal_x < 0 or optimal_y < 0:
        raise RuntimeError("Optimisation failed: produced a boundary outside of allowed range\n")

    return optimal_x, optimal_y
#!/usr/bin/env python import sys,os import time import pylab as plt from sbmtm import sbmtm import graph_tool.all as gt import numpy as np from matplotlib import pyplot as plt gt.openmp_set_num_threads(int(sys.argv[1])) #set num threads gt.seed_rng(42) #same results print("Welcome to Topic Modelling") print("using ",gt.openmp_get_num_threads(), " threads") if __name__ == '__main__': start = time.time() print("initialised") gt.seed_rng(42) print("seed set") model = sbmtm() print("model created") model.load_graph(filename = '/home/filippo/files/graph.xml.gz') print("graph loaded") print(model.g) model.fit(n_init=1, parallel=True, verbose=True) #model.fit_overlap(n_init=1, verbose=True, parallel=True) #model.plot() model.save_data() model.dump_model() os.system("mv *.csv *.png *.txt *.pkl /home/filippo/files/.")
load_true_partition=False, strm_piece_num=part, out_neighbors=out_neighbors, in_neighbors=in_neighbors) else: out_neighbors, in_neighbors, N, E, true_partition = load_graph( input_filename, load_true_partition=True) input_graph = gt.Graph() input_graph.add_edge_list([(i, j) for i in range(len(out_neighbors)) if len(out_neighbors[i]) > 0 for j in out_neighbors[i][:, 0]]) t0 = timeit.default_timer() # the parallel switch determines whether MCMC updates are run in parallel, epsilon is the convergence threshold for # the nodal updates (smaller value is stricter), and the verbose option prints updates on each step of the algorithm. # Please refer to the graph-tool documentation under graph-tool.inference for details on the input parameters if args.threads > 0: gt.openmp_set_num_threads(args.threads) graph_tool_partition = gt.minimize_blockmodel_dl(input_graph, mcmc_args={'parallel': True}, mcmc_equilibrate_args={ 'verbose': False, 'epsilon': 1e-4 }, verbose=True) t1 = timeit.default_timer() print('\nGraph partition took {} seconds'.format(t1 - t0)) evaluate_partition(true_partition, graph_tool_partition.get_blocks().a)
# Plot output settings written into rcParams (PostScript/PDF output,
# path simplification and LaTeX preamble packages).
rcParams["ps.usedistiller"] = "xpdf"
rcParams["pdf.compression"] = 9
rcParams["ps.useafm"] = True
rcParams["path.simplify"] = True
rcParams["text.latex.preamble"] = [
    #r"\usepackage{times}",
    #r"\usepackage{euler}",
    r"\usepackage{amssymb}",
    r"\usepackage{amsmath}"
]

import scipy
import scipy.stats
import numpy as np
# NOTE: two star imports; names exported by numpy override same-named
# names from pylab
from pylab import *
from numpy import *
import graph_tool.all as gt
import graph_tool.draw
# Standard-library random under an alias
# (presumably to avoid clashing with star-imported names; confirm)
import random as prandom

figure()

# Best effort: a RuntimeError from setting the thread count is ignored
try:
    gt.openmp_set_num_threads(1)
except RuntimeError:
    pass

# Fixed seeds for the three random number generators used
prandom.seed(42)
np.random.seed(42)
gt.seed_rng(42)
import graph_tool.all as gt
gt.openmp_set_num_threads(1)  # openmp does not play well with multiprocessing
import numpy as np
import logging


class G:
    """
    Base graph class

    Thin wrapper around an undirected graph-tool graph stored in ``self.g``.
    """

    def __init__(self, N, G_in=None):
        """Create a graph with N vertices, or copy the graph held by G_in.

        Args:
            N: number of vertices to add when no source graph is given.
            G_in: optional object with a ``g`` attribute to copy from.
        """
        if G_in is None:
            self.N = N
            self.g = gt.Graph(directed=False)
            self.g.add_vertex(N)
        else:
            # NOTE: self.N is not set on this path
            g = G_in.g.copy()
            self.g = gt.Graph(g, prune=False, directed=False)

    def get_shortest_path(self, source, target, weights):
        """Return (vertex_list, edge_list) from gt.shortest_path on self.g."""
        vertex_list, edge_list = gt.shortest_path(self.g, source, target, weights)
        return vertex_list, edge_list

    def get_number_of_edges(self):
        """Return the number of edges in self.g."""
        return self.g.num_edges()

    def get_number_of_vertices(self):
        """Return the number of vertices in self.g."""
        return self.g.num_vertices()

    def purge_vertices(self):
# Plot settings written into rcParams (figure margin, colour map, LaTeX text
# rendering, PostScript/PDF output and LaTeX preamble packages).
rcParams["figure.subplot.bottom"] = 0.2
rcParams["image.cmap"] = "hot"
rcParams["text.usetex"] = True
rcParams["ps.usedistiller"] = "xpdf"
rcParams["pdf.compression"] = 9
rcParams["ps.useafm"] = True
rcParams["path.simplify"] = True
rcParams["text.latex.preamble"] = [#"\usepackage{times}",
                                   #"\usepackage{euler}",
                                   r"\usepackage{amssymb}",
                                   r"\usepackage{amsmath}"]

import scipy
import scipy.stats
import numpy as np
# NOTE: two star imports; names exported by numpy override same-named
# names from pylab
from pylab import *
from numpy import *
import graph_tool.all as gt

figure()

# Best effort: a RuntimeError from setting the thread count is ignored
try:
    gt.openmp_set_num_threads(1)
except RuntimeError:
    pass

# Fixed seeds for the NumPy and graph-tool random number generators
np.random.seed(42)
gt.seed_rng(42)