Esempio n. 1
0
def setGtThreads(threads):
    """Set graph-tool's OpenMP thread count and report it on stderr.

    Does nothing when graph-tool reports that OpenMP is not enabled.

    Args:
        threads (int)
            Value passed to ``gt.openmp_set_num_threads``
    """
    import graph_tool.all as gt

    # Nothing to configure (and nothing to report) without OpenMP support
    if not gt.openmp_enabled():
        return

    gt.openmp_set_num_threads(threads)
    sys.stderr.write('\nGraph-tools OpenMP parallelisation enabled:')
    # Report the count graph-tool says it is using, not the requested value
    active_threads = gt.openmp_get_num_threads()
    sys.stderr.write(' with ' + str(active_threads) + ' threads\n')
Esempio n. 2
0
def newNetwork2D(y_idx, sample_names, distMat, x_range, y_range, score_idx=0,
                 betweenness_sample = betweenness_sample_default, use_gpu = False):
    """Score networks along x_range for a single y-intercept.

    Wraps ``poppunk_refine.thresholdIterate2D`` and :func:`growNetwork`: the
    y-intercept is fixed at ``y_range[y_idx]`` and one score is produced for
    each x-intercept in ``x_range``.

    Args:
        y_idx (int)
            Index into y_range selecting the maximum y-intercept of the boundary
        sample_names (list)
            Sample names corresponding to distMat (accessed by iterator)
        distMat (numpy.array or NumpyShared)
            Core and accessory distances, or a NumpyShared handle describing
            an array of them held in shared memory
        x_range (list)
            Sorted list of x-intercepts to search
        y_range (list)
            Sorted list of y-intercepts to search
        score_idx (int)
            Index of score from :func:`~PopPUNK.network.networkSummary` to use
            [default = 0]
        betweenness_sample (int)
            Number of sequences per component used to estimate betweenness using
            a GPU. Smaller numbers are faster but less precise
            [default = betweenness_sample_default]
        use_gpu (bool)
            Whether to use cugraph for graph analysis

    Returns:
        scores (list)
            -1 * network score for each of x_range.
            Where network score is from :func:`~PopPUNK.network.networkSummary`.
            A list of zeros is returned when the boundary is skipped.
    """
    # Keep graph-tool single-threaded inside this call
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(1)

    # A NumpyShared handle is turned back into an array view over the shared
    # block; shm_handle stays referenced for the rest of the call
    dist_array = distMat
    if isinstance(distMat, NumpyShared):
        shm_handle = shared_memory.SharedMemory(name = distMat.name)
        dist_array = np.ndarray(distMat.shape,
                                dtype = distMat.dtype,
                                buffer = shm_handle.buf)

    boundary_y = y_range[y_idx]
    edge_rows, edge_cols, edge_steps = poppunk_refine.thresholdIterate2D(
        dist_array, x_range, boundary_y)

    # If everything is in the network, skip this boundary
    if len(edge_steps) == dist_array.shape[0]:
        return [0] * len(x_range)

    return growNetwork(sample_names,
                       edge_rows,
                       edge_cols,
                       edge_steps,
                       x_range,
                       score_idx,
                       y_idx,
                       betweenness_sample,
                       use_gpu = use_gpu)
Esempio n. 3
0
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness and hand the ranking to createTable.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (the graph passed to ``gt.betweenness``)
            and ``indexIdDict`` (lookup from a vertex of ``g`` to its id).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection - verify against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor - verify against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    # gt.betweenness returns two property maps (vertex map, edge map);
    # [0] selects the vertex map
    vp = gt.betweenness(Graph.g)[0]
    values = vp.get_array()
    idBt = {Graph.indexIdDict[each]: values[each]
            for each in Graph.g.vertices()}
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function (the bare statement form
    # is a SyntaxError on Python 3)
    print("Total handling time is:  %s" % (time.time() - before_time,))
    # Ids ordered from highest to lowest betweenness
    slist = sorted(idBt, key=idBt.get, reverse=True)
    createTable(rankCommands, slist, idBt, conn, cur)
Esempio n. 4
0
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness and hand the ranking to createTable.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (the graph passed to ``gt.betweenness``)
            and ``indexIdDict`` (lookup from a vertex of ``g`` to its id).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection - verify against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor - verify against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    # gt.betweenness returns two property maps (vertex map, edge map);
    # [0] selects the vertex map
    vp = gt.betweenness(Graph.g)[0]
    values = vp.get_array()
    idBt = {Graph.indexIdDict[each]: values[each]
            for each in Graph.g.vertices()}
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function (the bare statement form
    # is a SyntaxError on Python 3)
    print("Total handling time is:  %s" % (time.time() - before_time,))
    # Ids ordered from highest to lowest betweenness
    slist = sorted(idBt, key=idBt.get, reverse=True)
    createTable(rankCommands, slist, idBt, conn, cur)
Esempio n. 5
0
def closeness(rankCommands, Graph, conn, cur):
    """Rank vertices by closeness and hand the ranking to createTable.

    Vertices whose closeness value is NaN are stored with a score of 0.0.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (the graph passed to ``gt.closeness``)
            and ``indexIdDict`` (lookup from a vertex of ``g`` to its id).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection - verify against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor - verify against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    c = gt.closeness(Graph.g)
    values = c.get_array()
    idCl = {}
    for each in Graph.g.vertices():
        score = values[each]
        # NaN cannot be ordered meaningfully, so it is stored as 0.0
        idCl[Graph.indexIdDict[each]] = 0.0 if numpy.isnan(score) else score
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function (the bare statement form
    # is a SyntaxError on Python 3)
    print("Total handling time is:  %s" % (time.time() - before_time,))
    # Ids ordered from highest to lowest closeness
    slist = sorted(idCl, key=idCl.get, reverse=True)
    createTable(rankCommands, slist, idCl, conn, cur)
Esempio n. 6
0
def closeness(rankCommands, Graph, conn, cur):
    """Rank vertices by closeness and hand the ranking to createTable.

    Vertices whose closeness value is NaN are stored with a score of 0.0.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (the graph passed to ``gt.closeness``)
            and ``indexIdDict`` (lookup from a vertex of ``g`` to its id).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection - verify against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor - verify against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    before_time = time.time()
    c = gt.closeness(Graph.g)
    values = c.get_array()
    idCl = {}
    for each in Graph.g.vertices():
        score = values[each]
        # NaN cannot be ordered meaningfully, so it is stored as 0.0
        idCl[Graph.indexIdDict[each]] = 0.0 if numpy.isnan(score) else score
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function (the bare statement form
    # is a SyntaxError on Python 3)
    print("Total handling time is:  %s" % (time.time() - before_time,))
    # Ids ordered from highest to lowest closeness
    slist = sorted(idCl, key=idCl.get, reverse=True)
    createTable(rankCommands, slist, idCl, conn, cur)
Esempio n. 7
0
def blockModel(clusterCommands, Graph, conn, cur):
    """Group node ids by block-model label and hand them to createTable.

    Runs ``gt.minimize_blockmodel_dl`` on ``Graph.g`` and builds a list with
    one entry per label from 0 to the largest label found; entry ``k`` holds
    the ids (via ``Graph.indexIdDict``) of the vertices labelled ``k``.
    Labels that no vertex carries yield empty entries. An empty label array
    yields an empty list instead of raising.

    Args:
        clusterCommands: forwarded unchanged to ``createTable``.
        Graph: object exposing ``g`` (the graph to partition) and
            ``indexIdDict`` (lookup from vertex index to node id).
        conn: forwarded unchanged to ``createTable``
            (presumably a database connection - verify against caller).
        cur: forwarded unchanged to ``createTable``
            (presumably a database cursor - verify against caller).
    """
    gt.openmp_set_num_threads(4)  # enable 4 threads for running the algorithm
    g = Graph.g
    state = gt.minimize_blockmodel_dl(g)
    b = state.b
    values = b.get_array()
    # Largest label via a linear scan; sorting the whole array was unnecessary
    n_comms = int(max(values)) + 1 if len(values) else 0
    commDict = [[] for _ in range(n_comms)]
    # Position in `values` is the vertex index used to look up the node id
    for index, each in enumerate(values):
        commDict[each].append(Graph.indexIdDict[index])
    createTable(clusterCommands, commDict, conn, cur)
Esempio n. 8
0
def refineFit(distMat, sample_names, mean0, mean1, scale,
              max_move, min_move, slope = 2, score_idx = 0,
              unconstrained = False, no_local = False, num_processes = 1,
              betweenness_sample = betweenness_sample_default, use_gpu = False):
    """Try to refine a fit by maximising a network score based on transitivity and density.

    Iteratively move the decision boundary to do this, using starting point from existing model.

    A global grid search is run first (2D over x- and y-intercepts when
    ``unconstrained`` is set, otherwise 1D along the line through mean0 and
    mean1), optionally followed by a bounded local optimisation around the
    best grid point.

    Args:
        distMat (numpy.array)
            n x 2 array of core and accessory distances for n samples
        sample_names (list)
            List of query sequence labels
        mean0 (numpy.array)
            Start point to define search line
        mean1 (numpy.array)
            End point to define search line
        scale (numpy.array)
            Scaling factor of distMat
        max_move (float)
            Maximum distance to move away from start point
        min_move (float)
            Minimum distance to move away from start point
        slope (int)
            Set to 0 for a vertical line, 1 for a horizontal line, or
            2 to use a slope
        score_idx (int)
            Index of score from :func:`~PopPUNK.network.networkSummary` to use
            [default = 0]
        unconstrained (bool)
            If True, search in 2D and change the slope of the boundary
        no_local (bool)
            Turn off the local optimisation step.
            Quicker, but may be less well refined.
        num_processes (int)
            Number of threads to use in the global optimisation step.
            (default = 1)
        betweenness_sample (int)
            Number of sequences per component used to estimate betweenness using
            a GPU. Smaller numbers are faster but less precise
            [default = betweenness_sample_default]
        use_gpu (bool)
            Whether to use cugraph for graph analyses

    Returns:
        optimal_x (float)
            x-coordinate of refined fit
        optimal_y (float)
            y-coordinate of refined fit

    Raises:
        RuntimeError
            If ``unconstrained`` is combined with ``slope != 2``; if the
            start of the search range gives a negative intercept; if the 1D
            search range includes all points; or if the final optimum has a
            negative coordinate.
    """
    # Optimize boundary - grid search for global minimum
    sys.stderr.write("Trying to optimise score globally\n")

    # load CUDA libraries
    use_gpu = check_and_set_gpu(use_gpu, gpu_lib)

    # Boundary is left of line normal to this point and first line
    # NOTE(review): undefined when mean0 and mean1 share an x-coordinate
    # (division by zero) - confirm callers never pass such points
    gradient = (mean1[1] - mean0[1]) / (mean1[0] - mean0[0])

    if unconstrained:
        if slope != 2:
            raise RuntimeError("Unconstrained optimization and indiv-refine incompatible")

        # Number of grid points along each of the two intercept axes
        global_grid_resolution = 20
        x_max_start, y_max_start = decisionBoundary(mean0, gradient)
        x_max_end, y_max_end = decisionBoundary(mean1, gradient)

        if x_max_start < 0 or y_max_start < 0:
            raise RuntimeError("Boundary range below zero")

        x_max = np.linspace(x_max_start, x_max_end, global_grid_resolution, dtype=np.float32)
        y_max = np.linspace(y_max_start, y_max_end, global_grid_resolution, dtype=np.float32)
        sys.stderr.write("Searching core intercept from " +
                         "{:.3f}".format(x_max_start * scale[0]) +
                         " to " + "{:.3f}".format(x_max_end * scale[0]) + "\n")
        sys.stderr.write("Searching accessory intercept from " +
                         "{:.3f}".format(y_max_start * scale[1]) +
                         " to " + "{:.3f}".format(y_max_end * scale[1]) + "\n")

        if use_gpu:
            # GPU path: each y-intercept is evaluated in this process, without
            # a process pool or shared memory
            global_s = map(partial(newNetwork2D,
                                   sample_names = sample_names,
                                   distMat = distMat,
                                   x_range = x_max,
                                   y_range = y_max,
                                   score_idx = score_idx,
                                   betweenness_sample = betweenness_sample,
                                   use_gpu = True),
                           range(global_grid_resolution))
        else:
            # CPU path: graph-tool is limited to one OpenMP thread while the
            # process pool below is doing the parallel work
            if gt.openmp_enabled():
                gt.openmp_set_num_threads(1)

            with SharedMemoryManager() as smm:
                # Copy distMat into a shared-memory block; workers are given
                # only the NumpyShared description (name, shape, dtype)
                shm_distMat = smm.SharedMemory(size = distMat.nbytes)
                distances_shared_array = np.ndarray(distMat.shape, dtype = distMat.dtype, buffer = shm_distMat.buf)
                distances_shared_array[:] = distMat[:]
                distances_shared = NumpyShared(name = shm_distMat.name, shape = distMat.shape, dtype = distMat.dtype)

                # One task per y-intercept index
                with Pool(processes = num_processes) as pool:
                    global_s = pool.map(partial(newNetwork2D,
                                                sample_names = sample_names,
                                                distMat = distances_shared,
                                                x_range = x_max,
                                                y_range = y_max,
                                                score_idx = score_idx,
                                                betweenness_sample = betweenness_sample,
                                                use_gpu = False),
                                        range(global_grid_resolution))

            # Pool finished: let graph-tool use num_processes threads again
            if gt.openmp_enabled():
                gt.openmp_set_num_threads(num_processes)

        # Flatten the per-y score lists into one array, so that
        # flat index = y index * global_grid_resolution + x index
        global_s = np.array(list(chain.from_iterable(global_s)))
        # NaN scores are overwritten with 1 before the minimum is located
        global_s[np.isnan(global_s)] = 1
        min_idx = np.argmin(global_s)
        optimal_x = x_max[min_idx % global_grid_resolution]
        optimal_y = y_max[min_idx // global_grid_resolution]

        # Best grid point not strictly inside the searched range: keep the
        # grid result and skip local optimisation
        if not (optimal_x > x_max_start and optimal_x < x_max_end and \
                optimal_y > y_max_start and optimal_y < y_max_end):
            no_local = True
        elif not no_local:
            # We have a fixed gradient and want to optimised the intercept
            # This parameterisation is a little awkward to match the 1D case:
            # Make two points along the right slope
            # NOTE(review): only mean1 and gradient are replaced here; mean0
            # keeps the caller's value - confirm this is what newNetwork and
            # transformLine expect below
            gradient = optimal_x / optimal_y # of 1D search
            delta = x_max[1] - x_max[0]
            bounds = [-delta, delta]
            mean1 = (optimal_x + delta, delta * gradient)

    else:
        # Set the range of points to search
        search_length = max_move + ((mean1[0] - mean0[0])**2 + (mean1[1] - mean0[1])**2)**0.5
        global_grid_resolution = 40 # Seems to work
        s_range = np.linspace(-min_move, search_length, num = global_grid_resolution)
        # End points of the search range and the intercepts they correspond to
        bottom_end = transformLine(s_range[0], mean0, mean1)
        top_end = transformLine(s_range[-1], mean0, mean1)
        min_x, min_y = decisionBoundary(bottom_end, gradient)
        max_x, max_y = decisionBoundary(top_end, gradient)

        if min_x < 0 or min_y < 0:
            raise RuntimeError("Boundary range below zero")
        sys.stderr.write("Search range (" +
                         ",".join(["{:.3f}".format(x) for x in bottom_end * scale]) +
                         ") to (" +
                         ",".join(["{:.3f}".format(x) for x in top_end * scale]) + ")\n")
        sys.stderr.write("Searching core intercept from " +
                         "{:.3f}".format(min_x * scale[0]) +
                         " to " + "{:.3f}".format(max_x * scale[0]) + "\n")
        sys.stderr.write("Searching accessory intercept from " +
                         "{:.3f}".format(min_y * scale[1]) +
                         " to " + "{:.3f}".format(max_y * scale[1]) + "\n")

        i_vec, j_vec, idx_vec = \
            poppunk_refine.thresholdIterate1D(distMat, s_range, slope,
                                              mean0[0], mean0[1],
                                              mean1[0], mean1[1], num_processes)
        if len(idx_vec) == distMat.shape[0]:
            raise RuntimeError("Boundary range includes all points")
        global_s = np.array(growNetwork(sample_names,
                                        i_vec,
                                        j_vec,
                                        idx_vec,
                                        s_range,
                                        score_idx,
                                        betweenness_sample = betweenness_sample,
                                        use_gpu = use_gpu))
        # NaN scores are overwritten with 1 before the minimum is located
        global_s[np.isnan(global_s)] = 1
        min_idx = np.argmin(np.array(global_s))
        # Local search is bracketed by the grid neighbours of the best point;
        # a best point at either end of the grid has no bracket, so the local
        # step is skipped
        if min_idx > 0 and min_idx < len(s_range) - 1:
            bounds = [s_range[min_idx-1], s_range[min_idx+1]]
        else:
            no_local = True
        if no_local:
            optimised_s = s_range[min_idx]

    # Local optimisation around global optimum
    if not no_local:
        sys.stderr.write("Trying to optimise score locally\n")
        local_s = scipy.optimize.minimize_scalar(
                    newNetwork,
                    bounds = bounds,
                    method = 'Bounded', options={'disp': True},
                    args = (sample_names, distMat, mean0, mean1, gradient,
                            slope, score_idx, num_processes,
                            betweenness_sample, use_gpu)
                )
        optimised_s = local_s.x

    # Convert to x_max, y_max if needed
    # (an unconstrained search without local optimisation already took
    # optimal_x and optimal_y straight from the grid)
    if not unconstrained or not no_local:
        optimised_coor = transformLine(optimised_s, mean0, mean1)
        if slope == 2:
            optimal_x, optimal_y = decisionBoundary(optimised_coor, gradient)
        else:
            optimal_x = optimised_coor[0]
            optimal_y = optimised_coor[1]

    if optimal_x < 0 or optimal_y < 0:
        raise RuntimeError("Optimisation failed: produced a boundary outside of allowed range\n")

    return optimal_x, optimal_y
Esempio n. 9
0
#!/usr/bin/env python
import sys,os
import time
import pylab as plt
from sbmtm import sbmtm
import graph_tool.all as gt
import numpy as np
from matplotlib import pyplot as plt

# Thread count for graph-tool is read from the first command-line argument
gt.openmp_set_num_threads(int(sys.argv[1]))
# Fixed seed so that repeated runs give the same results
gt.seed_rng(42)

print("Welcome to Topic Modelling")
print("using ",gt.openmp_get_num_threads(), " threads")

if __name__ == '__main__':
    start = time.time()
    print("initialised")
    gt.seed_rng(42)
    print("seed set")
    model = sbmtm()
    print("model created")
    model.load_graph(filename = '/home/filippo/files/graph.xml.gz')
    print("graph loaded")
    print(model.g)
    model.fit(n_init=1, parallel=True, verbose=True)
    # Alternatives left disabled:
    #model.fit_overlap(n_init=1, verbose=True, parallel=True)
    #model.plot()
    model.save_data()
    model.dump_model()
    # Move the generated output files next to the input graph
    os.system("mv *.csv *.png *.txt *.pkl /home/filippo/files/.")
            load_true_partition=False,
            strm_piece_num=part,
            out_neighbors=out_neighbors,
            in_neighbors=in_neighbors)
else:
    out_neighbors, in_neighbors, N, E, true_partition = load_graph(
        input_filename, load_true_partition=True)

# Build the graph-tool graph: one edge (i, j) for every entry j in the first
# column of out_neighbors[i]; vertices with no out-neighbours are skipped
input_graph = gt.Graph()
input_graph.add_edge_list([(src, dst)
                           for src, neighbors in enumerate(out_neighbors)
                           if len(neighbors) > 0
                           for dst in neighbors[:, 0]])
t0 = timeit.default_timer()
# the parallel switch determines whether MCMC updates are run in parallel, epsilon is the convergence threshold for
# the nodal updates (smaller value is stricter), and the verbose option prints updates on each step of the algorithm.
# Please refer to the graph-tool documentation under graph-tool.inference for details on the input parameters

# A non-positive thread count leaves graph-tool's current setting untouched
if args.threads > 0:
    gt.openmp_set_num_threads(args.threads)

mcmc_options = {'parallel': True}
equilibrate_options = {'verbose': False, 'epsilon': 1e-4}
graph_tool_partition = gt.minimize_blockmodel_dl(
    input_graph,
    mcmc_args=mcmc_options,
    mcmc_equilibrate_args=equilibrate_options,
    verbose=True)
t1 = timeit.default_timer()
print('\nGraph partition took {} seconds'.format(t1 - t0))
evaluate_partition(true_partition, graph_tool_partition.get_blocks().a)
Esempio n. 11
0
# Output settings applied in one call, in the same order as before
rcParams.update({
    "ps.usedistiller": "xpdf",
    "pdf.compression": 9,
    "ps.useafm": True,
    "path.simplify": True,
    # LaTeX preamble; the times and euler packages are left disabled:
    #   r"\usepackage{times}", r"\usepackage{euler}"
    "text.latex.preamble": [
        r"\usepackage{amssymb}",
        r"\usepackage{amsmath}"
    ],
})

import scipy
import scipy.stats
import numpy as np
from pylab import *
from numpy import *
import graph_tool.all as gt
import graph_tool.draw
import random as prandom

# Open a figure (figure comes from one of the star imports above)
figure()

# Best effort: ask graph-tool for a single OpenMP thread and carry on if the
# call raises RuntimeError
# NOTE(review): presumably raised when OpenMP is unavailable - confirm
try:
    gt.openmp_set_num_threads(1)
except RuntimeError:
    pass

# Fixed seeds for Python's random module, NumPy and graph-tool
prandom.seed(42)
np.random.seed(42)
gt.seed_rng(42)
Esempio n. 12
0
import graph_tool.all as gt
# Restrict graph-tool to one OpenMP thread at import time:
# openmp does not play well with multiprocessing
gt.openmp_set_num_threads(1)
import numpy as np
import logging


class G:
    """
    Base graph class
    """
    def __init__(self, N, G_in=None):
        """Create an empty undirected graph with N vertices, or copy G_in.

        Args:
            N: number of vertices to add when no source graph is given.
            G_in: optional object whose ``g`` attribute is the graph to copy.
        """
        if G_in is not None:
            # Copy the source graph and wrap it as an unpruned, undirected graph
            # NOTE(review): self.N is not assigned on this path - confirm
            # that nothing reads it for graphs built from G_in
            source_graph = G_in.g.copy()
            self.g = gt.Graph(source_graph, prune=False, directed=False)
            return

        self.N = N
        self.g = gt.Graph(directed=False)
        self.g.add_vertex(N)

    def get_shortest_path(self, source, target, weights):
        """Return ``(vertex_list, edge_list)`` from ``gt.shortest_path``.

        ``source``, ``target`` and ``weights`` are passed through positionally
        after the wrapped graph.
        """
        path_vertices, path_edges = gt.shortest_path(
            self.g, source, target, weights)
        return path_vertices, path_edges

    def get_number_of_edges(self):
        """Return the edge count reported by the wrapped graph."""
        edge_count = self.g.num_edges()
        return edge_count

    def get_number_of_vertices(self):
        """Return the vertex count reported by the wrapped graph."""
        vertex_count = self.g.num_vertices()
        return vertex_count

    def purge_vertices(self):
Esempio n. 13
0
# Figure and output settings applied in one call, in the same order as before
rcParams.update({
    "figure.subplot.bottom": 0.2,

    "image.cmap": "hot",

    "text.usetex": True,

    "ps.usedistiller": "xpdf",
    "pdf.compression": 9,
    "ps.useafm": True,
    "path.simplify": True,
    # LaTeX preamble; the times and euler packages are left disabled
    "text.latex.preamble": [
        r"\usepackage{amssymb}",
        r"\usepackage{amsmath}",
    ],
})

import scipy
import scipy.stats
import numpy as np
from pylab import *
from numpy import *
import graph_tool.all as gt

# Open a figure (figure comes from one of the star imports above)
figure()

# Best effort: ask graph-tool for a single OpenMP thread and carry on if the
# call raises RuntimeError
# NOTE(review): presumably raised when OpenMP is unavailable - confirm
try:
    gt.openmp_set_num_threads(1)
except RuntimeError:
    pass
# Fixed seeds for NumPy and graph-tool
np.random.seed(42)
gt.seed_rng(42)