def gen_er(dicProperties):
    '''
    Generate a random directed graph from a dictionary of properties.

    Two of the keys "Nodes", "Edges" and "Density" have to be present; the
    third one is derived using density = edges / nodes**2 and written back
    into `dicProperties`.  "InhibFrac" and "Weighted" are always read, and
    "Distribution" is read when "Weighted" is true.

    Returns the generated graph.  It carries an integer edge property
    "type" (+1 excitatory, -1 inhibitory) and, for weighted graphs, a
    double edge property "weight".
    '''
    np.random.seed()
    graphER = Graph()

    # work out the node count, edge count and density
    if "Nodes" not in dicProperties:
        # nodes are deduced from the requested edges and density
        nEdges = dicProperties["Edges"]
        rDens = dicProperties["Density"]
        nNodes = int(np.floor(np.sqrt(nEdges / rDens)))
        graphER.add_vertex(nNodes)
        dicProperties["Nodes"] = nNodes
    else:
        nNodes = dicProperties["Nodes"]
        graphER.add_vertex(nNodes)
        if "Edges" not in dicProperties:
            rDens = dicProperties["Density"]
            nEdges = int(np.floor(rDens * nNodes**2))
            dicProperties["Edges"] = nEdges
        else:
            nEdges = dicProperties["Edges"]
            rDens = nEdges / float(nNodes**2)
            dicProperties["Density"] = rDens

    # draw random edges until the target count is reached or the number of
    # attempts is exhausted; each round only draws the edges still missing
    numAttempts = 0
    numPresent = 0
    while numPresent != nEdges and numAttempts < n_MAXTESTS:
        numMissing = nEdges - numPresent
        graphER.add_edge_list(np.random.randint(0, nNodes, (numMissing, 2)))
        # self-loops and duplicated edges are discarded after every round
        remove_self_loops(graphER)
        remove_parallel_edges(graphER)
        numPresent = graphER.num_edges()
        numAttempts += 1
    graphER.reindex_edges()

    # the loop may stop short of the target, so use the real edge count
    nEdges = graphER.num_edges()
    # not used below; kept because evaluating it fails for an empty node set
    rDens = nEdges / float(nNodes**2)

    # edge types: an edge is excitatory when its uniform draw exceeds the
    # inhibitory fraction
    rInhibFrac = dicProperties["InhibFrac"]
    lstDraws = np.random.uniform(0, 1, nEdges)
    lstIsExcitatory = np.greater(lstDraws, np.full(nEdges, rInhibFrac))
    nExc = np.count_nonzero(lstIsExcitatory)
    # map True/False onto +1/-1
    lstTypes = np.multiply(2, lstIsExcitatory) - np.repeat(1, nEdges)
    graphER.edge_properties["type"] = graphER.new_edge_property("int", lstTypes)

    # edge weights
    if dicProperties["Weighted"]:
        fnGenWeights = dicGenWeights[dicProperties["Distribution"]]
        lstWeights = fnGenWeights(graphER, dicProperties, nEdges, nExc)
        # create the property that stores the weights
        graphER.edge_properties["weight"] = graphER.new_edge_property(
            "double", lstWeights)
    return graphER
def examine_graph(graph: Graph, experiment: str, graphname: str, real: bool,
                  directed: bool = True) -> Properties:
    """Measure basic statistics of `graph` and pack them into `Properties`.

    Args:
        graph: the graph to measure.
        experiment: passed through unchanged to the result.
        graphname: passed through unchanged to the result.
        real: passed through unchanged to the result.
        directed: passed through unchanged to the result; it does not
            influence any of the measurements taken here.

    Returns:
        A `Properties` built, in this order, from: experiment, graphname,
        real, vertex count, edge count, minimum / maximum / average total
        degree, vertex count of the largest component (computed with
        ``directed=False``), number of zero-degree vertices, global
        clustering coefficient, directed, the negated ``alpha`` of a
        discrete power-law fit of the degrees, and the 95th degree
        percentile.
    """
    n_vertices = graph.num_vertices()
    n_edges = graph.num_edges()

    degrees = graph.get_total_degrees(np.arange(n_vertices))
    lowest_degree = np.min(degrees)
    highest_degree = np.max(degrees)
    mean_degree = vertex_average(graph, "total")[0]

    giant = extract_largest_component(graph, directed=False)
    giant_size = giant.num_vertices()
    isolated = np.sum(degrees == 0)
    clustering = global_clustering(graph)[0]

    # Power-law fit over the whole degree range [1, max degree].
    fit = powerlaw.Fit(degrees, xmin=1, discrete=True, xmax=highest_degree)
    exponent = -fit.alpha
    degree_p95 = np.percentile(degrees, 95)

    return Properties(
        experiment,
        graphname,
        real,
        n_vertices,
        n_edges,
        lowest_degree,
        highest_degree,
        mean_degree,
        giant_size,
        isolated,
        clustering,
        directed,
        exponent,
        degree_p95,
    )
def _group_members_by_value(values, members):
    """Bucket `members` by the matching entry of `values`.

    `values[i]` is the attribute value belonging to `members[i]`, where
    `members[i]` is an iterable of hashable items to store for that entry.
    Returns a dict mapping every distinct value (as yielded by `np.unique`,
    in its sorted order) to the set of all items stored under it.
    """
    keys, inverse = np.unique(values, return_inverse=True)
    buckets = [set() for _ in keys]
    # inverse[i] is the position of values[i] inside `keys`
    for slot, items in zip(inverse.ravel(), members):
        buckets[slot].update(items)
    return dict(zip(keys, buckets))


def create_dict(g: gt.Graph, n_att_name, e_att_name):
    """Index the vertices and edges of `g` by the value of one attribute.

    Args:
        g: graph holding the vertex property `n_att_name` and the edge
            property `e_att_name`.
        n_att_name: name of the vertex property to group by.
        e_att_name: name of the edge property to group by.

    Returns:
        A tuple ``(node_dict, edge_dict)``:

        * ``node_dict[n_att_name]`` maps ``'size'`` to the vertex count and
          every distinct property value to the set of vertices carrying it.
        * ``edge_dict[e_att_name]`` maps ``'size'`` to the edge count and
          every distinct property value to a set of ``(source, target)``
          pairs; each edge is stored in both orientations.

    The grouping is done in one pass over the vertices and one pass over
    the edges instead of rescanning the whole graph for each value.
    """
    vertices = list(g.vertices())
    vertex_values = [g.vp[n_att_name][v] for v in vertices]
    node_dict = {n_att_name: {'size': g.num_vertices()}}
    node_dict[n_att_name].update(
        _group_members_by_value(vertex_values, [(v,) for v in vertices]))

    edge_values = []
    edge_pairs = []
    for e in g.edges():
        source, target = e.source(), e.target()
        edge_values.append(g.ep[e_att_name][e])
        # keep the edge and its mirror so lookups work in either direction
        edge_pairs.append(((source, target), (target, source)))
    edge_dict = {e_att_name: {'size': int(g.num_edges())}}
    edge_dict[e_att_name].update(
        _group_members_by_value(edge_values, edge_pairs))

    return node_dict, edge_dict
def main():
    """
    Build the KTH research collaboration graph, save it and draw it.

    Rows are loaded from the publication database, the ';'-separated names
    of each row are parsed and added to an undirected graph, the graph is
    written to 'a.gt', and every drawing routine is run on its own copy.
    """
    start_time = time()

    # Undirected graph that receives all relations.
    g = Graph(directed=False)
    # Edge property measuring collaboration.
    e_times = g.new_edge_property("float")
    # Grouping value of the vertices: vertices with the same value belong
    # to the same group.
    v_groups = g.new_vertex_property("int")
    # Vertex colors, based on the faculty colors.
    v_colors = g.new_vertex_property("vector<double>")

    db_path = '/home/_/kth/kexet/db/kex.db'
    query = """SELECT * FROM final WHERE ( name LIKE '%kth%' and name LIKE '%;%' and keywords is not null and year >= 2013 and ContentType = 'Refereegranskat' and PublicationType = 'Artikel i tidskrift' );"""

    for row in load.rows(db_path, query):
        name_objects = parse.names(row['name'].split(';'))
        graph.add_relation(g, name_objects, e_times, v_colors, v_groups)

    # Attach the property maps to the graph so they are saved with it.
    g.edge_properties["times"] = e_times
    g.vertex_properties["colors"] = v_colors
    g.vertex_properties["groups"] = v_groups

    log.info(g.num_vertices())
    log.info(g.num_edges())

    g.save('a.gt')
    log.info('graph saved: a.gt')
    log.info("db & parse %ss" % round(time() - start_time, 2))

    # Each routine gets a fresh copy of the graph; the routine is looked up
    # right before it is called.
    layouts = (
        "largest",
        "radial_highest",
        "sfdp",
        "grouped_sfdp",
        "min_tree",
        "radial_random",
        "hierarchy",
        "minimize_blockmodel",
        "netscience",
        "fruchterman",
    )
    for layout in layouts:
        getattr(draw, layout)(g.copy())
def largest_strongly_connected_component(self, graph):
    """Write the edges of the largest strongly connected component to a file.

    The edges of `graph` are copied into a directed graph-tool graph, the
    largest component is labelled, and every edge inside it is written to
    `self.output`, one edge per line.

    Args:
        graph: object whose ``edges(data=True)`` yields tuples starting
            with the two endpoints and ending with an attribute mapping
            (presumably a networkx graph -- confirm against the caller).
            Endpoints are handed to graph-tool as vertices, so they are
            assumed to be integer ids.

    Output lines are ``"source target"`` when `self.is_relationship` is
    true, otherwise ``"source target time type"`` using the "time" and
    "type" entries of the edge attributes.
    """
    import graph_tool.all as gt

    with_attributes = not self.is_relationship

    full_graph = gt.Graph(directed=True)
    if with_attributes:
        edge_prop_time = full_graph.new_edge_property("int")
        edge_prop_type = full_graph.new_edge_property("string")

    for edge in tqdm(graph.edges(data=True)):
        source, target = edge[:2]
        # Index the property maps with the descriptor returned by add_edge:
        # an endpoint tuple is not an edge descriptor and could not tell
        # parallel edges apart.
        new_edge = full_graph.add_edge(source, target)
        if with_attributes:
            attributes = edge[-1]
            edge_prop_time[new_edge] = attributes["time"]
            edge_prop_type[new_edge] = attributes["type"]

    # On a directed graph the labelled component is the strongly connected
    # one; the view filters the graph down to its vertices.
    in_largest = gt.label_largest_component(full_graph)
    component = gt.GraphView(full_graph, vfilt=in_largest)

    print(
        "Total nodes {0} in largest strongly connected component.".format(
            component.num_vertices()))
    print(
        "Total edges {0} in largest strongly connected component.".format(
            component.num_edges()))

    with open(self.output, "w+") as output_file:
        for edge in tqdm(component.edges()):
            if with_attributes:
                output_file.write("{0} {1} {2} {3}\n".format(
                    edge.source(), edge.target(),
                    edge_prop_time[edge], edge_prop_type[edge]))
            else:
                output_file.write("{0} {1}\n".format(
                    edge.source(), edge.target()))
def mssp(g: graph_tool.Graph, weight_prop: graph_tool.EdgePropertyMap, L,
         known_labels):
    """Return the midpoint of the shortest path joining two differently
    labelled vertices of `L`.

    Args:
        g: the graph to search.
        weight_prop: edge weights handed to the shortest-path routines, or
            None for unweighted distances.
        L: iterable of vertex indices to pair up.
        known_labels: mapping (or sequence) giving the label of every
            vertex index in `L`.

    Returns:
        The vertex at position ``len(path) // 2`` of the shortest path
        between the closest pair ``(i, j)`` from `L` whose labels differ,
        or None when there is no such pair or its distance is not below
        the total edge weight plus one (edge count plus one when
        `weight_prop` is None), i.e. the pair is treated as unreachable.

    Only the best pair is tracked, so no ``n x n`` distance matrix is
    allocated.  Ties are broken towards the smallest ``(i, j)``, which is
    the pair a row-major ``argmin`` over such a matrix would pick.
    """
    best = None  # (distance, i, j) of the closest pair seen so far
    for i, j in itertools.combinations(L, 2):
        if known_labels[i] == known_labels[j]:
            continue
        distance = float(
            graph_tool.topology.shortest_distance(g, i, j, weight_prop))
        candidate = (distance, i, j)
        if best is None or candidate < best:
            best = candidate

    if best is None:
        return None
    distance, i, j = best

    # Any real path is shorter than this bound; unreachable pairs are
    # reported with a distance that is not.
    if weight_prop is None:
        total_weight = g.num_edges() + 1
    else:
        total_weight = np.sum(weight_prop.a) + 1

    if distance < total_weight:
        path, _ = graph_tool.topology.shortest_path(g, i, j, weight_prop)
        return path[len(path) // 2]
    return None
class GraphClass:
    '''
    Wrapper around a graph_tool graph that keeps it together with a
    dictionary of descriptive properties and cached betweenness values.

    The wrapped graph is private; `get_graph` hands it out and marks every
    cached value as stale because the caller may modify it.
    '''

    #------------#
    # Initialize #
    #------------#

    def __init__(self, dicProp=None, graph=None):
        '''
        init from properties

        dicProp -- dictionary of properties (deep-copied); when omitted it
                   is {"Name": "Graph", "Type": "None", "Weighted": False}
        graph   -- existing graph to wrap.  Without it an empty graph is
                   created for Type "None"; for any other Type the matching
                   generator is called with the property dictionary.
        '''
        if dicProp is None:
            dicProp = {"Name": "Graph", "Type": "None", "Weighted": False}
        self.dicProperties = deepcopy(dicProp)
        # functions computing a property from the graph
        self.dicGetProp = {
            "Reciprocity": get_reciprocity,
            "Clustering": get_clustering,
            "Assortativity": get_assortativity,
            "Diameter": get_diameter,
            "SCC": get_num_scc,
            #"Spectral radius": get_spectral_radius,
            "WCC": get_num_wcc,
            "InhibFrac": get_inhib_frac
        }
        # functions generating a graph of a given type
        self.dicGenGraph = {
            "Erdos-Renyi": gen_er,
            "Free-scale": gen_fs,
            "EDR": gen_edr
        }
        # betweenness caches and their validity flags
        self.betweenness = None
        self.wBetweeness = None
        self.bBetwToDate = False
        self.bWBetwToDate = False
        # create a graph
        if graph is not None:
            # use the one furnished
            self.__graph = graph
            self.update_prop()
            self.bPropToDate = True
        elif self.dicProperties["Type"] == "None":
            # create an empty graph
            self.__graph = Graph()
            self.bPropToDate = False
        else:
            # generate a graph of the requested type
            fnGenerate = self.dicGenGraph[self.dicProperties["Type"]]
            self.__graph = fnGenerate(self.dicProperties)
            self.update_prop()
            self.set_name()
            self.bPropToDate = True

    @classmethod
    def from_graph_class(cls, graphToCopy):
        '''
        create new GraphClass instance as a deepcopy of another

        The source instance is left untouched: its graph is read directly,
        so its cached values are not invalidated.  The new instance has
        freshly computed properties and no cached betweenness.
        '''
        gtGraph = graphToCopy.__graph.copy()
        # __init__ deep-copies the dictionary and recomputes the properties
        return cls(graphToCopy.get_dict_properties(), gtGraph)

    def copy(self):
        '''
        returns a deepcopy of the graphClass instance

        The copy keeps the properties of the original (e.g. "Weighted"),
        has its computed properties refreshed, and is named after the
        original with '_copy' appended.
        '''
        graphCopy = GraphClass(self.dicProperties, self.__graph.copy())
        graphCopy.set_name(self.dicProperties["Name"] + '_copy')
        return graphCopy

    #---------------------------#
    # Manipulating the gt graph #
    #---------------------------#

    def set_graph(self, gtGraph):
        '''
        acquire a graph_tool graph as its own

        Raises TypeError when the object is not a graph_tool Graph.
        '''
        if not isinstance(gtGraph, Graph):
            raise TypeError("The object passed to 'set_graph' is not a < class 'graph_tool.Graph' > but a {}".format(gtGraph.__class__))
        self.__graph = gtGraph

    def _filtered_subgraph(self, bInhibitory):
        '''
        build a GraphClass holding only the edges of one type

        The edge property "type" is +1 for excitatory and -1 for inhibitory
        edges, so `-type + 1` is non-zero exactly for inhibitory edges and
        `type + 1` exactly for excitatory ones.
        '''
        graph = self.__graph.copy()
        arrTypes = graph.edge_properties["type"].a
        arrKeep = (-arrTypes + 1) if bInhibitory else (arrTypes + 1)
        graph.set_edge_filter(graph.new_edge_property("bool", arrKeep))
        subGraph = GraphClass()
        # prune=True drops the filtered-out edges from the new graph
        subGraph.set_graph(Graph(graph, prune=True))
        subGraph.set_prop("Weighted", True)
        return subGraph

    def inhibitory_subgraph(self):
        '''
        create a GraphClass instance which graph contains only the
        inhibitory connections of the current instance's graph
        '''
        return self._filtered_subgraph(True)

    def excitatory_subgraph(self):
        '''
        create a GraphClass instance which graph contains only the
        excitatory connections of the current instance's graph
        '''
        return self._filtered_subgraph(False)

    #-------------------------#
    # Set or update functions #
    #-------------------------#

    def set_name(self, name=""):
        '''
        set graph name

        Without a name, one is built from the "Type" property followed by
        '_<initial><value>' for each of "Nodes", "Edges", "Distribution"
        (unless the value is a dict) and for "Clustering" (rounded to 4
        decimals), in the order the properties are stored.  The resulting
        name is printed.
        '''
        if name != "":
            self.dicProperties["Name"] = name
        else:
            strName = self.dicProperties["Type"]
            tplUse = ("Nodes", "Edges", "Distribution")
            for key, value in self.dicProperties.items():
                if key in tplUse and not isinstance(value, dict):
                    strName += '_' + key[0] + str(value)
                if key == "Clustering":
                    strName += '_' + key[0] + str(around(value, 4))
            self.dicProperties["Name"] = strName
        print(self.dicProperties["Name"])

    def set_prop(self, strPropName, value):
        ''' store `value` under `strPropName` in the property dictionary '''
        self.dicProperties[strPropName] = value

    def update_prop(self, lstProp=None):
        '''
        update part or all of the graph properties

        lstProp -- names of the properties to recompute; unknown names are
                   reported and skipped.  When empty or omitted, every
                   computable property is refreshed and the properties are
                   marked as up to date.
        '''
        if lstProp:
            for strPropName in lstProp:
                if strPropName in self.dicGetProp:
                    self.dicProperties[strPropName] = self.dicGetProp[strPropName](self.__graph)
                else:
                    print("Ignoring unknown property '{}'".format(strPropName))
        else:
            self.dicProperties.update({
                strPropName: fnGet(self.__graph)
                for strPropName, fnGet in self.dicGetProp.items()
            })
            # only a full refresh guarantees that nothing is stale
            self.bPropToDate = True

    #---------------#
    # Get functions #
    #---------------#

    ## basic properties

    def get_name(self):
        ''' name stored in the property dictionary '''
        return self.dicProperties["Name"]

    def num_vertices(self):
        ''' number of vertices of the wrapped graph '''
        return self.__graph.num_vertices()

    def num_edges(self):
        ''' number of edges of the wrapped graph '''
        return self.__graph.num_edges()

    def get_density(self):
        ''' number of edges divided by the squared number of vertices '''
        return self.__graph.num_edges() / float(self.__graph.num_vertices()**2)

    def is_weighted(self):
        ''' value of the "Weighted" property '''
        return self.dicProperties["Weighted"]

    ## graph and adjacency matrix

    def get_graph(self):
        '''
        return the wrapped graph

        The caller may modify it, so the properties and both betweenness
        caches are marked as stale.
        '''
        self.bPropToDate = False
        self.bBetwToDate = False
        self.bWBetwToDate = False
        return self.__graph

    def get_mat_adjacency(self):
        ''' adjacency matrix of the graph, using the values of get_weights '''
        return adjacency(self.__graph, self.get_weights())

    ## complex properties

    def get_prop(self, strPropName):
        '''
        return a property of the graph

        Computable properties are recomputed when they are missing or the
        stored values are stale; other stored properties (e.g. "Name") are
        returned as they are.  An unknown name is reported and None is
        returned.
        '''
        if strPropName in self.dicGetProp:
            if not self.bPropToDate or strPropName not in self.dicProperties:
                self.dicProperties[strPropName] = self.dicGetProp[strPropName](self.__graph)
            return self.dicProperties[strPropName]
        if strPropName in self.dicProperties:
            return self.dicProperties[strPropName]
        print("Ignoring request for unknown property '{}'".format(strPropName))
        return None

    def get_dict_properties(self):
        ''' the property dictionary itself (not a copy) '''
        return self.dicProperties

    def get_degrees(self, strType="total", bWeights=True):
        '''
        degrees of type "in", "out" or "total"; any other type is reported
        and None is returned
        '''
        lstValidTypes = ["in", "out", "total"]
        if strType in lstValidTypes:
            return degree_list(self.__graph, strType, bWeights)
        print("Ignoring invalid degree type '{}'".format(strType))
        return None

    def get_betweenness(self, bWeights=True):
        '''
        betweenness of the graph, with or without weights

        The weighted and unweighted results are cached separately and only
        recomputed when the matching cache is stale.
        '''
        if bWeights:
            if not self.bWBetwToDate:
                self.wBetweeness = betweenness_list(self.__graph, bWeights)
                self.bWBetwToDate = True
            return self.wBetweeness
        if not self.bBetwToDate:
            self.betweenness = betweenness_list(self.__graph, bWeights)
            self.bBetwToDate = True
        return self.betweenness

    def get_types(self):
        '''
        array of the edge types, or an array of ones (one per edge) when
        the graph has no "type" edge property
        '''
        if "type" in self.__graph.edge_properties.keys():
            return self.__graph.edge_properties["type"].a
        return repeat(1, self.__graph.num_edges())

    def get_weights(self):
        '''
        copy of the "weight" edge property multiplied by the edge types
        for a weighted graph, otherwise a copy of the "type" edge property
        '''
        if self.dicProperties["Weighted"]:
            epropW = self.__graph.edge_properties["weight"].copy()
            epropW.a = multiply(epropW.a, self.__graph.edge_properties["type"].a)
            return epropW
        return self.__graph.edge_properties["type"].copy()
def prepare_hierarchies_neighborhood(
    experiments_path: ExperimentPaths,
    conceptnet_graph_path: Union[str, Path],
    filter_graphs_to_intersected_vertices: bool = True,
) -> pd.DataFrame:
    """Compare shortest-path distances between the aspect graph and ConceptNet.

    Both graphs are loaded and restricted to their shared vertices (matched
    through the ``aspect_name`` vertex property).  For every ordered pair of
    shared aspects the shortest distance in each of the two original graphs
    is collected into a DataFrame, which is pickled next to the experiment
    and logged to mlflow together with a number of size metrics.

    Args:
        experiments_path: experiment locations; ``experiment_path`` is the
            directory that receives the pickled DataFrame.
        conceptnet_graph_path: location of the ConceptNet graph, as ``str``
            or ``Path``; its stem becomes part of the output file name.
        filter_graphs_to_intersected_vertices: forwarded to
            ``intersected_nodes`` and logged as an mlflow parameter.

    Returns:
        DataFrame with the columns ``aspect_1``, ``aspect_2``,
        ``shortest_distance_aspect_graph`` and
        ``shortest_distance_conceptnet``, one row per ordered pair.
    """
    # Path() accepts both str and Path; a plain str has no `.stem`.
    conceptnet_stem = Path(conceptnet_graph_path).stem
    conceptnet_hierarchy_neighborhood_df_path = (
        experiments_path.experiment_path /
        f"shortest-paths-pairs-{conceptnet_stem}-df.pkl")

    logger.info("Prepare graphs")
    conceptnet_graph, vertices_conceptnet = prepare_conceptnet(
        conceptnet_graph_path)
    mlflow.log_metric("conceptnet_graph_nodes",
                      conceptnet_graph.num_vertices())
    mlflow.log_metric("conceptnet_graph_edges", conceptnet_graph.num_edges())

    aspect_graph, _ = prepare_aspect_graph(experiments_path)
    mlflow.log_metric("aspect_graph_nodes", aspect_graph.num_vertices())
    mlflow.log_metric("aspect_graph_edges", aspect_graph.num_edges())

    # Intersect copies, so the distances below are still measured in the
    # original graphs.
    aspect_graph_intersected, conceptnet_graph_intersected = intersected_nodes(
        aspect_graph=Graph(aspect_graph),
        conceptnet_graph=Graph(conceptnet_graph),
        filter_graphs_to_intersected_vertices=
        filter_graphs_to_intersected_vertices,
        property_name="aspect_name",
    )

    mlflow.log_param("filter_graphs_to_intersected_vertices",
                     filter_graphs_to_intersected_vertices)
    mlflow.log_metric(
        "conceptnet_graph_intersected_nodes",
        conceptnet_graph_intersected.num_vertices(),
    )
    mlflow.log_metric("aspect_graph_intersected_nodes",
                      aspect_graph_intersected.num_vertices())
    mlflow.log_metric("conceptnet_graph_intersected_edges",
                      conceptnet_graph_intersected.num_edges())
    mlflow.log_metric("aspect_graph_intersected_edges",
                      aspect_graph_intersected.num_edges())

    aspect_names_intersected = list(
        aspect_graph_intersected.vertex_properties["aspect_name"])

    # Map every shared aspect name back to its vertex in the original
    # aspect graph.
    vertices_name_to_aspect_vertex = dict(
        zip(aspect_graph.vertex_properties["aspect_name"],
            aspect_graph.vertices()))
    aspect_graph_vertices_intersected = [
        vertices_name_to_aspect_vertex[a] for a in aspect_names_intersected
    ]

    # Row i holds the distances from shared aspect i to all shared aspects.
    shortest_distances_aspect_graph = np.array([
        shortest_distance(
            g=aspect_graph,
            source=v,
            target=aspect_graph_vertices_intersected,
            directed=True,
        ) for v in tqdm(aspect_graph_vertices_intersected,
                        desc="Aspect graph shortest paths...")
    ])

    conceptnet_vertices_intersected = [
        vertices_conceptnet[a] for a in aspect_names_intersected
    ]

    mlflow.log_metric("conceptnet_vertices_intersected len",
                      len(conceptnet_vertices_intersected))
    mlflow.log_metric("aspect_graph_vertices_intersected len",
                      len(aspect_graph_vertices_intersected))

    assert len(conceptnet_vertices_intersected) == len(
        aspect_graph_vertices_intersected
    ), "Wrong sequence of vertices in both graphs"

    shortest_distances_conceptnet = np.array([
        shortest_distance(
            g=conceptnet_graph,
            source=v,
            target=conceptnet_vertices_intersected,
            directed=True,
        ) for v in tqdm(conceptnet_vertices_intersected,
                        desc="Conceptnet shortest paths...")
    ])

    pairs = []
    # tqdm wraps the list (not the enumerate object) so it knows the total.
    for aspect_1_idx, aspect_1 in enumerate(
            tqdm(aspect_names_intersected, desc="Pairs distances...")):
        for aspect_2_idx, aspect_2 in enumerate(aspect_names_intersected):
            pairs.append((
                aspect_1,
                aspect_2,
                shortest_distances_aspect_graph[aspect_1_idx][aspect_2_idx],
                shortest_distances_conceptnet[aspect_1_idx][aspect_2_idx],
            ))

    pairs_df = pd.DataFrame(
        pairs,
        columns=[
            "aspect_1",
            "aspect_2",
            "shortest_distance_aspect_graph",
            "shortest_distance_conceptnet",
        ],
    )

    logger.info("Dump DataFrame with pairs")
    output_path = conceptnet_hierarchy_neighborhood_df_path.as_posix()
    pairs_df.to_pickle(output_path)
    mlflow.log_artifact(output_path)
    mlflow.log_metric("conceptnet_hierarchy_neighborhood_df_len",
                      len(pairs_df))
    # Report the file that was actually written.
    logger.info(f"DataFrame with pairs dumped in: {output_path}")
    return pairs_df
class SegmentationGraph(object): """ Class defining the abstract SegmentationGraph object, its attributes and implements methods common to all derived graph classes. The constructor requires the following parameters of the underlying segmentation that will be used to build the graph. """ def __init__(self): """ Constructor of the abstract SegmentationGraph object. Returns: None """ self.graph = Graph(directed=False) """graph_tool.Graph: a graph object storing the segmentation graph topology, geometry and properties (initially empty). """ # Add "internal property maps" to the graph. # vertex property for storing the xyz coordinates of the corresponding # vertex: self.graph.vp.xyz = self.graph.new_vertex_property("vector<float>") # edge property for storing the distance between the connected vertices: self.graph.ep.distance = self.graph.new_edge_property("float") self.coordinates_to_vertex_index = {} """dict: a dictionary mapping the vertex coordinates (x, y, z) to the vertex index. """ self.coordinates_pair_connected = set() """set: a set storing pairs of vertex coordinates that are connected by an edge in a tuple form ((x1, y1, z1), (x2, y2, z2)). """ @staticmethod def distance_between_voxels(voxel1, voxel2): """ Calculates and returns the Euclidean distance between two voxels. 
Args: voxel1 (tuple): first voxel coordinates in form of a tuple of floats of length 3 (x1, y1, z1) voxel2 (tuple): second voxel coordinates in form of a tuple of floats of length 3 (x2, y2, z2) Returns: the Euclidean distance between two voxels (float) """ if (isinstance(voxel1, tuple) and (len(voxel1) == 3) and isinstance(voxel2, tuple) and (len(voxel2) == 3)): sum_of_squared_differences = 0 for i in range(3): # for each dimension sum_of_squared_differences += (voxel1[i] - voxel2[i])**2 return math.sqrt(sum_of_squared_differences) else: raise pexceptions.PySegInputError( expr='distance_between_voxels (SegmentationGraph)', msg=('Tuples of integers of length 3 required as first and ' 'second input.')) def update_coordinates_to_vertex_index(self): """ Updates graph's dictionary coordinates_to_vertex_index. The dictionary maps the vertex coordinates (x, y, z) to the vertex index. It has to be updated after purging the graph, because vertices are renumbered, as well as after reading a graph from a file (e.g. before density calculation). Returns: None """ self.coordinates_to_vertex_index = {} for vd in self.graph.vertices(): [x, y, z] = self.graph.vp.xyz[vd] self.coordinates_to_vertex_index[(x, y, z)] = self.graph.vertex_index[vd] def calculate_density(self, size, scale, mask=None, target_coordinates=None, verbose=False): """ Calculates ribosome density for each membrane graph vertex. Calculates shortest geodesic distances (d) for each vertex in the graph to each reachable ribosome center mapped on the membrane given by a binary mask with coordinates in pixels or an array of coordinates in given units. Then, calculates a density measure of ribosomes at each vertex or membrane voxel: D = sum {over all reachable ribosomes} (1 / (d + 1)). Adds the density as vertex PropertyMap to the graph. Returns an array with the same shape as the underlying segmentation with the densities plus 1, in order to distinguish membrane voxels with 0 density from the background. 
Args: size (tuple): size in voxels (X, Y, Z) of the original segmentation scale (tuple): pixel size (X, Y, Z) in given units of the original segmentation mask (numpy.ndarray, optional): a binary mask of the ribosome centers as 3D array where indices are coordinates in pixels (default None) target_coordinates (numpy.ndarray, optional): the ribosome centers coordinates in given units as 2D array in format [[x1, y1, z1], [x2, y2, z2], ...] (default None) verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: a 3D numpy ndarray with the densities + 1 Note: One of the two parameters, mask or target_coordinates, has to be given. """ from . import ribosome_density as rd # If a mask is given, find the set of voxels of ribosome centers mapped # on the membrane, 'target_voxels', and rescale them to units, # 'target_coordinates': if mask is not None: if mask.shape != size: raise pexceptions.PySegInputError( expr='calculate_density (SegmentationGraph)', msg=("Size of the input 'mask' have to be equal to those " "set during the generation of the graph.")) # output as a list of tuples [(x1,y1,z1), (x2,y2,z2), ...] in pixels target_voxels = rd.get_foreground_voxels_from_mask(mask) # for rescaling have to convert to an ndarray target_ndarray_voxels = rd.tupel_list_to_ndarray_voxels( target_voxels) # rescale to units, output an ndarray [[x1,y1,z1], [x2,y2,z2], ...] 
target_ndarray_coordinates = (target_ndarray_voxels * np.asarray(scale)) # convert to a list of tuples, which are in units now target_coordinates = rd.ndarray_voxels_to_tupel_list( target_ndarray_coordinates) # If target_coordinates are given (in units), convert them from a numpy # ndarray to a list of tuples: elif target_coordinates is not None: target_coordinates = rd.ndarray_voxels_to_tupel_list( target_coordinates) # Exit if the target_voxels list is empty: if len(target_coordinates) == 0: raise pexceptions.PySegInputError( expr='calculate_density (SegmentationGraph)', msg="No target voxels were found! Check your input ('mask' or " "'target_coordinates').") print('{} target voxels'.format(len(target_coordinates))) if verbose: print(target_coordinates) # Pre-filter the target coordinates to those existing in the graph # (should already all be in the graph, but just in case): target_coordinates_in_graph = [] for target_xyz in target_coordinates: if target_xyz in self.coordinates_to_vertex_index: target_coordinates_in_graph.append(target_xyz) else: raise pexceptions.PySegInputWarning( expr='calculate_density (SegmentationGraph)', msg=('Target ({}, {}, {}) not inside the membrane!'.format( target_xyz[0], target_xyz[1], target_xyz[2]))) print('{} target coordinates in graph'.format( len(target_coordinates_in_graph))) if verbose: print(target_coordinates_in_graph) # Get all indices of the target coordinates: target_vertices_indices = [] for target_xyz in target_coordinates_in_graph: v_target_index = self.coordinates_to_vertex_index[target_xyz] target_vertices_indices.append(v_target_index) # Density calculation # Add a new vertex property to the graph, density: self.graph.vp.density = self.graph.new_vertex_property("float") # Dictionary mapping voxel coordinates (for the volume returned later) # to a list of density values falling within that voxel: voxel_to_densities = {} # For each vertex in the graph: for v_membrane in self.graph.vertices(): # Get its coordinates: 
membrane_xyz = self.graph.vp.xyz[v_membrane] if verbose: print('Membrane vertex ({}, {}, {})'.format( membrane_xyz[0], membrane_xyz[1], membrane_xyz[2])) # Get a distance map with all pairs of distances between current # graph vertex (membrane_xyz) and target vertices (ribosome # coordinates): dist_map = shortest_distance(self.graph, source=v_membrane, target=target_vertices_indices, weights=self.graph.ep.distance) # Iterate over all shortest distances from the membrane vertex to # the target vertices, while calculating the density: # Initializing: membrane coordinates with no reachable ribosomes # will have a value of 0, those with reachable ribosomes > 0. density = 0 # If there is only one target voxel, dist_map is a single value - # wrap it into a list. if len(target_coordinates_in_graph) == 1: dist_map = [dist_map] for d in dist_map: if verbose: print('\tTarget vertex ...') # if unreachable, the maximum float64 is stored if d == np.finfo(np.float64).max: if verbose: print('\t\tunreachable') else: if verbose: print('\t\td = {}'.format(d)) density += 1 / (d + 1) # Add the density of the membrane vertex as a property of the # current vertex in the graph: self.graph.vp.density[v_membrane] = density # Calculate the corresponding voxel of the vertex and add the # density to the list keyed by the voxel in the dictionary: # Scaling the coordinates back from units to voxels. (Without round # float coordinates are truncated to the next lowest integer.) voxel_x = int(round(membrane_xyz[0] / scale[0])) voxel_y = int(round(membrane_xyz[1] / scale[1])) voxel_z = int(round(membrane_xyz[2] / scale[2])) voxel = (voxel_x, voxel_y, voxel_z) if voxel in voxel_to_densities: voxel_to_densities[voxel].append(density) else: voxel_to_densities[voxel] = [density] if verbose: print('\tdensity = {}'.format(density)) if (self.graph.vertex_index[v_membrane] + 1) % 1000 == 0: now = datetime.now() print('{} membrane vertices processed on: {}-{}-{} {}:{}:{}'. 
format(self.graph.vertex_index[v_membrane] + 1, now.year, now.month, now.day, now.hour, now.minute, now.second)) # Initialize an array scaled like the original segmentation, which will # hold in each membrane voxel the maximal density among the # corresponding vertex coordinates in the graph plus 1 and 0 in each # background (non-membrane) voxel: densities = np.zeros(size, dtype=np.float16) # The densities array membrane voxels are initialized with 1 in order to # distinguish membrane voxels from the background. for voxel in voxel_to_densities: densities[voxel[0], voxel[1], voxel[2]] = 1 + max(voxel_to_densities[voxel]) if verbose: print('densities:\n{}'.format(densities)) return densities def graph_to_points_and_lines_polys(self, vertices=True, edges=True, verbose=False): """ Generates a VTK PolyData object from the graph with vertices as vertex-cells (containing 1 point) and edges as line-cells (containing 2 points). Args: vertices (boolean, optional): if True (default) vertices are stored a VTK PolyData object as vertex-cells edges (boolean, optional): if True (default) edges are stored a VTK PolyData object as line-cells verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: - vtk.vtkPolyData with vertex-cells - vtk.vtkPolyData with edges as line-cells """ # Initialization poly_verts = vtk.vtkPolyData() poly_lines = vtk.vtkPolyData() points = vtk.vtkPoints() vertex_arrays = list() edge_arrays = list() # Vertex property arrays for prop_key in list(self.graph.vp.keys()): data_type = self.graph.vp[prop_key].value_type() if (data_type != 'string' and data_type != 'python::object' and prop_key != 'xyz'): if verbose: print('\nvertex property key: {}'.format(prop_key)) print('value type: {}'.format(data_type)) if data_type[0:6] != 'vector': # scalar num_components = 1 else: # vector num_components = len( self.graph.vp[prop_key][self.graph.vertex(0)]) array = TypesConverter().gt_to_vtk(data_type) 
array.SetName(prop_key) if verbose: print('number of components: {}'.format(num_components)) array.SetNumberOfComponents(num_components) vertex_arrays.append(array) # Edge property arrays for prop_key in list(self.graph.ep.keys()): data_type = self.graph.ep[prop_key].value_type() if data_type != 'string' and data_type != 'python::object': if verbose: print('\nedge property key: {}'.format(prop_key)) print('value type: {}'.format(data_type)) if data_type[0:6] != 'vector': # scalar num_components = 1 else: # vector (all edge properties so far are scalars) # num_components = len( # self.graph.ep[prop_key][self.graph.edge(0, 1)]) num_components = 3 if verbose: print('Sorry, not implemented yet, assuming a vector ' 'with 3 components.') array = TypesConverter().gt_to_vtk(data_type) array.SetName(prop_key) if verbose: print('number of components: {}'.format(num_components)) array.SetNumberOfComponents(num_components) edge_arrays.append(array) if verbose: print('\nvertex arrays length: {}'.format(len(vertex_arrays))) print('edge arrays length: {}'.format(len(edge_arrays))) # Geometry lut = np.zeros(shape=self.graph.num_vertices(), dtype=np.int) for i, vd in enumerate(self.graph.vertices()): [x, y, z] = self.graph.vp.xyz[vd] points.InsertPoint(i, x, y, z) lut[self.graph.vertex_index[vd]] = i if verbose: print('number of points: {}'.format(points.GetNumberOfPoints())) # Topology # Vertices verts = vtk.vtkCellArray() if vertices: for vd in self.graph.vertices(): # vd = vertex descriptor verts.InsertNextCell(1) verts.InsertCellPoint(lut[self.graph.vertex_index[vd]]) for array in vertex_arrays: prop_key = array.GetName() n_comp = array.GetNumberOfComponents() data_type = self.graph.vp[prop_key].value_type() data_type = TypesConverter().gt_to_numpy(data_type) array.InsertNextTuple( self.get_vertex_prop_entry(prop_key, vd, n_comp, data_type)) if verbose: print('number of vertex cells: {}'.format( verts.GetNumberOfCells())) # Edges lines = vtk.vtkCellArray() if edges: for ed in 
self.graph.edges(): # ed = edge descriptor lines.InsertNextCell(2) lines.InsertCellPoint( lut[self.graph.vertex_index[ed.source()]]) lines.InsertCellPoint( lut[self.graph.vertex_index[ed.target()]]) for array in edge_arrays: prop_key = array.GetName() n_comp = array.GetNumberOfComponents() data_type = self.graph.ep[prop_key].value_type() data_type = TypesConverter().gt_to_numpy(data_type) array.InsertNextTuple( self.get_edge_prop_entry(prop_key, ed, n_comp, data_type)) if verbose: print('number of line cells: {}'.format( lines.GetNumberOfCells())) # vtkPolyData construction poly_verts.SetPoints(points) poly_lines.SetPoints(points) if vertices: poly_verts.SetVerts(verts) if edges: poly_lines.SetLines(lines) for array in vertex_arrays: poly_verts.GetCellData().AddArray(array) for array in edge_arrays: poly_lines.GetCellData().AddArray(array) return poly_verts, poly_lines def get_vertex_prop_entry(self, prop_key, vertex_descriptor, n_comp, data_type): """ Gets a property value of a vertex for inserting into a VTK vtkDataArray object. This function is used by the methods graph_to_points_and_lines_polys and graph_to_triangle_poly (the latter of the derived classes PointGraph and TriangleGraph (in surface_graphs). 
Args: prop_key (str): name of the desired vertex property vertex_descriptor (graph_tool.Vertex): vertex descriptor of the current vertex n_comp (int): number of components of the array (length of the output tuple) data_type: numpy data type converted from a graph-tool property value type by TypesConverter().gt_to_numpy Returns: a tuple (with length like n_comp) with the property value of the vertex converted to a numpy data type """ prop = list() if n_comp == 1: prop.append(data_type(self.graph.vp[prop_key][vertex_descriptor])) else: for i in range(n_comp): prop.append( data_type(self.graph.vp[prop_key][vertex_descriptor][i])) return tuple(prop) def get_edge_prop_entry(self, prop_key, edge_descriptor, n_comp, data_type): """ Gets a property value of an edge for inserting into a VTK vtkDataArray object. This private function is used by the method graph_to_points_and_lines_polys. Args: prop_key (str): name of the desired vertex property edge_descriptor (graph_tool.Edge): edge descriptor of the current edge n_comp (int): number of components of the array (length of the output tuple) data_type: numpy data type converted from a graph-tool property value type by TypesConverter().gt_to_numpy Returns: a tuple (with length like n_comp) with the property value of the edge converted to a numpy data type """ prop = list() if n_comp == 1: prop.append(data_type(self.graph.ep[prop_key][edge_descriptor])) else: for i in range(n_comp): prop.append( data_type(self.graph.ep[prop_key][edge_descriptor][i])) return tuple(prop) # * The following SegmentationGraph methods are needed for the normal vector # voting algorithm. * def calculate_average_edge_length(self, prop_e=None, value=1, verbose=False): """ Calculates the average edge length in the graph. If a special edge property is specified, includes only the edges where this property equals the given value. 
If there are no edges in the graph, the given property does not exist or there are no edges with the given property equaling the given value, None is returned. Args: prop_e (str, optional): edge property, if specified only edges where this property equals the given value will be considered value (int, optional): value of the specified edge property an edge has to have in order to be considered (default 1) verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: the average edge length in the graph (float) or None """ total_edge_length = 0 average_edge_length = None if prop_e is None: if verbose: print("Considering all edges:") if self.graph.num_edges() > 0: if verbose: print("{} edges".format(self.graph.num_edges())) average_edge_length = np.mean(self.graph.ep.distance.a) else: print("There are no edges in the graph!") elif prop_e in self.graph.edge_properties: if verbose: print("Considering only edges with property {} equaling value " "{}!".format(prop_e, value)) num_special_edges = 0 for ed in self.graph.edges(): if self.graph.edge_properties[prop_e][ed] == value: num_special_edges += 1 total_edge_length += self.graph.ep.distance[ed] if num_special_edges > 0: if verbose: print("{} such edges".format(num_special_edges)) average_edge_length = total_edge_length / num_special_edges else: print("There are no edges with the property {} equaling value " "{}!".format(prop_e, value)) if verbose: print("Average length: {}".format(average_edge_length)) return average_edge_length def find_geodesic_neighbors(self, v, g_max, full_dist_map=None, only_surface=False, verbose=False): """ Finds geodesic neighbor vertices of a given vertex v in the graph that are within a given maximal geodesic distance g_max from it. Also finds the corresponding geodesic distances. All edges are considered. The distances are calculated with Dijkstra's algorithm. 
Args: v (graph_tool.Vertex): the source vertex g_max: maximal geodesic distance (in the units of the graph) full_dist_map (graph_tool.PropertyMap, optional): the full distance map for the whole graph; if None, a local distance map is calculated for each vertex (default) only_surface (boolean, optional): if True (default False), only neighbors classified as surface patch (class 1) are considered verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: a dictionary mapping a neighbor vertex index to the geodesic distance from vertex v """ if full_dist_map is not None: dist_v = full_dist_map[v].get_array() else: dist_v = shortest_distance(self.graph, source=v, target=None, weights=self.graph.ep.distance, max_dist=g_max) dist_v = dist_v.get_array() # numpy array of distances from v to all vertices, in vertex index order vertex = self.graph.vertex orientation_class = self.graph.vp.orientation_class neighbor_id_to_dist = dict() idxs = np.where(dist_v <= g_max)[0] # others are INF for idx in idxs: dist = dist_v[idx] if dist != 0: # ignore the source vertex itself v_i = vertex(idx) if (not only_surface) or orientation_class[v_i] == 1: neighbor_id_to_dist[idx] = dist if verbose: print("{} neighbors".format(len(neighbor_id_to_dist))) return neighbor_id_to_dist def find_geodesic_neighbors_exact(self, o, g_max, only_surface=False, verbose=False, debug=False): """ Finds geodesic neighbor vertices of the origin vertex o in the graph that are within a given maximal geodesic distance g_max from it. Also finds the corresponding geodesic distances. All edges and faces are considered. The distances are calculated with Sun's and Abidi's algorithm, a simplification of Kimmels' and Sethian's fast marching algorithm. 
Args: o (graph_tool.Vertex): the source vertex g_max: maximal geodesic distance (in the units of the graph) only_surface (boolean, optional): if True (default False), only neighbors classified as surface patch (class 1) are considered verbose (boolean, optional): if True (default False), some extra information will be printed out debug (boolean, optional): if True (default False), some more extra information will be printed out Returns: a dictionary mapping a neighbor vertex index to the geodesic distance from vertex o """ # Shortcuts xyz = self.graph.vp.xyz vertex = self.graph.vertex orientation_class = self.graph.vp.orientation_class distance_between_voxels = self.distance_between_voxels calculate_geodesic_distance = self._calculate_geodesic_distance insert_geo_dist_vertex_id = self._insert_geo_dist_vertex_id # Initialization geo_dist_heap = [] # heap has the smallest geodesic distance first # dictionaries to keep track which geodesic distance belongs to which # vertex or vertices and vice versa geo_dist_to_vertex_ids = {} vertex_id_to_geo_dist = {} neighbor_id_to_dist = {} # output dictionary # Tag the center point (o) as Alive: self.graph.vp.tag = self.graph.new_vertex_property("string") tag = self.graph.vp.tag # shortcut tag[o] = "Alive" if debug: print("Vertex o={}: Alive".format(int(o))) vertex_id_to_geo_dist[int(o)] = 0 # need it for geo. dist. 
calculation xyz_o = tuple(xyz[o]) for n in o.all_neighbours(): # Tag all neighboring points of the center point (n) as Close tag[n] = "Close" # Geodesic distance in this case = Euclidean between o and n xyz_n = tuple(xyz[n]) on = distance_between_voxels(xyz_o, xyz_n) if debug: print("Vertex n={}: Close with distance {}".format(int(n), on)) heappush(geo_dist_heap, on) insert_geo_dist_vertex_id(geo_dist_to_vertex_ids, on, int(n)) vertex_id_to_geo_dist[int(n)] = on # Repeat while the smallest distance is <= g_max while len(geo_dist_heap) >= 1 and geo_dist_heap[0] <= g_max: if debug: print("\n{} distances in heap, first={}".format( len(geo_dist_heap), geo_dist_heap[0])) # 1. Change the tag of the point in Close with the smallest # geodesic distance (a) from Close to Alive smallest_geo_dist = heappop(geo_dist_heap) closest_vertices_ids = geo_dist_to_vertex_ids[smallest_geo_dist] a = vertex(closest_vertices_ids[0]) if len(closest_vertices_ids) > 1: # move the first one (a) to the # back, so it's not taken again next time closest_vertices_ids.pop(0) closest_vertices_ids.append(int(a)) tag[a] = "Alive" # only proceed if a is a surface patch: if only_surface and orientation_class[a] != 1: continue neighbor_id_to_dist[int(a)] = smallest_geo_dist # add a to output if debug: print("Vertex a={}: Alive".format(int(a))) neighbors_a = set(a.all_neighbours()) # actually don't have # duplicates, but like this can use fast sets' intersection method for c in neighbors_a: # 2. Tag all neighboring points (c) of this point as Close, # but skip those which are Alive already if tag[c] == "Alive": if debug: print("Skipping Alive neighbor {}".format(int(c))) continue tag[c] = "Close" if debug: print("Vertex c={}: Close".format(int(c))) # 3. 
Recompute the geodesic distance of these neighboring # points, using only values of points that are Alive, and renew # it only if the recomputed result is smaller # Find Alive point b, belonging to the same triangle as a and c: # iterate over an intersection of the neighbors of a and c neighbors_c = set(c.all_neighbours()) common_neighbors_a_c = neighbors_a.intersection(neighbors_c) for b in common_neighbors_a_c: # check if b is tagged Alive if tag[b] == "Alive": if debug: print("\tUsing vertex b={}".format(int(b))) new_geo_dist_c = calculate_geodesic_distance( a, b, xyz[c].a, vertex_id_to_geo_dist, verbose=verbose) if int(c) not in vertex_id_to_geo_dist: # add c if debug: print("\tadding new distance {}".format( new_geo_dist_c)) vertex_id_to_geo_dist[int(c)] = new_geo_dist_c heappush(geo_dist_heap, new_geo_dist_c) insert_geo_dist_vertex_id(geo_dist_to_vertex_ids, new_geo_dist_c, int(c)) else: old_geo_dist_c = vertex_id_to_geo_dist[int(c)] if new_geo_dist_c < old_geo_dist_c: # update c if debug: print( "\tupdating distance {} to {}".format( old_geo_dist_c, new_geo_dist_c)) vertex_id_to_geo_dist[int(c)] = new_geo_dist_c if old_geo_dist_c in geo_dist_heap: # check because it is sometimes not there geo_dist_heap.remove(old_geo_dist_c) heappush(geo_dist_heap, new_geo_dist_c) old_geo_dist_vertex_ids = geo_dist_to_vertex_ids[ old_geo_dist_c] if len(old_geo_dist_vertex_ids) == 1: del geo_dist_to_vertex_ids[old_geo_dist_c] else: old_geo_dist_vertex_ids.remove(int(c)) insert_geo_dist_vertex_id( geo_dist_to_vertex_ids, new_geo_dist_c, int(c)) elif debug: print("\tkeeping the old distance={}, because " "it's <= the new={}".format( old_geo_dist_c, new_geo_dist_c)) # if debug: # print(geo_dist_heap) # print(geo_dist_to_vertex_ids) # print(vertex_id_to_geo_dist) # print(neighbor_id_to_dist) break # one Alive b is expected, stop iteration else: if debug: print("\tNo common neighbors of a and c are Alive") del self.graph.vertex_properties["tag"] if debug: print("Vertex o={} has {} 
geodesic neighbors".format( int(o), len(neighbor_id_to_dist))) if verbose: print("{} neighbors".format(len(neighbor_id_to_dist))) return neighbor_id_to_dist def _calculate_geodesic_distance(self, a, b, xyz_c, vertex_id_to_geo_dist, verbose=False): geo_dist_a = vertex_id_to_geo_dist[int(a)] geo_dist_b = vertex_id_to_geo_dist[int(b)] xyz_a = self.graph.vp.xyz[a].a xyz_b = self.graph.vp.xyz[b].a ab = euclidean_distance(xyz_a, xyz_b) ac = euclidean_distance(xyz_a, xyz_c) bc = euclidean_distance(xyz_b, xyz_c) # maybe faster to use linalg.euclidean_distance directly on np.ndarrays alpha = nice_acos((ab**2 + ac**2 - bc**2) / (2 * ab * ac)) beta = nice_acos((ab**2 + bc**2 - ac**2) / (2 * ab * bc)) if alpha < (math.pi / 2) and beta < (math.pi / 2): if verbose: print("\ttriangle abc is acute") theta = nice_acos((geo_dist_a**2 + ab**2 - geo_dist_b**2) / (2 * ab * geo_dist_a)) geo_dist_c = math.sqrt(ac**2 + geo_dist_a**2 - 2 * ac * geo_dist_a * math.cos(alpha + theta)) else: if verbose: print("\ttriangle abc is obtuse") geo_dist_c = min(geo_dist_a + ac, geo_dist_b + bc) return geo_dist_c @staticmethod def _insert_geo_dist_vertex_id(geo_dist_to_vertices, geo_dist, vertex_ind): if geo_dist in geo_dist_to_vertices: geo_dist_to_vertices[geo_dist].append(vertex_ind) else: geo_dist_to_vertices[geo_dist] = [vertex_ind] def get_vertex_property_array(self, property_name): """ Gets a numpy array with all values of a vertex property of the graph, printing out the number of values, the minimal and the maximal value. 
Args: property_name (str): vertex property name Returns: an array (numpy.ndarray) with all values of the vertex property """ if (isinstance(property_name, str) and property_name in self.graph.vertex_properties): values = np.array( self.graph.vertex_properties[property_name].get_array()) print('{} "{}" values'.format(len(values), property_name)) print('min = {}, max = {}, mean = {}'.format( min(values), max(values), np.mean(values))) return values else: raise pexceptions.PySegInputError( expr='get_vertex_property_array (SegmentationGraph)', msg=('The input "{}" is not a str object or is not found in ' 'vertex properties of the graph.'.format(property_name)))
def build_graph(df_list, sens='ST', top=410, min_sens=0.01, edge_cutoff=0.0):
    """
    Initializes and constructs a graph where vertices are the parameters
    selected from the first dataframe in 'df_list', subject to the
    constraints set by 'sens', 'top', and 'min_sens'.  Edges are the second
    order sensitivities of the interactions between those vertices,
    with sensitivities greater than 'edge_cutoff'.

    Neither of the input dataframes is modified.

    Parameters
    -----------
    df_list     : list
                  A list of two dataframes.  The first dataframe should be
                  the first/total order sensitivities collected by the
                  function data_processing.get_sa_data().  It is read
                  through its 'Parameter', `sens` and `sens`_conf columns.
                  The second dataframe holds the second order
                  sensitivities and is read through its 'Parameter_1',
                  'Parameter_2' and 'S2' columns.
    sens        : str, optional
                  A string with the name of the sensitivity that you would
                  like to use for the vertices ('ST' or 'S1').
    top         : int, optional
                  An integer specifying the number of vertices to display (
                  the top sensitivity values).
    min_sens    : float, optional
                  A float with the minimum sensitivity to allow in the graph.
    edge_cutoff : float, optional
                  A float specifying the minimum second order sensitivity to
                  show as an edge in the graph.

    Returns
    --------
    g : graph-tool object
        a graph-tool graph object of the network described above.  Each
        vertex has properties 'param', 'sensitivity', and 'confidence'
        corresponding to the name of the parameter, value of the sensitivity
        index, and its confidence interval (stored as
        1 + confidence / sensitivity).  The only edge property is
        'second_sens', the second order sensitivity index for the
        interaction between the two vertices it connects, multiplied by 150.

    Raises
    ------
    ValueError
        If `sens` is neither 'ST' nor 'S1', or if the second order
        dataframe is missing (None or False).
    """
    # get the first/total index dataframe and second order dataframe
    df = df_list[0]
    df2 = df_list[1]

    # Make sure sens is ST or S1
    if sens not in ('ST', 'S1'):
        raise ValueError('sens must be ST or S1')
    # Make sure that there is a second order index dataframe.  An identity
    # test is used because the truth value of a DataFrame is ambiguous.
    if df2 is None or df2 is False:
        raise ValueError('Missing second order dataframe!')

    # slice the dataframe so the resulting graph will only include the top
    # 'top' values of 'sens' greater than 'min_sens'.
    df = df.sort_values(sens, ascending=False)
    df = df.loc[df[sens] > min_sens, :].head(top)

    # initialize a graph
    g = Graph()

    vprop_sens = g.new_vertex_property('double')
    vprop_conf = g.new_vertex_property('double')
    vprop_name = g.new_vertex_property('string')
    eprop_sens = g.new_edge_property('double')

    g.vertex_properties['param'] = vprop_name
    g.vertex_properties['sensitivity'] = vprop_sens
    g.vertex_properties['confidence'] = vprop_conf
    g.edge_properties['second_sens'] = eprop_sens

    # Add the vertices to the graph, remembering which vertex index belongs
    # to which parameter name (a later duplicate name wins).
    # `.values` is iterated so that the elements stay numpy scalars.
    param_to_vertex = {}
    rows = zip(df['Parameter'].values, df[sens].values,
               df['%s_conf' % sens].values)
    for param, sensitivity, confidence in rows:
        v = g.add_vertex()
        vprop_sens[v] = sensitivity
        vprop_conf[v] = 1 + confidence / sensitivity
        vprop_name[v] = param
        param_to_vertex[param] = int(v)

    # Translate both parameter columns of the second order dataframe into
    # vertex indices; parameters without a vertex become NaN.
    source = df2['Parameter_1'].map(param_to_vertex)
    target = df2['Parameter_2'].map(param_to_vertex)

    # Only allow edges between vertices that we've defined and eliminate
    # edges that are not above the cutoff value.
    keep = source.notnull() & target.notnull() & (df2['S2'] > edge_cutoff)

    # Add the edges for the graph
    edges = zip(source[keep], target[keep], df2.loc[keep, 'S2'])
    for v1, v2, sensitivity in edges:
        # the mapped columns are float when they contained NaN
        e = g.add_edge(int(v1), int(v2))
        # multiply by a number to make the lines visible on the plot
        eprop_sens[e] = sensitivity * 150

    # These are ways you can reference properties of vertices or edges
    # g.vp.param[g.vertex(77)]
    # g.vp.param[g.vertex(0)]

    print('Created a graph with %s vertices and %s edges.\nVertices are the '
          'top %s %s values greater than %s.\nOnly S2 values (edges) '
          'greater than %s are included.' %
          (g.num_vertices(), g.num_edges(), top, sens, min_sens, edge_cutoff))

    return g
def build_graph(df_list, sens='ST', top=410, min_sens=0.01, edge_cutoff=0.0):
    """
    Initializes and constructs a graph where vertices are the parameters
    selected from the first dataframe in 'df_list', subject to the
    constraints set by 'sens', 'top', and 'min_sens'.  Edges are the second
    order sensitivities of the interactions between those vertices,
    with sensitivities greater than 'edge_cutoff'.

    Neither of the input dataframes is modified.

    Parameters
    -----------
    df_list     : list
                  A list of two dataframes.  The first dataframe should be
                  the first/total order sensitivities collected by the
                  function data_processing.get_sa_data().  It is read
                  through its 'Parameter', `sens` and `sens`_conf columns.
                  The second dataframe holds the second order
                  sensitivities and is read through its 'Parameter_1',
                  'Parameter_2' and 'S2' columns.
    sens        : str, optional
                  A string with the name of the sensitivity that you would
                  like to use for the vertices ('ST' or 'S1').
    top         : int, optional
                  An integer specifying the number of vertices to display (
                  the top sensitivity values).
    min_sens    : float, optional
                  A float with the minimum sensitivity to allow in the graph.
    edge_cutoff : float, optional
                  A float specifying the minimum second order sensitivity to
                  show as an edge in the graph.

    Returns
    --------
    g : graph-tool object
        a graph-tool graph object of the network described above.  Each
        vertex has properties 'param', 'sensitivity', and 'confidence'
        corresponding to the name of the parameter, value of the sensitivity
        index, and its confidence interval (stored as
        1 + confidence / sensitivity).  The only edge property is
        'second_sens', the second order sensitivity index for the
        interaction between the two vertices it connects, multiplied by 150.

    Raises
    ------
    ValueError
        If `sens` is neither 'ST' nor 'S1', or if the second order
        dataframe is missing (None or False).
    """
    # get the first/total index dataframe and second order dataframe
    df = df_list[0]
    df2 = df_list[1]

    # Make sure sens is ST or S1
    if sens not in ('ST', 'S1'):
        raise ValueError('sens must be ST or S1')
    # Make sure that there is a second order index dataframe.  An identity
    # test is used because the truth value of a DataFrame is ambiguous.
    if df2 is None or df2 is False:
        raise ValueError('Missing second order dataframe!')

    # slice the dataframe so the resulting graph will only include the top
    # 'top' values of 'sens' greater than 'min_sens'.
    df = df.sort_values(sens, ascending=False)
    df = df.loc[df[sens] > min_sens, :].head(top)

    # initialize a graph
    g = Graph()

    vprop_sens = g.new_vertex_property('double')
    vprop_conf = g.new_vertex_property('double')
    vprop_name = g.new_vertex_property('string')
    eprop_sens = g.new_edge_property('double')

    g.vertex_properties['param'] = vprop_name
    g.vertex_properties['sensitivity'] = vprop_sens
    g.vertex_properties['confidence'] = vprop_conf
    g.edge_properties['second_sens'] = eprop_sens

    # Add the vertices to the graph, remembering which vertex index belongs
    # to which parameter name (a later duplicate name wins).
    # `.values` is iterated so that the elements stay numpy scalars.
    param_to_vertex = {}
    rows = zip(df['Parameter'].values, df[sens].values,
               df['%s_conf' % sens].values)
    for param, sensitivity, confidence in rows:
        v = g.add_vertex()
        vprop_sens[v] = sensitivity
        vprop_conf[v] = 1 + confidence / sensitivity
        vprop_name[v] = param
        param_to_vertex[param] = int(v)

    # Translate both parameter columns of the second order dataframe into
    # vertex indices; parameters without a vertex become NaN.
    source = df2['Parameter_1'].map(param_to_vertex)
    target = df2['Parameter_2'].map(param_to_vertex)

    # Only allow edges between vertices that we've defined and eliminate
    # edges that are not above the cutoff value.
    keep = source.notnull() & target.notnull() & (df2['S2'] > edge_cutoff)

    # Add the edges for the graph
    edges = zip(source[keep], target[keep], df2.loc[keep, 'S2'])
    for v1, v2, sensitivity in edges:
        # the mapped columns are float when they contained NaN
        e = g.add_edge(int(v1), int(v2))
        # multiply by a number to make the lines visible on the plot
        eprop_sens[e] = sensitivity * 150

    # These are ways you can reference properties of vertices or edges
    # g.vp.param[g.vertex(77)]
    # g.vp.param[g.vertex(0)]

    print('Created a graph with %s vertices and %s edges.\nVertices are the '
          'top %s %s values greater than %s.\nOnly S2 values (edges) '
          'greater than %s are included.' %
          (g.num_vertices(), g.num_edges(), top, sens, min_sens, edge_cutoff))

    return g
def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                      full_partition: BlockState,
                      sampled_graph_partition: BlockState,
                      block_mapping: Dict[int, int],
                      vertex_mapping: Dict[int, int],
                      assignment: np.ndarray):
    """Compute sample-quality metrics and store them as attributes of self.

    Nothing is returned. The metrics that need a ground truth (edge ratios
    and the difference from the ideal sample) are skipped when `assignment`
    holds a single distinct value.

    Parameters
    ----------
    full_graph : Graph
        the full, unsampled graph
    sampled_graph : Graph
        the sampled graph
    full_partition : BlockState
        the partitioning results on the full graph
    sampled_graph_partition : BlockState
        the partitioning results on the sampled graph
    block_mapping : Dict[int, int]
        the mapping of blocks from the full graph to the sampled graph
        (not read by this method)
    vertex_mapping : Dict[int, int]
        the mapping of vertices from the full graph to the sampled graph;
        its keys are used as vertex ids of the full graph
    assignment : np.ndarray[int]
        the true vertex-to-community mapping
    """
    # ---- General ----
    self.sampled_graph_num_vertices = sampled_graph.num_vertices()
    self.sampled_graph_num_edges = sampled_graph.num_edges()
    sampled_block_count = sampled_graph_partition.get_B()
    self.blocks_retained = sampled_block_count / full_partition.get_B()

    # pseudo_diameter returns (diameter, (start_vertex, end_vertex)); only
    # the diameter is kept.
    sampled_diameter, _ = pseudo_diameter(sampled_graph)
    self.sampled_graph_diameter = sampled_diameter
    full_diameter, _ = pseudo_diameter(full_graph)
    self.full_graph_diameter = full_diameter

    # Vertices without any in- or out-edges are islands; the running
    # counter on self is only touched when at least one was found.
    island_count = sum(
        1 for v in sampled_graph.vertices()
        if v.in_degree() + v.out_degree() == 0)
    if island_count:
        self.sampled_graph_island_vertices += island_count

    sampled_component = extract_largest_component(sampled_graph,
                                                  directed=False)
    self.sampled_graph_largest_component = sampled_component.num_vertices()
    full_component = extract_largest_component(full_graph, directed=False)
    self.full_graph_largest_component = full_component.num_vertices()

    # ---- Expansion quality ----
    # (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
    # Expansion factor = Neighbors of sample / size of sample
    # Maximum expansion factor =
    #     (size of graph - size of sample) / size of sample
    # Expansion quality =
    #     Neighbors of sample / (size of graph - size of sample)
    # Expansion quality = 1 means sample is at most 1 edge away from the
    # entire graph
    sampled_ids = set(vertex_mapping.keys())
    reachable = {
        neighbor
        for vertex in sampled_ids
        for neighbor in full_graph.get_out_neighbors(vertex)
    }
    outside_neighbors = reachable - sampled_ids
    unsampled_count = (full_graph.num_vertices()
                       - sampled_graph.num_vertices())
    self.expansion_quality = len(outside_neighbors) / unsampled_count

    # ---- Clustering coefficient ----
    # global_clustering returns a sequence whose first item is the
    # coefficient.
    self.sampled_graph_clustering_coefficient = global_clustering(
        sampled_graph)[0]
    self.full_graph_clustering_coefficient = global_clustering(
        full_graph)[0]

    # ---- Info on communities ----
    full_blocks = full_partition.get_blocks().get_array()
    sampled_blocks = sampled_graph_partition.get_blocks().get_array()
    self.get_community_details(assignment, full_blocks, sampled_blocks,
                               vertex_mapping)

    # Cannot compute below metrics if no true partition is provided
    if np.unique(assignment).size == 1:
        return

    # ---- % difference in ratio of within-block to between-block edges ----
    sampled_vertex_ids = np.fromiter(vertex_mapping.keys(), dtype=np.int32)
    sample_assignment = assignment[sampled_vertex_ids]

    sampled_matrix = partition_from_truth(
        sampled_graph, sample_assignment).get_matrix()
    self.sampled_graph_edge_ratio = (sampled_matrix.diagonal().sum()
                                     / sampled_matrix.sum())

    full_matrix = partition_from_truth(full_graph, assignment).get_matrix()
    self.graph_edge_ratio = full_matrix.diagonal().sum() / full_matrix.sum()

    # ---- Normalized difference from ideal-block membership ----
    membership_size = max(np.max(assignment), np.max(sample_assignment)) + 1

    # Unbuffered in-place addition counts repeated block ids correctly,
    # exactly like incrementing one element at a time.
    full_counts = np.zeros(membership_size)
    np.add.at(full_counts, assignment, 1)
    sampled_counts = np.zeros(membership_size)
    np.add.at(sampled_counts, sample_assignment, 1)

    sampling_fraction = (sampled_graph.num_vertices()
                         / full_graph.num_vertices())
    ideal_counts = full_counts * sampling_fraction
    deviation = np.abs(ideal_counts - sampled_counts)
    self.difference_from_ideal_sample = np.sum(
        deviation / sampled_graph.num_vertices())
class SegmentationGraph(object): """ Class defining the abstract SegmentationGraph object, its attributes and implements methods common to all derived graph classes. The constructor requires the following parameters of the underlying segmentation that will be used to build the graph. Args: scale_factor_to_nm (float): pixel size in nanometers for scaling the graph scale_x (int): x axis length in pixels of the segmentation scale_y (int): y axis length in pixels of the segmentation scale_z (int): z axis length in pixels of the segmentation """ def __init__(self, scale_factor_to_nm, scale_x, scale_y, scale_z): """ Constructor. Args: scale_factor_to_nm (float): pixel size in nanometers for scaling the graph scale_x (int): x axis length in pixels of the segmentation scale_y (int): y axis length in pixels of the segmentation scale_z (int): z axis length in pixels of the segmentation Returns: None """ self.graph = Graph(directed=False) """graph_tool.Graph: a graph object storing the segmentation graph topology, geometry and properties. """ self.scale_factor_to_nm = scale_factor_to_nm """float: pixel size in nanometers for scaling the coordinates and distances in the graph """ self.scale_x = scale_x """int: x axis length in pixels of the segmentation""" self.scale_y = scale_y """int: y axis length in pixels of the segmentation""" self.scale_z = scale_z """int: z axis length in pixels of the segmentation""" # Add "internal property maps" to the graph. # vertex property for storing the xyz coordinates in nanometers of the # corresponding vertex: self.graph.vp.xyz = self.graph.new_vertex_property("vector<float>") # edge property for storing the distance in nanometers between the # connected vertices: self.graph.ep.distance = self.graph.new_edge_property("float") self.coordinates_to_vertex_index = {} """dist: a dictionary mapping the vertex coordinates in nanometers (x, y, z) to the vertex index. 
""" self.coordinates_pair_connected = {} """dict: a dictionary storing pairs of vertex coordinates in nanometers that are connected by an edge as a key in a tuple form ((x1, y1, z1), (x2, y2, z2)) with value True. """ @staticmethod def distance_between_voxels(voxel1, voxel2): """ Calculates and returns the Euclidean distance between two voxels. Args: voxel1 (tuple): first voxel coordinates in form of a tuple of integers of length 3 (x1, y1, z1) voxel2 (tuple): second voxel coordinates in form of a tuple of integers of length 3 (x2, y2, z2) Returns: the Euclidean distance between two voxels (float) """ if (isinstance(voxel1, tuple) and (len(voxel1) == 3) and isinstance(voxel2, tuple) and (len(voxel2) == 3)): sum_of_squared_differences = 0 for i in range(3): # for each dimension sum_of_squared_differences += (voxel1[i] - voxel2[i]) ** 2 return math.sqrt(sum_of_squared_differences) else: error_msg = ('Tuples of integers of length 3 required as first and ' 'second input.') raise pexceptions.PySegInputError( expr='distance_between_voxels (SegmentationGraph)', msg=error_msg ) def update_coordinates_to_vertex_index(self): """ Updates graph's dictionary coordinates_to_vertex_index. The dictionary maps the vertex coordinates (x, y, z) scaled in nanometers to the vertex index. It has to be updated after purging the graph, because vertices are renumbered, as well as after reading a graph from a file (e.g. before density calculation). Returns: None """ self.coordinates_to_vertex_index = {} for vd in self.graph.vertices(): [x, y, z] = self.graph.vp.xyz[vd] self.coordinates_to_vertex_index[ (x, y, z)] = self.graph.vertex_index[vd] def calculate_density(self, mask=None, target_coordinates=None, verbose=False): """ Calculates ribosome density for each membrane graph vertex. 
Calculates shortest geodesic distances (d) for each vertex in the graph to each reachable ribosome center mapped on the membrane given by a binary mask with coordinates in pixels or an array of coordinates in nm. Then, calculates a density measure of ribosomes at each vertex or membrane voxel: D = sum {over all reachable ribosomes} (1 / (d + 1)). Adds the density as vertex PropertyMap to the graph. Returns an array with the same shape as the underlying segmentation with the densities plus 1, in order to distinguish membrane voxels with 0 density from the background. Args: mask (numpy.ndarray, optional): a binary mask of the ribosome centers as 3D array where indices are coordinates in pixels (default None) target_coordinates (numpy.ndarray, optional): the ribosome centers coordinates in nm as 2D array in format [[x1, y1, z1], [x2, y2, z2], ...] (default None) verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: a 3D numpy ndarray with the densities + 1 Note: One of the first two parameters, mask or target_coordinates, has to be given. """ import ribosome_density as rd # If a mask is given, find the set of voxels of ribosome centers mapped # on the membrane, 'target_voxels', and rescale them to nm, # 'target_coordinates': if mask is not None: if mask.shape != (self.scale_x, self.scale_y, self.scale_z): error_msg = ("Scales of the input 'mask' have to be equal to " "those set during the generation of the graph.") raise pexceptions.PySegInputError( expr='calculate_density (SegmentationGraph)', msg=error_msg ) # output as a list of tuples [(x1,y1,z1), (x2,y2,z2), ...] in pixels target_voxels = rd.get_foreground_voxels_from_mask(mask) # for rescaling have to convert to an ndarray target_ndarray_voxels = rd.tupel_list_to_ndarray_voxels( target_voxels ) # rescale to nm, output an ndarray [[x1,y1,z1], [x2,y2,z2], ...] 
target_ndarray_coordinates = (target_ndarray_voxels * self.scale_factor_to_nm) # convert to a list of tuples, which are in nm now target_coordinates = rd.ndarray_voxels_to_tupel_list( target_ndarray_coordinates ) # If target_coordinates are given (in nm), convert them from a numpy # ndarray to a list of tuples: elif target_coordinates is not None: target_coordinates = rd.ndarray_voxels_to_tupel_list( target_coordinates ) # Exit if the target_voxels list is empty: if len(target_coordinates) == 0: error_msg = ("No target voxels were found! Check your input " "('mask' or 'target_coordinates').") raise pexceptions.PySegInputError( expr='calculate_density (SegmentationGraph)', msg=error_msg ) print '%s target voxels' % len(target_coordinates) if verbose: print target_coordinates # Pre-filter the target coordinates to those existing in the graph # (should already all be in the graph, but just in case): target_coordinates_in_graph = [] for target_xyz in target_coordinates: if target_xyz in self.coordinates_to_vertex_index: target_coordinates_in_graph.append(target_xyz) else: error_msg = ('Target (%s, %s, %s) not inside the membrane!' 
% (target_xyz[0], target_xyz[1], target_xyz[2])) raise pexceptions.PySegInputWarning( expr='calculate_density (SegmentationGraph)', msg=error_msg ) print '%s target coordinates in graph' % len( target_coordinates_in_graph) if verbose: print target_coordinates_in_graph # Get all indices of the target coordinates: target_vertices_indices = [] for target_xyz in target_coordinates_in_graph: v_target_index = self.coordinates_to_vertex_index[target_xyz] target_vertices_indices.append(v_target_index) # Density calculation # Add a new vertex property to the graph, density: self.graph.vp.density = self.graph.new_vertex_property("float") # Dictionary mapping voxel coordinates (for the volume returned later) # to a list of density values falling within that voxel: voxel_to_densities = {} # For each vertex in the graph: for v_membrane in self.graph.vertices(): # Get its coordinates: membrane_xyz = self.graph.vp.xyz[v_membrane] if verbose: print ('Membrane vertex (%s, %s, %s)' % (membrane_xyz[0], membrane_xyz[1], membrane_xyz[2])) # Get a distance map with all pairs of distances between current # graph vertex (membrane_xyz) and target vertices (ribosome # coordinates): dist_map = shortest_distance(self.graph, source=v_membrane, target=target_vertices_indices, weights=self.graph.ep.distance) # Iterate over all shortest distances from the membrane vertex to # the target vertices, while calculating the density: # Initializing: membrane coordinates with no reachable ribosomes # will have a value of 0, those with reachable ribosomes > 0. density = 0 # If there is only one target voxel, dist_map is a single value - # wrap it into a list. if len(target_coordinates_in_graph) == 1: dist_map = [dist_map] for d in dist_map: if verbose: print '\tTarget vertex ...' 
# if unreachable, the maximum float64 is stored if d == np.finfo(np.float64).max: if verbose: print '\t\tunreachable' else: if verbose: print '\t\td = %s' % d density += 1 / (d + 1) # Add the density of the membrane vertex as a property of the # current vertex in the graph: self.graph.vp.density[v_membrane] = density # Calculate the corresponding voxel of the vertex and add the # density to the list keyed by the voxel in the dictionary: # Scaling the coordinates back from nm to voxels. (Without round # float coordinates are truncated to the next lowest integer.) voxel_x = int(round(membrane_xyz[0] / self.scale_factor_to_nm)) voxel_y = int(round(membrane_xyz[1] / self.scale_factor_to_nm)) voxel_z = int(round(membrane_xyz[2] / self.scale_factor_to_nm)) voxel = (voxel_x, voxel_y, voxel_z) if voxel in voxel_to_densities: voxel_to_densities[voxel].append(density) else: voxel_to_densities[voxel] = [density] if verbose: print '\tdensity = %s' % density if (self.graph.vertex_index[v_membrane] + 1) % 1000 == 0: now = datetime.now() print ('%s membrane vertices processed on: %s-%s-%s %s:%s:%s' % (self.graph.vertex_index[v_membrane] + 1, now.year, now.month, now.day, now.hour, now.minute, now.second)) # Initialize an array scaled like the original segmentation, which will # hold in each membrane voxel the maximal density among the # corresponding vertex coordinates in the graph plus 1 and 0 in each # background (non-membrane) voxel: densities = np.zeros((self.scale_x, self.scale_y, self.scale_z), dtype=np.float16) # The densities array membrane voxels are initialized with 1 in order to # distinguish membrane voxels from the background. 
for voxel in voxel_to_densities: densities[voxel[0], voxel[1], voxel[2]] = 1 + max( voxel_to_densities[voxel]) if verbose: print 'densities:\n%s' % densities return densities def graph_to_points_and_lines_polys(self, vertices=True, edges=True, verbose=False): """ Generates a VTK PolyData object from the graph with vertices as vertex-cells (containing 1 point) and edges as line-cells (containing 2 points). Args: vertices (boolean, optional): if True (default) vertices are stored a VTK PolyData object as vertex-cells edges (boolean, optional): if True (default) edges are stored a VTK PolyData object as line-cells verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: - vtk.vtkPolyData with vertex-cells - vtk.vtkPolyData with edges as line-cells """ # Initialization poly_verts = vtk.vtkPolyData() poly_lines = vtk.vtkPolyData() points = vtk.vtkPoints() vertex_arrays = list() edge_arrays = list() # Vertex property arrays for prop_key in self.graph.vp.keys(): data_type = self.graph.vp[prop_key].value_type() if (data_type != 'string' and data_type != 'python::object' and prop_key != 'xyz'): if verbose: print '\nvertex property key: %s' % prop_key print 'value type: %s' % data_type if data_type[0:6] != 'vector': # scalar num_components = 1 else: # vector num_components = len( self.graph.vp[prop_key][self.graph.vertex(0)] ) array = TypesConverter().gt_to_vtk(data_type) array.SetName(prop_key) if verbose: print 'number of components: %s' % num_components array.SetNumberOfComponents(num_components) vertex_arrays.append(array) # Edge property arrays for prop_key in self.graph.ep.keys(): data_type = self.graph.ep[prop_key].value_type() if data_type != 'string' and data_type != 'python::object': if verbose: print '\nedge property key: %s' % prop_key print 'value type: %s' % data_type if data_type[0:6] != 'vector': # scalar num_components = 1 else: # vector (all edge properties so far are scalars) # num_components = len( # 
self.graph.ep[prop_key][self.graph.edge(0, 1)] # ) num_components = 3 if verbose: print ('Sorry, not implemented yet, assuming a vector ' 'with 3 components.') array = TypesConverter().gt_to_vtk(data_type) array.SetName(prop_key) if verbose: print 'number of components: %s' % num_components array.SetNumberOfComponents(num_components) edge_arrays.append(array) if verbose: print '\nvertex arrays length: %s' % len(vertex_arrays) print 'edge arrays length: %s' % len(edge_arrays) # Geometry lut = np.zeros(shape=self.graph.num_vertices(), dtype=np.int) for i, vd in enumerate(self.graph.vertices()): [x, y, z] = self.graph.vp.xyz[vd] points.InsertPoint(i, x, y, z) lut[self.graph.vertex_index[vd]] = i if verbose: print 'number of points: %s' % points.GetNumberOfPoints() # Topology # Vertices verts = vtk.vtkCellArray() if vertices: for vd in self.graph.vertices(): # vd = vertex descriptor verts.InsertNextCell(1) verts.InsertCellPoint(lut[self.graph.vertex_index[vd]]) for array in vertex_arrays: prop_key = array.GetName() n_comp = array.GetNumberOfComponents() data_type = self.graph.vp[prop_key].value_type() data_type = TypesConverter().gt_to_numpy(data_type) array.InsertNextTuple(self.get_vertex_prop_entry( prop_key, vd, n_comp, data_type)) if verbose: print 'number of vertex cells: %s' % verts.GetNumberOfCells() # Edges lines = vtk.vtkCellArray() if edges: for ed in self.graph.edges(): # ed = edge descriptor lines.InsertNextCell(2) lines.InsertCellPoint(lut[self.graph.vertex_index[ed.source()]]) lines.InsertCellPoint(lut[self.graph.vertex_index[ed.target()]]) for array in edge_arrays: prop_key = array.GetName() n_comp = array.GetNumberOfComponents() data_type = self.graph.ep[prop_key].value_type() data_type = TypesConverter().gt_to_numpy(data_type) array.InsertNextTuple(self.get_edge_prop_entry( prop_key, ed, n_comp, data_type)) if verbose: print 'number of line cells: %s' % lines.GetNumberOfCells() # vtkPolyData construction poly_verts.SetPoints(points) 
poly_lines.SetPoints(points) if vertices: poly_verts.SetVerts(verts) if edges: poly_lines.SetLines(lines) for array in vertex_arrays: poly_verts.GetCellData().AddArray(array) for array in edge_arrays: poly_lines.GetCellData().AddArray(array) return poly_verts, poly_lines def get_vertex_prop_entry(self, prop_key, vertex_descriptor, n_comp, data_type): """ Gets a property value of a vertex for inserting into a VTK vtkDataArray object. This private function is used by the methods graph_to_points_and_lines_polys and graph_to_triangle_poly (the latter of the derived class surface_graphs.TriangleGraph). Args: prop_key (str): name of the desired vertex property vertex_descriptor (graph_tool.Vertex): vertex descriptor of the current vertex n_comp (int): number of components of the array (length of the output tuple) data_type: numpy data type converted from a graph-tool property value type by TypesConverter().gt_to_numpy Returns: a tuple (with length like n_comp) with the property value of the vertex converted to a numpy data type """ prop = list() if n_comp == 1: prop.append(data_type(self.graph.vp[prop_key][vertex_descriptor])) else: for i in range(n_comp): prop.append(data_type( self.graph.vp[prop_key][vertex_descriptor][i])) return tuple(prop) def get_edge_prop_entry(self, prop_key, edge_descriptor, n_comp, data_type): """ Gets a property value of an edge for inserting into a VTK vtkDataArray object. This private function is used by the method graph_to_points_and_lines_polys. 
Args: prop_key (str): name of the desired vertex property edge_descriptor (graph_tool.Edge): edge descriptor of the current edge n_comp (int): number of components of the array (length of the output tuple) data_type: numpy data type converted from a graph-tool property value type by TypesConverter().gt_to_numpy Returns: a tuple (with length like n_comp) with the property value of the edge converted to a numpy data type """ prop = list() if n_comp == 1: prop.append(data_type(self.graph.ep[prop_key][edge_descriptor])) else: for i in range(n_comp): prop.append(data_type( self.graph.ep[prop_key][edge_descriptor][i])) return tuple(prop) # * The following SegmentationGraph methods are needed for the normal vector # voting algorithm. * def calculate_average_edge_length(self, prop_e=None, value=1): """ Calculates the average edge length in the graph. If a special edge property is specified, includes only the edges where this property equals the given value. If there are no edges in the graph, the given property does not exist or there are no edges with the given property equaling the given value, None is returned. Args: prop_e (str, optional): edge property, if specified only edges where this property equals the given value will be considered value (int, optional): value of the specified edge property an edge has to have in order to be considered (default 1) Returns: the average edge length in the graph (float) or None """ total_edge_length = 0 average_edge_length = None if prop_e is None: print "Considering all edges:" for ed in self.graph.edges(): total_edge_length += self.graph.ep.distance[ed] if self.graph.num_edges() > 0: average_edge_length = total_edge_length / self.graph.num_edges() else: print "There are no edges in the graph!" 
elif prop_e in self.graph.edge_properties: print ("Considering only edges with property %s equaling value %s " % (prop_e, value)) num_special_edges = 0 for ed in self.graph.edges(): if self.graph.edge_properties[prop_e][ed] == value: num_special_edges += 1 total_edge_length += self.graph.ep.distance[ed] if num_special_edges > 0: average_edge_length = total_edge_length / num_special_edges else: print ("There are no edges with the property %s equaling value " "%s!" % (prop_e, value)) print "Average length: %s" % average_edge_length return average_edge_length def find_geodesic_neighbors(self, v, g_max, verbose=False): """ Finds geodesic neighbor vertices of a given vertex v in the graph that are within a given maximal geodesic distance g_max from it. Also finds the corresponding geodesic distances. All edges are considered. Args: v (graph_tool.Vertex): the source vertex g_max: maximal geodesic distance (in nanometers, if the graph was scaled) verbose (boolean, optional): if True (default False), some extra information will be printed out Returns: a dictionary mapping a neighbor vertex index to the geodesic distance from vertex v """ dist_v = shortest_distance(self.graph, source=v, target=None, weights=self.graph.ep.distance, max_dist=g_max) dist_v = dist_v.get_array() neighbor_id_to_dist = dict() idxs = np.where(dist_v <= g_max)[0] for idx in idxs: dist = dist_v[idx] if dist != 0: # ignore the source vertex itself neighbor_id_to_dist[idx] = dist if verbose: print "%s neighbors" % len(neighbor_id_to_dist) return neighbor_id_to_dist def get_vertex_property_array(self, property_name): """ Gets a numpy array with all values of a vertex property of the graph, printing out the number of values, the minimal and the maximal value. 
Args: property_name (str): vertex property name Returns: an array (numpy.ndarray) with all values of the vertex property """ if (isinstance(property_name, str) and property_name in self.graph.vertex_properties): values = self.graph.vertex_properties[property_name].get_array() print '%s "%s" values' % (len(values), property_name) print 'min = %s, max = %s' % (min(values), max(values)) return values else: error_msg = ('The input "%s" is not a str object or is not found ' 'in vertex properties of the graph.' % property_name) raise pexceptions.PySegInputError( expr='get_vertex_property_array (SegmentationGraph)', msg=error_msg)
class BaseGraph(object):
    """
    Class representing a graph.

    We do not use a pure graph_tool.Graph because we want to be able to
    change the underlying library easily. Neither do we use inheritance, as
    graph_tool has an inconvenient licence.
    """

    def __init__(self):
        self._g = None
        self._node_dict = {}
        self._syn_to_vertex_map = None
        self._lemma_to_nodes_dict = None
        self._lu_on_vertex_dict = None

    def use_graph_tool(self):
        """
        Returns the underlying graph_tool.Graph.
        It should be avoided at all costs.
        """
        return self._g

    def get_node_for_synset_id(self, syn_id):
        """
        Returns the node holding the synset with identifier ``syn_id``, or
        None when no such node is known.

        The synset-id -> node map is built lazily on the first call and
        reused afterwards. An empty map is treated as "not built yet", so it
        is rebuilt on every call until at least one synset node exists.
        """
        if not self._syn_to_vertex_map:
            self._syn_to_vertex_map = {}
            for node in self.all_nodes():
                if node.synset:
                    self._syn_to_vertex_map[node.synset.synset_id] = node
        return self._syn_to_vertex_map.get(syn_id, None)

    def pickle(self, filename):
        """Saves the underlying graph to ``filename``."""
        self._g.save(filename)

    def unpickle(self, filename):
        """Loads the underlying graph from ``filename``."""
        self._g = load_graph(filename)

    def init_graph(self, drctd=False):
        """Creates a fresh, empty underlying graph."""
        self._g = Graph(directed=drctd)

    def copy_graph_from(self, g):
        """Replaces the underlying graph with a copy of the one held by ``g``."""
        self._g = g._g.copy()

    def set_directed(self, drctd):
        self._g.set_directed(drctd)

    def is_directed(self):
        return self._g.is_directed()

    def merge_graphs(self, g1, g2):
        """Sets the underlying graph to the union of ``g1`` and ``g2``."""
        self._g = graph_union(g1._g, g2._g, internal_props=True)

    # Node operations:

    def all_nodes(self):
        """Yields a BaseNode wrapper for every vertex of the graph."""
        for vertex in self._g.vertices():
            yield BaseNode(self._g, vertex)

    def create_node_attribute(self, name, kind, value=None):
        """Creates vertex property ``name`` of type ``kind`` unless it exists."""
        if not self.has_node_attribute(name):
            self._g.vertex_properties[name] = \
                self._g.new_vertex_property(kind, value)

    def create_node_attributes(self, node_attributes_list):
        """
        Creates every missing vertex property from a list of
        ``(name, kind)`` pairs.
        """
        for attr in node_attributes_list:
            if not self.has_node_attribute(attr[0]):
                self._g.vertex_properties[attr[0]] = \
                    self._g.new_vertex_property(attr[1])

    def has_node_attribute(self, name):
        """ Checks if a node attribute already exists """
        return name in self._g.vertex_properties

    def delete_node_attribute(self, name):
        """ Delete node attribute """
        del self._g.vertex_properties[name]

    def add_node(self, name, node_attributes_list=None):
        """
        Returns the node registered under ``name``, creating it first when
        it does not exist yet.

        ``node_attributes_list`` is a list of ``(attribute, value)`` pairs.
        They are applied only to a newly created vertex; an already
        registered node is returned untouched.
        """
        if name not in self._node_dict:
            new_node = self._g.add_vertex()
            self._node_dict[name] = BaseNode(self._g, new_node)
            for attr in node_attributes_list or ():
                self._g.vertex_properties[attr[0]][new_node] = attr[1]
        return self._node_dict[name]

    def get_node(self, name):
        return self._node_dict[name]

    def remove_node(self, name):
        # NOTE(review): removing a vertex may invalidate the descriptors held
        # by other entries of ``_node_dict`` - confirm against graph_tool docs.
        self._g.remove_vertex(self._node_dict[name]._node)
        del self._node_dict[name]

    def nodes_filter(self, nodes_to_filter_set, inverted=False,
                     replace=False, soft=False):
        """
        Filters out nodes from set

        Args:
          nodes_to_filter_set (Iterable): Nodes which will be filtered out.
          inverted (bool): If True, nodes NOT in set will be filtered out.
            Defaults to False.
          replace (bool): Replace current filter instead of combining the two.
            Defaults to False.
          soft (bool): Hide nodes without removing them so they can be
            restored with reset_nodes_filter. Defaults to False.
        """
        def keep(node):
            return node not in nodes_to_filter_set

        self.nodes_filter_conditional(keep, inverted, replace, soft)

    def nodes_filter_conditional(self, predicate, inverted=False,
                                 replace=False, soft=False):
        """
        Filters nodes based on a predicate

        Args:
          predicate (Callable): Predicate returning False for nodes that
            should be filtered out.
          inverted (bool): Invert condition. Defaults to False.
          replace (bool): Replace current filter instead of combining the two.
            Defaults to False.
          soft (bool): Hide nodes without removing them so they can be
            restored with reset_nodes_filter. Defaults to False.
        """
        (old_filter, old_inverted) = self._g.get_vertex_filter()
        combine_with_old = not replace and old_filter is not None
        new_filter = self._g.new_vertex_property("bool")
        for node in self.all_nodes():
            kept = predicate(node) != inverted
            if combine_with_old:
                old_kept = bool(old_filter[node._node]) != old_inverted
                kept = kept and old_kept
            new_filter[node._node] = kept
        self._g.set_vertex_filter(new_filter, False)
        if not soft:
            self.apply_nodes_filter()

    def apply_nodes_filter(self):
        """ Removes nodes that are currently filtered out """
        self._g.purge_vertices()

    def reset_nodes_filter(self):
        """ Clears node filter """
        self._g.set_vertex_filter(None)

    # Edge operations:

    def num_edges(self):
        return self._g.num_edges()

    def all_edges(self):
        """Yields a BaseEdge wrapper for every edge of the graph."""
        for e in self._g.edges():
            yield BaseEdge(self._g, e)

    def get_edges_between(self, source, target):
        """
        Return all edges between source and target. Source and target can be
        either BaseNode or integer.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        for e in self._g.edge(source, target, all_edges=True):
            yield BaseEdge(self._g, e)

    def get_edge(self, source, target, add_missing=False):
        """
        Return some edge between source and target, or None when there is
        none. Source and target can be either BaseNode or integer.
        """
        if isinstance(source, BaseNode):
            source = source._node
        if isinstance(target, BaseNode):
            target = target._node
        e = self._g.edge(source, target, add_missing)
        if e is None:
            return None
        return BaseEdge(self._g, e)

    def create_edge_attribute(self, name, kind, value=None):
        """Creates edge property ``name`` of type ``kind`` unless it exists."""
        if not self.has_edge_attribute(name):
            self._g.edge_properties[name] = \
                self._g.new_edge_property(kind, value)

    def alias_edge_attribute(self, name, alias):
        """Registers the edge property ``name`` under the extra key ``alias``."""
        self._g.edge_properties[alias] = self._g.edge_properties[name]

    def create_edge_attributes(self, edge_attributes_list):
        """
        Creates every missing edge property from a list of
        ``(name, kind)`` pairs.
        """
        for attr in edge_attributes_list:
            if not self.has_edge_attribute(attr[0]):
                self._g.edge_properties[attr[0]] = \
                    self._g.new_edge_property(attr[1])

    def has_edge_attribute(self, name):
        """ Checks if an edge attribute already exists """
        return name in self._g.edge_properties

    def delete_edge_attribute(self, name):
        """ Delete edge attribute """
        del self._g.edge_properties[name]

    def add_edge(self, parent, child, edge_attributes_list=None):
        """
        Adds an edge from ``parent`` to ``child`` (both BaseNode) and sets the
        given ``(attribute, value)`` pairs on it. Returns the new BaseEdge.
        """
        new_edge = self._g.add_edge(parent._node, child._node)
        for attr in edge_attributes_list or ():
            self._g.edge_properties[attr[0]][new_edge] = attr[1]
        return BaseEdge(self._g, new_edge)

    def edges_filter(self, edges_to_filter_set):
        """Permanently removes every edge contained in ``edges_to_filter_set``."""
        edge_filter = self._g.new_edge_property("bool")
        for e in self.all_edges():
            edge_filter[e._edge] = e not in edges_to_filter_set
        self._g.set_edge_filter(edge_filter)
        self._g.purge_edges()

    def ungraph_tool(self, thingy, lemma_on_only_synset_node_dict):
        """
        Converts given data structure so that it no longer has any
        graph_tool dependencies.

        * A dict is converted recursively (keys and values).
        * A vertex property map becomes a dict keyed by the nodes found in
          the value sets of ``lemma_on_only_synset_node_dict``.
        * An edge property map becomes a dict keyed by all edges.
        * Anything else carries no graph_tool dependency and is returned
          unchanged.

        Raises:
          NotImplementedError: for a property map that is neither a vertex
            nor an edge map.
        """
        logger = logging.getLogger(__name__)

        if isinstance(thingy, dict):
            return {
                self.ungraph_tool(k, lemma_on_only_synset_node_dict):
                    self.ungraph_tool(v, lemma_on_only_synset_node_dict)
                for k, v in thingy.items()
            }

        # isinstance (not type equality) so that concrete PropertyMap
        # subclasses are recognised as well.
        if not isinstance(thingy, gt.PropertyMap):
            return thingy

        key_type = thingy.key_type()
        if key_type == 'v':
            nodes_to_translate = set()
            for vset in lemma_on_only_synset_node_dict.values():
                nodes_to_translate.update(vset)
            return {node: thingy[node.use_graph_tool()]
                    for node in nodes_to_translate}
        if key_type == 'e':
            return {edge: thingy[edge.use_graph_tool()]
                    for edge in self.all_edges()}

        logger.error('Unknown property type %s', key_type)
        raise NotImplementedError(
            'Unknown property type %s' % key_type)

    def generate_lemma_to_nodes_dict_synsets(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects (via the lexical units of each node's
        synset). It is expensive in terms of the time needed to generate the
        dictionary. It should therefore be executed at the beginning of the
        runtime and its results reused as many times as needed without
        re-executing the function.
        """
        lemma_to_nodes_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                lu_set = node.synset.lu_set
            except KeyError:
                continue
            for lu in lu_set:
                lemma_to_nodes_dict[lu.lemma.lower()].add(node)
        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    def generate_lemma_to_nodes_dict_lexical_units(self):
        """
        This method generates a utility dictionary, which maps lemmas to
        corresponding node objects (via each node's own lexical unit). It is
        expensive in terms of the time needed to generate the dictionary. It
        should therefore be executed at the beginning of the runtime and its
        results reused as many times as needed without re-executing the
        function.
        """
        lemma_to_nodes_dict = defaultdict(set)
        for node in self.all_nodes():
            # Best effort: nodes without a usable lexical unit are skipped.
            try:
                lemma = node.lu.lemma.lower()
                lemma_to_nodes_dict[lemma].add(node)
            except Exception:
                continue
        self._lemma_to_nodes_dict = lemma_to_nodes_dict

    @property
    def lemma_to_nodes_dict(self):
        return self._lemma_to_nodes_dict

    def _make_lu_on_v_dict(self):
        """
        Makes the dictionary mapping a lexical unit id to its node.
        """
        # defaultdict(set) is kept so that a lookup of an unknown id still
        # yields an empty (falsy) set rather than raising KeyError.
        lu_on_vertex_dict = defaultdict(set)
        for node in self.all_nodes():
            try:
                nl = node.lu
            except Exception:
                continue
            if nl:
                lu_on_vertex_dict[node.lu.lu_id] = node
        self._lu_on_vertex_dict = lu_on_vertex_dict
class BoardGraphGraphtool(BoardGraphBase):
    """
    Board graph backed by a graph_tool ``Graph``.

    Every vertex carries a ``cell`` property (a ``BoardCell``); every edge
    carries a ``direction`` and an integer ``weight`` property.
    """

    def __init__(self, number_of_vertices, graph_type):
        super().__init__(number_of_vertices, graph_type)

        # Graph tool creates directed multigraph by default.
        self._graph = Graph()
        self._graph.add_vertex(number_of_vertices)
        # One BoardCell per vertex: a multiplied list would make every vertex
        # share the very same instance, so changing one cell in place would
        # change all of them.
        self._graph.vertex_properties["cell"] = \
            self._graph.new_vertex_property(
                "object",
                [BoardCell() for _ in range(number_of_vertices)]
            )
        self._graph.edge_properties["direction"] = \
            self._graph.new_edge_property("object")
        self._graph.edge_properties["weight"] = \
            self._graph.new_edge_property("int")

    def __getitem__(self, position):
        return self._graph.vp.cell[self._graph.vertex(position)]

    def __setitem__(self, position, board_cell):
        self._graph.vp.cell[self._graph.vertex(position)] = board_cell

    def __contains__(self, position):
        return position in range(0, self.vertices_count())

    def vertices_count(self):
        return self._graph.num_vertices()

    def edges_count(self):
        return self._graph.num_edges()

    def has_edge(self, source_vertice, target_vertice, direction):
        """
        Tells whether an edge labelled ``direction`` leads from
        ``source_vertice`` to ``target_vertice``.
        """
        direction_of = self._graph.ep.direction
        return any(
            int(e.target()) == target_vertice and direction_of[e] == direction
            for e in self._graph.vertex(source_vertice).out_edges()
        )

    def out_edges_count(self, source_vertice, target_vertice):
        """
        Counts the (possibly parallel) edges leading from ``source_vertice``
        to ``target_vertice``.
        """
        return sum(
            1 for e in self._graph.vertex(source_vertice).out_edges()
            if int(e.target()) == target_vertice
        )

    def reconfigure_edges(self, width, height, tessellation):
        """
        Uses tessellation object to create all edges in graph.
        """
        # TODO: Faster version? Collect all (source, neighbor) pairs first,
        # add them in one ``add_edge_list`` call and assign the directions
        # to the resulting edge descriptors afterwards.
        self._graph.clear_edges()
        for source_vertice in self._graph.vertices():
            for direction in tessellation.legal_directions:
                neighbor_vertice = tessellation.neighbor_position(
                    int(source_vertice), direction,
                    board_width=width, board_height=height
                )
                if neighbor_vertice is not None:
                    e = self._graph.add_edge(
                        source_vertice, neighbor_vertice, add_missing=False
                    )
                    self._graph.ep.direction[e] = direction

    def calculate_edge_weights(self):
        """Stores ``out_edge_weight`` of each edge's target as edge weight."""
        for e in self._graph.edges():
            self._graph.ep.weight[e] = self.out_edge_weight(int(e.target()))

    def neighbor(self, from_position, direction):
        """
        Returns the position reached from ``from_position`` by following the
        edge labelled ``direction``, or None when there is no such edge.

        Raises:
            IndexError: when graph_tool rejects ``from_position`` with a
                ValueError.
        """
        try:
            for e in self._graph.vertex(from_position).out_edges():
                if self._graph.ep.direction[e] == direction:
                    return int(e.target())
        except ValueError as exc:
            raise IndexError(exc.args)
        return None

    def wall_neighbors(self, from_position):
        """Positions of the out-neighbours whose cell is a wall."""
        return [
            int(n)
            for n in self._graph.vertex(from_position).out_neighbours()
            if self[int(n)].is_wall
        ]

    def all_neighbors(self, from_position):
        """Positions of all out-neighbours of ``from_position``."""
        return [
            int(n)
            for n in self._graph.vertex(from_position).out_neighbours()
        ]

    def shortest_path(self, start_position, end_position):
        """
        Unweighted shortest path as a list of positions; an empty list when
        graph_tool raises ValueError (e.g. for an invalid position).
        """
        try:
            # The bare name resolves to the module-level graph_tool function,
            # not to this method.
            vertex_path = shortest_path(
                g=self._graph,
                source=self._graph.vertex(start_position),
                target=self._graph.vertex(end_position),
            )[0]
            return [int(v) for v in vertex_path]
        except ValueError:
            return []

    def dijkstra_path(self, start_position, end_position):
        """
        Shortest path using the freshly recalculated edge weights, as a list
        of positions; an empty list when a ValueError is raised.
        """
        try:
            self.calculate_edge_weights()
            vertex_path = shortest_path(
                g=self._graph,
                source=self._graph.vertex(start_position),
                target=self._graph.vertex(end_position),
                weights=self._graph.ep.weight,
            )[0]
            return [int(v) for v in vertex_path]
        except ValueError:
            return []

    def position_path_to_direction_path(self, position_path):
        """
        Translates a list of positions into the directions of the edges
        connecting each consecutive pair.

        Returns a dict with ``source_position`` (first position, or None for
        an empty path) and ``path`` (list of directions). When several
        parallel edges connect a pair, the direction of each is appended.
        """
        directions = []
        direction_of = self._graph.ep.direction
        for source_vertice, target_vertice in zip(position_path,
                                                  position_path[1:]):
            for out_edge in self._graph.vertex(source_vertice).out_edges():
                if int(out_edge.target()) == target_vertice:
                    directions.append(direction_of[out_edge])

        return {
            'source_position': position_path[0] if position_path else None,
            'path': directions
        }
def gen_fs(dicProperties):
    """
    Generate a directed graph whose in- and out-degrees are drawn from
    Pareto (power-law-like) distributions, with a tunable reciprocity.

    Keys read from ``dicProperties``:
      * "Reciprocity", "InDeg", "OutDeg": reciprocity and the exponents of
        the in/out degree distributions;
      * two of "Nodes", "Edges", "Density" (the missing one is computed and
        written back into ``dicProperties``);
      * "InhibFrac": fraction of edges given type -1 (the others get +1);
      * "Weighted" and, when it is true, "Distribution", the key into
        ``dicGenWeights`` selecting the weight generator.

    Self-loops, parallel edges and isolated vertices are removed, so the
    returned graph may hold fewer nodes and edges than requested.

    Returns:
        the generated graph, with an integer "type" edge property and,
        when weighted, a "weight" edge property.
    """
    np.random.seed()
    graphFS = Graph()
    # fraction of the arcs that receive a reciprocal partner
    f = dicProperties["Reciprocity"]
    rFracRecip = f / (2.0 - f)
    # basic quantities
    rInDeg = dicProperties["InDeg"]
    rOutDeg = dicProperties["OutDeg"]
    if "Nodes" in dicProperties:
        nNodes = dicProperties["Nodes"]
        graphFS.add_vertex(nNodes)
        if "Edges" in dicProperties:
            nEdges = dicProperties["Edges"]
            rDens = nEdges / float(nNodes**2)
            dicProperties["Density"] = rDens
        else:
            rDens = dicProperties["Density"]
            nEdges = int(np.floor(rDens * nNodes**2))
            dicProperties["Edges"] = nEdges
    else:
        nEdges = dicProperties["Edges"]
        rDens = dicProperties["Density"]
        nNodes = int(np.floor(np.sqrt(nEdges / rDens)))
        graphFS.add_vertex(nNodes)
        dicProperties["Nodes"] = nNodes
    # number of arcs to create (before adding the reciprocal ones)
    nArcs = int(np.floor(rDens * nNodes**2) / (1 + rFracRecip))
    # parameters of the associated probability functions F(x) = A x^{-tau}
    Ai = nArcs * (rInDeg - 1) / (nNodes)
    Ao = nArcs * (rOutDeg - 1) / (nNodes)
    # means of the Pareto II (= Lomax) distributions
    rMi = 1 / (rInDeg - 2.)
    rMo = 1 / (rOutDeg - 2.)
    # in/out degree of every node, rescaled so that their means are Ai / Ao
    lstInDeg = np.random.pareto(rInDeg, nNodes) + 1
    lstOutDeg = np.random.pareto(rOutDeg, nNodes) + 1
    lstInDeg = np.floor(
        np.multiply(Ai / np.mean(lstInDeg), lstInDeg)).astype(int)
    lstOutDeg = np.floor(
        np.multiply(Ao / np.mean(lstOutDeg), lstOutDeg)).astype(int)
    # generate the stubs: node i appears lstInDeg[i] (resp. lstOutDeg[i])
    # times in a row in the in (resp. out) stub list
    nInStubs = int(np.sum(lstInDeg))
    nOutStubs = int(np.sum(lstOutDeg))
    lstNodeIds = np.arange(nNodes)
    lstInStubs = np.repeat(lstNodeIds, lstInDeg)
    lstOutStubs = np.repeat(lstNodeIds, lstOutDeg)
    # make sure we have roughly the requested number of edges
    while nInStubs * (1 + rFracRecip) / float(nArcs) < 0.95:
        vert = np.random.randint(0, nNodes)
        nAddInStubs = int(np.floor(Ai / rMi * (np.random.pareto(rInDeg) + 1)))
        lstInStubs = np.append(
            lstInStubs, np.repeat(vert, nAddInStubs)).astype(int)
        nInStubs += nAddInStubs
    while nOutStubs * (1 + rFracRecip) / float(nArcs) < 0.95:
        # draw a fresh vertex each time, exactly like for the in-stubs;
        # otherwise every extra out-stub lands on one stale vertex
        vert = np.random.randint(0, nNodes)
        nAddOutStubs = int(
            np.floor(Ao / rMo * (np.random.pareto(rOutDeg) + 1)))
        lstOutStubs = np.append(
            lstOutStubs, np.repeat(vert, nAddOutStubs)).astype(int)
        nOutStubs += nAddOutStubs
    # ensure the same number of in and out stubs
    # (1.13 is an experimental correction)
    nMaxStubs = int(1.13 * (2.0 * nArcs) / (2 * (1 + rFracRecip)))
    if nInStubs > nMaxStubs and nOutStubs > nMaxStubs:
        np.random.shuffle(lstInStubs)
        np.random.shuffle(lstOutStubs)
        lstOutStubs = lstOutStubs[:nMaxStubs]
        lstInStubs = lstInStubs[:nMaxStubs]
        nOutStubs = nInStubs = nMaxStubs
    elif nInStubs < nOutStubs:
        np.random.shuffle(lstOutStubs)
        lstOutStubs = lstOutStubs[:nInStubs]
        nOutStubs = nInStubs
    else:
        np.random.shuffle(lstInStubs)
        lstInStubs = lstInStubs[:nOutStubs]
        nInStubs = nOutStubs
    # pair the stubs; the first nRecip pairs also get the reverse edge
    nRecip = int(np.floor(nInStubs * rFracRecip))
    np.random.shuffle(lstInStubs)
    np.random.shuffle(lstOutStubs)
    lstInRecip = lstInStubs[0:nRecip]
    lstOutRecip = lstOutStubs[0:nRecip]
    lstEdges = np.array([
        np.concatenate((lstOutStubs, lstInRecip)),
        np.concatenate((lstInStubs, lstOutRecip)),
    ]).astype(int)
    # add edges, then clean the graph up
    graphFS.add_edge_list(np.transpose(lstEdges))
    remove_self_loops(graphFS)
    remove_parallel_edges(graphFS)
    lstIsolatedVert = find_vertex(
        graphFS, graphFS.degree_property_map("total"), 0)
    graphFS.remove_vertex(lstIsolatedVert)
    graphFS.reindex_edges()
    nEdges = graphFS.num_edges()
    # generate types: +1 for excitatory (True), -1 for inhibitory (False)
    rInhibFrac = dicProperties["InhibFrac"]
    lstTypesGen = np.random.uniform(0, 1, nEdges)
    lstIsExcitatory = np.greater(lstTypesGen, rInhibFrac)
    nExc = np.count_nonzero(lstIsExcitatory)
    epropType = graphFS.new_edge_property(
        "int", np.multiply(2, lstIsExcitatory) - 1)
    graphFS.edge_properties["type"] = epropType
    # and weights
    if dicProperties["Weighted"]:
        # generate the weights
        lstWeights = dicGenWeights[dicProperties["Distribution"]](
            graphFS, dicProperties, nEdges, nExc)
        # create the property that stores the weights
        epropW = graphFS.new_edge_property("double", lstWeights)
        graphFS.edge_properties["weight"] = epropW
    return graphFS
def reduce_space(q: gt.Graph, a_graph: gt.Graph, min_score, delta,
                 method='MDST'):
    """
    Shrink the archive graph ``a_graph`` to the part that can match the
    query graph ``q`` and score the candidate matches found on the way.

    Steps: build an attribute index of the archive, pick a spanning tree of
    the query ('MDST' -> ``calculate_mdst_v2``, 'Normal' ->
    ``calc_random_spanning_tree``), match that tree against the archive,
    subsample the archive from the matching and score every solution.
    Progress and timings are printed.

    Args:
        q: query graph.
        a_graph: archive graph; vertex property 'old' is read to report
            original node ids.
        min_score: minimal score a solution needs to be reported.
        delta: passed through to ``sgm_match``.
        method: 'MDST' (default) or 'Normal'.

    Returns:
        ``(num_nodes_a, num_edges_a, threshold_matches, scores)`` where the
        first two list the archive size before and after the reduction and
        the last two hold the matches reaching ``min_score`` and their
        scores.

    Raises:
        ValueError: when ``method`` selects no spanning-tree strategy.
    """
    num_edges_a = []
    num_nodes_a = []
    used_edges = set()
    # NOTE: delta may be forced to a non-zero value here for corruption
    # experiments - njp

    # Hash
    print('Hashing...')
    start = time.time()
    # Create an attribute index
    n_idx, e_idx = create_dict(a_graph, 'nValue', 'eValue')
    print('Done at ' + str(time.time() - start))

    alg_start = time.time()
    num_edges_a.append(a_graph.num_edges())
    num_nodes_a.append(a_graph.num_vertices())

    print('Calculating MDST')
    start = time.time()
    t_score = None
    t_graph = None
    edges_exhausted = len(used_edges) >= 2 * q.num_edges()
    if method == 'MDST' and not edges_exhausted:
        t_graph, t_score = calculate_mdst_v2(q, n_idx, e_idx,
                                             used_stuff=used_edges)
    if method == 'Normal' or edges_exhausted:
        t_graph = calc_random_spanning_tree(q)
    if t_graph is None:
        # Without this guard an unknown method only fails further down
        # with an unrelated-looking error on the missing tree.
        raise ValueError(
            "Unsupported method %r: expected 'MDST' or 'Normal'" % (method,))
    print('Printing t_graph')
    print_graph(t_graph)
    print('t_score', t_score)
    print('Done at ' + str(time.time() - start))

    # Query edges left out of the tree (dumb way of calculating tau).
    tau = q.num_edges() - t_graph.num_edges()

    print('Matching')
    start = time.time()
    mg = sgm_match(t_graph, a_graph, delta, tau, n_idx, e_idx)
    print('Matching done at ' + str(time.time() - start))
    print('Printing mg:')
    root = None
    for v in mg.vertices():
        if v.in_degree() == 0:
            root = v
            break
    print(root)
    print_graph(mg)
    a_prime = subsample_archive_from_matching(a_graph, mg, t_graph, e_idx)

    # Score the solutions
    print('Scoring solutions:')
    start = time.time()
    scores = []
    threshold_matches = []
    roots = [n for n in mg.vertices() if n.in_degree() == 0]
    path_list_vp = vp_map(mg, 'path_list', 'object')
    old_id = a_graph.vp['old']
    for root in roots:
        for _sol in path_list_vp[root]:
            # Original node ids of the solution, for the log only.
            origin_nodes = [
                tuple(old_id[a_graph.vertex(i)] for i in _d)
                for _d in _sol
            ]
            print('trying solutions', origin_nodes)
            match, score = score_solution(q, a_prime, _sol)
            origin_match = match_to_original_nodes(a_graph, match)
            print('Got match with score: ', score)
            print(origin_match)
            if score >= min_score:
                threshold_matches.append(origin_match)
                scores.append(score)
                for key, val in origin_match.items():
                    print('key {} val {}'.format(key, val))

    num_edges_a.append(a_prime.num_edges())
    num_nodes_a.append(a_prime.num_vertices())
    print('Algorithm Post-hash Stages Done at ' + str(time.time() - start))
    print('Algorithm Post-hash Stages Done at '
          + str(time.time() - alg_start))
    return num_nodes_a, num_edges_a, threshold_matches, scores
def __init__(self, args: Namespace, graph: Graph) -> None:
    """Set up an Evaluation object with its configuration and empty results.

    Values taken from ``args`` and ``graph`` are copied onto the instance;
    every measurement field starts out as a zero / empty / ``None``
    placeholder.

    Parameters
    ----------
    args : Namespace
        the command-line arguments
    graph : Graph
        the loaded graph to be partitioned
    """
    self.args = args

    # Result files: both names are derived from args.csv
    csv_base = args.csv
    self.csv_file = csv_base + ".csv"
    self.csv_details_file = csv_base + "_details.csv"

    # --- Dataset parameters ---
    self.block_size_variation = args.blockSizeVar
    self.block_overlap = args.overlap
    self.streaming_type = args.type
    self.num_nodes = graph.num_vertices()
    self.num_edges = graph.num_edges()

    # --- Sampling evaluation ---
    self.blocks_retained = 0.0
    self.graph_edge_ratio = 0.0
    self.difference_from_ideal_sample = 0.0
    self.expansion_quality = 0.0
    self.sampled_graph_clustering_coefficient = 0.0
    self.full_graph_clustering_coefficient = 0.0
    self.sampled_graph_diameter = 0
    self.full_graph_diameter = 0
    self.sampled_graph_largest_component = 0
    self.full_graph_largest_component = 0
    self.sampled_graph_island_vertices = 0
    self.sampled_graph_num_vertices = 0
    self.sampled_graph_num_edges = 0
    self.sampled_graph_edge_ratio = 0.0
    self.sampled_graph_num_blocks_algorithm = 0
    self.sampled_graph_num_blocks_truth = 0
    self.sampled_graph_accuracy = 0.0
    self.sampled_graph_rand_index = 0.0
    self.sampled_graph_adjusted_rand_index = 0.0
    self.sampled_graph_pairwise_recall = 0.0
    self.sampled_graph_pairwise_precision = 0.0
    self.sampled_graph_entropy_algorithm = 0.0
    self.sampled_graph_entropy_truth = 0.0
    self.sampled_graph_entropy_algorithm_given_truth = 0.0
    self.sampled_graph_entropy_truth_given_algorithm = 0.0
    self.sampled_graph_mutual_info = 0.0
    self.sampled_graph_missed_info = 0.0
    self.sampled_graph_erroneous_info = 0.0

    # --- Algorithm parameters ---
    self.num_block_proposals = args.blockProposals
    self.beta = args.beta
    self.sample_size = args.sample_size
    self.sampling_iterations = args.sample_iterations
    self.sampling_algorithm = args.sample_type
    self.delta_entropy_threshold = args.threshold
    self.nodal_update_threshold_strategy = args.nodal_update_strategy
    self.nodal_update_threshold_factor = args.factor
    self.nodal_update_threshold_direction = args.direction

    # --- Goodness of partition measures ---
    self.num_blocks_algorithm = 0
    self.num_blocks_truth = 0
    self.accuracy = 0.0
    self.rand_index = 0.0
    self.adjusted_rand_index = 0.0
    self.pairwise_recall = 0.0
    self.pairwise_precision = 0.0
    self.entropy_algorithm = 0.0
    self.entropy_truth = 0.0
    self.entropy_algorithm_given_truth = 0.0
    self.entropy_truth_given_algorithm = 0.0
    self.mutual_info = 0.0
    self.missed_info = 0.0
    self.erroneous_info = 0.0
    self.sampled_graph_description_length = 0.0
    self.max_sampled_graph_description_length = 0.0
    self.full_graph_description_length = 0.0
    self.max_full_graph_description_length = 0.0
    self.sampled_graph_modularity = 0.0
    self.full_graph_modularity = 0.0

    # --- Algorithm runtime measures ---
    self.loading = 0.0
    self.sampling = 0.0
    self.sampled_graph_partition_time = 0.0
    self.total_partition_time = 0.0
    self.merge_sample = 0.0
    self.propagate_membership = 0.0
    self.finetune_membership = 0.0
    self.prepare_next_partitions = []  # type: List[float]

    # --- Community details ---
    # Each mapping gets its own fresh dict; they must not share one object.
    self.real_communities = {}  # type: Dict[int, int]
    self.algorithm_communities = {}  # type: Dict[int, int]
    self.sampled_graph_real_communities = {}  # type: Dict[int, int]
    self.sampled_graph_algorithm_communities = {}  # type: Dict[int, int]
    # None until a table is assigned.
    self.contingency_table = None  # type: np.ndarray
    self.sampled_graph_contingency_table = None  # type: np.ndarray