Example #1
0
class RoadMap(object):
    """Road network stored in a graph-tool ``Graph``.

    Vertices are the end points of road polylines ("crossings") and carry a
    ``pos`` vertex property; edges are road segments and carry ``length`` and
    ``level`` edge properties.
    """

    def __init__(self, mapfile):
        """Create an empty road map bound to ``mapfile``.

        Args:
            mapfile: path of the map file; a name ending in ``shp`` is read
                as a shapefile by :meth:`load`, anything else is handed to
                ``load_graph``.
        """
        self._mapfile = mapfile
        # Column positions inside a shapefile record.
        self.DIRECTION_index = 6
        self.PATHCLASS_index = 20
        self.g = Graph()
        self.g.edge_properties["length"] = self.g.new_edge_property("double")
        self.g.edge_properties["level"] = self.g.new_edge_property("int")
        self.g.vertex_properties["pos"] = self.g.new_vertex_property("vector<double>")
        # Maps a crossing position (as a tuple) to its vertex.
        self.cross_pos_index = {}

    def load(self):
        """Populate ``self.g`` from the map file.

        Returns:
            True when the graph was loaded, False when the shapefile could
            not be opened (the error is printed).
        """
        if self._mapfile[-3:] != 'shp':
            self.g = load_graph(self._mapfile)
            # Report success here as well, so both code paths agree.
            return True

        try:
            sf = shapefile.Reader(self._mapfile)
        except Exception as e:
            print(str(e))
            return False
        roads_records = sf.shapeRecords()  # fetch the road segment records
        for road_record in roads_records:
            cross_s_index = self.add_cross(road_record.shape.points[0])
            cross_e_index = self.add_cross(road_record.shape.points[-1])
            self.add_road_edge(cross_s_index, cross_e_index, road_record)
            # A direction value of 0 marks a two-way road: add the reverse edge.
            if int(road_record.record[self.DIRECTION_index]) == 0:
                self.add_road_edge(cross_e_index, cross_s_index, road_record)
        return True

    def has_edge(self, s_vertex, e_vertex):
        """Return the edge from ``s_vertex`` to ``e_vertex``, or None.

        None is also returned when either index lies outside the graph, so
        the graph is never queried with an invalid vertex.
        """
        # Valid vertex indices are 0 .. num_vertices() - 1.
        if max(int(s_vertex), int(e_vertex)) < self.g.num_vertices():
            return self.g.edge(s_vertex, e_vertex)
        return None

    def add_cross(self, cross_pos):
        """Return the vertex for ``cross_pos``, creating it on first sight.

        Args:
            cross_pos: coordinate sequence of the crossing.
        """
        # Normalise to a tuple so list-typed points can be used as dict keys.
        key = tuple(cross_pos)
        cross_index = self.cross_pos_index.get(key)
        if cross_index is None:
            cross_index = self.g.add_vertex()
            self.g.vp.pos[cross_index] = cross_pos
            self.cross_pos_index[key] = cross_index
        return cross_index

    def add_road_edge(self, s_vertex, e_vertex, road):
        """Return the edge ``s_vertex -> e_vertex``, adding it if missing.

        A newly created edge gets its ``level`` from the record's path-class
        column and its ``length`` from the road polyline.
        """
        edge = self.has_edge(s_vertex, e_vertex)
        if edge is not None:
            return edge
        edge = self.g.add_edge(s_vertex, e_vertex)
        self.g.ep.level[edge] = int(road.record[self.PATHCLASS_index])
        self.g.ep.length[edge] = self.road_length(road)
        return edge

    @staticmethod
    def road_length(road):
        """Return the summed Euclidean length of the polyline ``road.shape.points``."""
        points = road.shape.points
        return sum(distance.euclidean(start, end)
                   for start, end in zip(points[:-1], points[1:]))
Example #2
0
def build_graph(df_list, sens='ST', top=410, min_sens=0.01, edge_cutoff=0.0):
    """
    Initializes and constructs a graph where vertices are the parameters
    selected from the first dataframe in 'df_list', subject to the
    constraints set by 'sens', 'top', and 'min_sens'.  Edges are the second
    order sensitivities of the interactions between those vertices,
    with sensitivities greater than 'edge_cutoff'.

    The input dataframes are not modified.

    Parameters
    -----------
    df_list     : list
                  A list of two dataframes.  The first dataframe should be
                  the first/total order sensitivities collected by the
                  function data_processing.get_sa_data().  The second one
                  holds the second order sensitivities (columns
                  'Parameter_1', 'Parameter_2' and 'S2').
    sens        : str, optional
                  A string with the name of the sensitivity that you would
                  like to use for the vertices ('ST' or 'S1').
    top         : int, optional
                  An integer specifying the number of vertices to display (
                  the top sensitivity values).
    min_sens    : float, optional
                  A float with the minimum sensitivity to allow in the graph.
    edge_cutoff : float, optional
                  A float specifying the minimum second order sensitivity to
                  show as an edge in the graph.

    Returns
    --------
    g : graph-tool object
        a graph-tool graph object of the network described above.  Each
        vertex has properties 'param', 'sensitivity', and 'confidence'
        corresponding to the name of the parameter, value of the sensitivity
        index, and 1 + (confidence interval / sensitivity).  The only edge
        property is 'second_sens', the second order sensitivity index for
        the interaction between the two vertices it connects, scaled by 150.

    Raises
    -------
    ValueError
        If 'sens' is not 'ST' or 'S1', or if the second order dataframe is
        missing (None).
    """

    # get the first/total index dataframe and second order dataframe
    df = df_list[0]
    df2 = df_list[1]

    # Make sure sens is ST or S1
    if sens not in ('ST', 'S1'):
        raise ValueError('sens must be ST or S1')
    # Make sure that there is a second order index dataframe.  (Truth-testing
    # a DataFrame is ambiguous, so compare against None explicitly.)
    if df2 is None:
        raise ValueError('Missing second order dataframe!')

    # slice the dataframe so the resulting graph will only include the top
    # 'top' values of 'sens' greater than 'min_sens'.
    df = df.sort_values(sens, ascending=False)
    df = df.loc[df[sens] > min_sens, :].head(top)
    df = df.reset_index()

    # initialize a graph
    g = Graph()

    vprop_sens = g.new_vertex_property('double')
    vprop_conf = g.new_vertex_property('double')
    vprop_name = g.new_vertex_property('string')
    eprop_sens = g.new_edge_property('double')

    g.vertex_properties['param'] = vprop_name
    g.vertex_properties['sensitivity'] = vprop_sens
    g.vertex_properties['confidence'] = vprop_conf
    g.edge_properties['second_sens'] = eprop_sens

    # Add the vertices to the graph, remembering which vertex belongs to
    # which parameter name.
    name_to_vertex = {}
    rows = zip(df['Parameter'], df[sens].values, df['%s_conf' % sens].values)
    for param, value, conf in rows:
        v = g.add_vertex()
        vprop_sens[v] = value
        vprop_conf[v] = 1 + conf / value
        vprop_name[v] = param
        name_to_vertex[param] = v

    # Only allow edges between vertices that we've defined, and eliminate
    # edges that do not exceed the cutoff value.
    known = list(name_to_vertex)
    both_known = (df2['Parameter_1'].isin(known) &
                  df2['Parameter_2'].isin(known))
    pruned = df2[both_known & (df2['S2'] > edge_cutoff)]

    # Add the edges for the graph
    for param_1, param_2, sensitivity in zip(pruned['Parameter_1'],
                                             pruned['Parameter_2'],
                                             pruned['S2']):
        e = g.add_edge(name_to_vertex[param_1], name_to_vertex[param_2])
        # multiply by a number to make the lines visible on the plot
        eprop_sens[e] = sensitivity * 150

    # These are ways you can reference properties of vertices or edges
    # g.vp.param[g.vertex(77)]

    print('Created a graph with %s vertices and %s edges.\nVertices are the '
          'top %s %s values greater than %s.\nOnly S2 values (edges) '
          'greater than %s are included.' %
          (g.num_vertices(), g.num_edges(), top, sens, min_sens, edge_cutoff))

    return g
Example #3
0
    def forest_fire_sample(graph: Graph, num_vertices: int,
                           prev_state: ForestFireSampleState,
                           args: argparse.Namespace) -> SampleState:
        """Grow a vertex sample by forest-fire sampling.

        Every vertex on the current fire front is added to the sample (if it
        was not sampled before). A geometric(0.7) draw then decides how many
        of its out-neighbors are picked for the next fire front; all of its
        out-neighbors, picked or not, are marked as burnt. When the fire dies
        out, or the burnt counter reaches the number of vertices in the graph,
        sampling restarts from a randomly drawn vertex.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : ForestFireSampleState
            the state of the previous sample in the stack; it is handed to
            the ForestFireSampleState constructor together with the size of
            the filtered graph
        args : argparse.Namespace
            the command-line arguments provided by the user; `sample_size`
            and `sample_iterations` are read here

        Returns
        -------
        state : SampleState
            the sample state whose `sample_idx` holds the sampled vertex ids
            (Note: these ids correspond to the filtered graph, and have to be
            mapped back to the unfiltered graph)
        """
        state = ForestFireSampleState(graph.num_vertices(), prev_state)
        per_iteration = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        target = per_iteration + len(state.sample_idx)
        while True:
            # Stop once a non-empty index set holds a multiple of `target`.
            if len(state.index_set) != 0 and len(
                    state.index_set) % target == 0:
                break
            for burning in state.current_fire_front:
                # record the vertex in the sample the first time it is seen
                if not state.sampled_marker[burning]:
                    state.sampled_marker[burning] = True
                    state.burnt_marker[burning] = True
                    state.num_burnt += 1
                    state.index_set.append(burning)
                # decide how many outgoing edges the fire spreads along
                spread = np.random.geometric(0.7)
                neighbors = graph.get_out_neighbors(burning)
                neighbor_count = len(neighbors)
                if neighbor_count < 1:  # nothing to spread to
                    continue
                if neighbor_count <= spread:
                    spread = neighbor_count
                picked_positions = np.random.choice(np.arange(neighbor_count),
                                                    spread,
                                                    replace=False)
                picked = np.zeros(neighbor_count, dtype=bool)
                picked[picked_positions] = True
                for neighbor, is_picked in zip(neighbors, picked):
                    # picked and not yet burnt: goes onto the next frontier
                    if is_picked and not state.burnt_marker[neighbor]:
                        state.next_fire_front.append(neighbor)
                    # every neighbor counts as visited, picked or not
                    state.burnt_marker[neighbor] = True
            if state.num_burnt == graph.num_vertices():
                # everything is burnt: wipe the burn marks and restart
                state.num_burnt = 0
                state.burnt_marker = [False] * graph.num_vertices()
                state.current_fire_front = [
                    np.random.randint(graph.num_vertices())
                ]
                state.next_fire_front = list()
                continue
            if len(state.next_fire_front) != 0:
                state.current_fire_front = list(state.next_fire_front)
                state.next_fire_front = list()
            else:
                # the fire burnt out: light a new one at a random vertex
                state.current_fire_front = [
                    np.random.randint(graph.num_vertices())
                ]
        state.sample_idx = np.asarray(state.index_set[:target])
        return state
Example #4
0
def prepare_hierarchies_neighborhood(
    experiments_path: ExperimentPaths,
    conceptnet_graph_path: Union[str, Path],
    filter_graphs_to_intersected_vertices: bool = True,
) -> pd.DataFrame:
    """Compare pairwise shortest distances in the aspect graph and ConceptNet.

    Both graphs are prepared, reduced to the aspects they have in common via
    ``intersected_nodes``, and for every ordered pair of common aspects the
    directed shortest distance is computed in the original aspect graph and
    in the original ConceptNet graph. Graph sizes and the result length are
    logged to mlflow; the resulting DataFrame is pickled inside
    ``experiments_path.experiment_path`` and logged as an mlflow artifact.

    Args:
        experiments_path: experiment paths object; its ``experiment_path``
            is the directory that receives the pickled DataFrame.
        conceptnet_graph_path: location of the ConceptNet graph; a ``str``
            is accepted and converted to ``Path``.
        filter_graphs_to_intersected_vertices: forwarded unchanged to
            ``intersected_nodes`` and logged as an mlflow parameter.

    Returns:
        DataFrame with columns ``aspect_1``, ``aspect_2``,
        ``shortest_distance_aspect_graph`` and
        ``shortest_distance_conceptnet``, one row per ordered aspect pair.
    """
    # The annotation allows ``str``, but ``.stem`` below needs a ``Path``.
    conceptnet_graph_path = Path(conceptnet_graph_path)
    conceptnet_hierarchy_neighborhood_df_path = (
        experiments_path.experiment_path /
        f"shortest-paths-pairs-{conceptnet_graph_path.stem}-df.pkl")

    logger.info("Prepare graphs")

    conceptnet_graph, vertices_conceptnet = prepare_conceptnet(
        conceptnet_graph_path)
    mlflow.log_metric("conceptnet_graph_nodes",
                      conceptnet_graph.num_vertices())
    mlflow.log_metric("conceptnet_graph_edges", conceptnet_graph.num_edges())

    aspect_graph, _ = prepare_aspect_graph(experiments_path)
    mlflow.log_metric("aspect_graph_nodes", aspect_graph.num_vertices())
    mlflow.log_metric("aspect_graph_edges", aspect_graph.num_edges())

    # Work on copies so the original graphs stay available for the distance
    # computations below.
    aspect_graph_intersected, conceptnet_graph_intersected = intersected_nodes(
        aspect_graph=Graph(aspect_graph),
        conceptnet_graph=Graph(conceptnet_graph),
        filter_graphs_to_intersected_vertices=
        filter_graphs_to_intersected_vertices,
        property_name="aspect_name",
    )

    mlflow.log_param("filter_graphs_to_intersected_vertices",
                     filter_graphs_to_intersected_vertices)

    mlflow.log_metric(
        "conceptnet_graph_intersected_nodes",
        conceptnet_graph_intersected.num_vertices(),
    )
    mlflow.log_metric("aspect_graph_intersected_nodes",
                      aspect_graph_intersected.num_vertices())

    mlflow.log_metric("conceptnet_graph_intersected_edges",
                      conceptnet_graph_intersected.num_edges())
    mlflow.log_metric("aspect_graph_intersected_edges",
                      aspect_graph_intersected.num_edges())

    aspect_names_intersected = list(
        aspect_graph_intersected.vertex_properties["aspect_name"])

    # Translate the common aspect names back to vertices of the original
    # aspect graph.
    vertices_name_to_aspect_vertex = dict(
        zip(aspect_graph.vertex_properties["aspect_name"],
            aspect_graph.vertices()))
    aspect_graph_vertices_intersected = [
        vertices_name_to_aspect_vertex[a] for a in aspect_names_intersected
    ]
    shortest_distances_aspect_graph = np.array([
        shortest_distance(
            g=aspect_graph,
            source=v,
            target=aspect_graph_vertices_intersected,
            directed=True,
        ) for v in tqdm(aspect_graph_vertices_intersected,
                        desc="Aspect graph shortest paths...")
    ])

    conceptnet_vertices_intersected = [
        vertices_conceptnet[a] for a in aspect_names_intersected
    ]

    mlflow.log_metric("conceptnet_vertices_intersected len",
                      len(conceptnet_vertices_intersected))
    mlflow.log_metric("aspect_graph_vertices_intersected len",
                      len(aspect_graph_vertices_intersected))
    # Both lists are built from the same name list, so this is a sanity
    # check rather than input validation.
    assert len(conceptnet_vertices_intersected) == len(
        aspect_graph_vertices_intersected
    ), "Wrong sequence of vertices in both graphs"

    shortest_distances_conceptnet = np.array([
        shortest_distance(
            g=conceptnet_graph,
            source=v,
            target=conceptnet_vertices_intersected,
            directed=True,
        ) for v in tqdm(conceptnet_vertices_intersected,
                        desc="Conceptnet shortest paths...")
    ])

    # Row i / column j of each distance matrix belongs to the aspect pair
    # (aspect_names_intersected[i], aspect_names_intersected[j]).
    pairs = []
    for aspect_1_idx, aspect_1 in tqdm(enumerate(aspect_names_intersected),
                                       desc="Pairs distances..."):
        for aspect_2_idx, aspect_2 in enumerate(aspect_names_intersected):
            pairs.append((
                aspect_1,
                aspect_2,
                shortest_distances_aspect_graph[aspect_1_idx][aspect_2_idx],
                shortest_distances_conceptnet[aspect_1_idx][aspect_2_idx],
            ))

    pairs_df = pd.DataFrame(
        pairs,
        columns=[
            "aspect_1",
            "aspect_2",
            "shortest_distance_aspect_graph",
            "shortest_distance_conceptnet",
        ],
    )

    logger.info("Dump DataFrame with pairs")
    dump_path = conceptnet_hierarchy_neighborhood_df_path.as_posix()
    pairs_df.to_pickle(dump_path)
    mlflow.log_artifact(dump_path)
    mlflow.log_metric("conceptnet_hierarchy_neighborhood_df_len",
                      len(pairs_df))
    # Report the file that was actually written.
    logger.info(f"DataFrame with pairs dumped in: {dump_path}")

    return pairs_df
Example #5
0
def budgeted_spc_querying(g : graph_tool.Graph, paths, y, weights=None, budget=50,  compute_hulls_between_queries=False, hull_as_optimization=False, use_adjacency=False):
    '''
    Query up to ``budget`` vertex labels along the given paths and print the
    accuracy of a label-propagation prediction after every query.

    In every round the path whose current candidate scores highest according
    to ``helper_sum_sizes`` is chosen (ties are broken at random, preferring
    paths whose candidate index is still 0), the candidate's label is read
    from ``y``, and all candidates and candidate hulls are refreshed.

    :param g: graph to query on
    :param paths: list of paths (each a sequence of vertex indices)
    :param y: ground truth labels, indexed by vertex
    :param weights: edge weights passed to the shortest-distance and hull computations
    :param budget: number of query rounds
    :param compute_hulls_between_queries: if True, vertices lying in the hull of
        exactly one class are treated as labelled for the prediction
    :param hull_as_optimization: passed through to ``compute_hull``
    :param use_adjacency: if True, propagate labels on a 0/1 matrix derived from
        the distances instead of on the distance matrix itself
    :return: array of queried labels per vertex; -inf marks vertices never queried
    '''
    n = g.num_vertices()
    all_pairs = graph_tool.topology.shortest_distance(g, weights=weights).get_2d_array(range(n)).T

    if use_adjacency:
        dist_map = all_pairs
        # keep only distances of at most 1, everything farther away becomes 0
        adjacency = dist_map.copy()
        adjacency[adjacency > 1] = 0
    else:
        # to prevent overflow etc.: work in floating point and turn the
        # oversized "unreachable" entries into +inf
        dist_map = all_pairs.astype(np.double)
        dist_map[dist_map > n] = np.inf

    comps, hist = graph_tool.topology.label_components(g)
    classes = np.unique(y)
    known_labels = -np.ones(n)*np.inf

    # One candidate position (index into the path) and one generator of
    # further positions per path.  The builtin types replace the np.int /
    # np.object / np.bool aliases, which were removed from NumPy.
    candidates = np.zeros(len(paths), dtype=int)
    candidate_generators = np.zeros(len(paths), dtype=object)
    for i, path in enumerate(paths):
        candidate_generators[i] = binarySearchGenerator(known_labels, path, 0, len(path)-1)
        candidates[i] = next(candidate_generators[i])

    candidate_hulls = np.zeros(len(paths), dtype=object)
    candidate_hull_sizes = np.zeros(len(paths))
    known_classes = dict()
    classes_hulls = dict()

    for j in range(len(candidates)):
        candidate_hulls[j] = dict()

    for c in classes:
        known_classes[c] = set()
        classes_hulls[c] = np.zeros(n, dtype=bool)  # empty hull
        for j, candidate in enumerate(candidates):
            singleton = np.zeros(n, dtype=bool)
            singleton[paths[j][candidate]] = True
            candidate_hulls[j][c] = singleton  # singleton hull

    for z in range(budget):
        # compute most promising vertex; candidates that are already
        # labelled get a score of -1
        for p in range(len(paths)):
            if known_labels[paths[p][candidates[p]]] == -np.inf:
                candidate_hull_sizes[p] = helper_sum_sizes(candidate_hulls[p], classes_hulls)
            else:
                candidate_hull_sizes[p] = -1

        maximizers = np.where(candidate_hull_sizes == np.max(candidate_hull_sizes))[0]

        # prefer not queried paths
        unqueried = maximizers[candidates[maximizers] == 0]
        if unqueried.size > 0:
            maximizers = unqueried
        p_star = np.random.choice(maximizers)
        candidate = paths[p_star][candidates[p_star]]

        # query it
        known_labels[candidate] = y[candidate]

        # update data structures
        known_classes[known_labels[candidate]].add(candidate)
        classes_hulls[known_labels[candidate]] = candidate_hulls[p_star][known_labels[candidate]]

        for j in range(len(candidates)):
            path = paths[j]
            # NOTE(review): `in` on an array tests whether any *element
            # value* equals the vertex index; if the hull is a boolean mask,
            # indexing it with the vertex was probably intended - confirm
            # against compute_hull's return type before changing.
            while known_labels[path[candidates[j]]] != -np.inf or path[candidates[j]] in classes_hulls[known_labels[candidate]]:
                try:
                    candidates[j] = next(candidate_generators[j])
                except StopIteration:
                    break
            for c in classes:
                candidate_hulls[j][c] = compute_hull(g, list(known_classes[c].union([path[candidates[j]]])), weights, dist_map, comps, hist, hull_as_optimization)

        if compute_hulls_between_queries:
            known_labels_augmented = known_labels.copy()
            known_classes_hulls_temp = np.zeros((n, len(classes)), dtype=bool)
            for i, c in enumerate(classes):
                known_classes_hulls_temp[:,i] = compute_hull(g, np.where(known_labels_augmented == c)[0], weights, dist_map, comps, hist, compute_closure=False)

            # label a vertex only if it lies in the hull of exactly one class
            for i, c in enumerate(classes):
                only_c = known_classes_hulls_temp[:,i] & ~(np.sum(known_classes_hulls_temp[:,np.arange(len(classes))!=i],axis=1).astype(bool))
                known_labels_augmented[only_c] = c

        else:
            known_labels_augmented = known_labels

        if use_adjacency:
            prediction = label_propagation(adjacency, known_labels_augmented, y, use_adjacency=use_adjacency)
        else:
            prediction = label_propagation(dist_map, known_labels_augmented, y, use_adjacency=use_adjacency)
        print("======")
        print(z+1, np.sum(known_labels>-np.inf))
        print("accuracy", np.sum(prediction==y)/y.size)

    return known_labels
Example #6
0
def spc_querying_naive_one_convex(g : graph_tool.Graph, paths, y, convex_label, epsilon=0.5, weight=None, binary_search=False,closed_interval=False):
    '''
    Query labels along the given paths when one of the two classes is convex.

    Each path is first probed at regular steps derived from ``epsilon``.
    If a convex-labelled vertex is found, the borders of the convex stretch
    are refined (by binary search or by one extra probe per side) and the
    rest of the path is labelled as non-convex. After every path the hull of
    all convex-labelled vertices is labelled convex as well.

    :param g: graph to query on
    :param paths: list of paths (each a sequence of vertex indices)
    :param y: ground truth labels, indexed by vertex
    :param convex_label: the label of the convex class
    :param epsilon: step width of the initial probing, as a fraction of the path length
    :param weight: edge weights passed to ``closure.compute_hull``
    :param binary_search: refine the borders with ``binarySearch`` instead of single probes
    :param closed_interval: not used by this function
    :return: tuple ``(known_labels, budget)``; ``known_labels`` holds -inf for
        unlabelled vertices, ``budget[i]`` counts the queries spent on path ``i``
    '''
    print("epsilon", epsilon)
    known_labels = -np.ones(g.num_vertices())*np.inf
    budget = np.zeros(g.num_vertices())

    # The other label is the remaining one of the first two unique labels.
    # Take the position as a scalar explicitly: converting a 1-d array with
    # int() is deprecated in NumPy.
    labels = np.unique(y)
    convex_pos = np.where(labels == convex_label)[0][0]
    non_convex_label = labels[(convex_pos + 1) % 2]
    for i, full_path in enumerate(paths):

        # Reuse what earlier paths already revealed about this path.
        if np.any(known_labels[full_path] == convex_label):
            smallest = np.min(np.where(known_labels[full_path] == convex_label)[0])
            biggest = np.max(np.where(known_labels[full_path] == convex_label)[0])

            if np.any(known_labels[full_path[:smallest]] == non_convex_label):
                known_labels[full_path[:np.max(np.where(known_labels[full_path[:smallest]] == non_convex_label)[0])]] = non_convex_label

            # NOTE(review): the position found below is relative to
            # full_path[biggest:] but is used as a position in full_path - confirm.
            if np.any(known_labels[full_path[biggest:]] == non_convex_label):
                known_labels[full_path[np.min(np.where(known_labels[full_path[biggest:]] == non_convex_label)[0]):]] = non_convex_label

        # Continue with the still unlabelled vertices of the path only.
        path = np.array(full_path)[known_labels[full_path] == -np.inf]

        for z in range(1,int(np.ceil(1/epsilon))):
            j = int(z*(np.ceil(epsilon*len(path))))
            while j < len(path) and known_labels[path[j]] != -np.inf:
                j += 1
            if j >= len(path):
                break

            # NOTE(review): this sums the *indices* of the unlabelled
            # vertices rather than counting them - confirm the intent.
            if np.sum(np.where(known_labels==-np.inf)[0]) <= epsilon*len(path):
                conv_region = np.where(known_labels[path] == convex_label)[0]
                if conv_region.size > 0:
                    known_labels[path] = known_labels[path[0]]
                    # NOTE(review): conv_region holds positions within `path`,
                    # yet known_labels is sliced with them directly - confirm.
                    known_labels[np.min(conv_region):np.max(conv_region)+1] = convex_label
                break

            known_labels[path[j]] = y[path[j]]
            budget[i] += 1

        if np.any(known_labels[path] == convex_label):
            smallest = np.min(np.where(known_labels[path] == convex_label)[0])
            biggest = np.max(np.where(known_labels[path] == convex_label)[0])
            if binary_search:
                # left border: from the path start up to the first convex vertex
                l_path = path[:smallest+1]
                if known_labels[l_path[0]] == -np.inf:
                    known_labels[l_path[0]] = y[l_path[0]]
                    budget[i] += 1
                label_budget, new_labels = binarySearch(y[l_path], 0, len(l_path) - 1, known_labels[l_path[0]], known_labels[l_path])
                known_labels[l_path] = new_labels
                budget[i] += label_budget

                # right border: from the last convex vertex to the path end
                r_path = path[biggest:]
                if known_labels[r_path[-1]] == -np.inf:
                    known_labels[r_path[-1]] = y[r_path[-1]]
                    budget[i] += 1
                label_budget, new_labels = binarySearch(y[r_path], 0, len(r_path) - 1, known_labels[r_path[0]], known_labels[r_path])
                known_labels[r_path] = new_labels
                budget[i] += label_budget
            else:
                # walk outwards to the nearest labelled vertex on each side
                j_minus = smallest -1
                while j_minus > 0 and known_labels[path[j_minus]] == -np.inf:
                    j_minus -= 1
                j_plus = biggest+ 1
                while j_plus < len(path) and known_labels[path[j_plus]] == -np.inf:
                    j_plus += 1

                # probe the midpoint of the gap on each side
                if known_labels[path[j_minus + (smallest - j_minus)//2]] == -np.inf:
                    known_labels[path[j_minus + (smallest - j_minus)//2]] = y[path[j_minus + (smallest - j_minus)//2]]
                    budget[i] += 1
                if known_labels[path[biggest + (j_plus - biggest) // 2]] == -np.inf:
                    known_labels[path[biggest + (j_plus - biggest) // 2]] = y[path[biggest + (j_plus - biggest) // 2]]
                    budget[i] += 1

                smallest = np.min(np.where(known_labels[path] == convex_label)[0])
                biggest = np.max(np.where(known_labels[path] == convex_label)[0])

                known_labels[path[smallest:biggest+1]] = convex_label

                # NOTE(review): path[:smallest-1] leaves path[smallest-1]
                # untouched - possibly an off-by-one, confirm.
                if smallest > 0:
                    known_labels[path[:smallest-1]] = non_convex_label
                if biggest < len(path)-1:
                    known_labels[path[biggest+1:]] = non_convex_label
        else:
            known_labels[path] = non_convex_label

        # Everything inside the hull of the convex-labelled vertices is convex too.
        convex_class = closure.compute_hull(g, np.where(known_labels == convex_label)[0], weight)
        known_labels[convex_class] = convex_label
    return known_labels, budget
Example #7
0
class SegmentationGraph(object):
    """
    Class defining the abstract SegmentationGraph object, its attributes and
    implements methods common to all derived graph classes.

    The constructor requires the following parameters of the underlying
    segmentation that will be used to build the graph.
    """
    def __init__(self):
        """
        Creates an empty, undirected segmentation graph together with the
        bookkeeping containers used while the graph is being built.

        Returns:
            None
        """
        # dict: maps vertex coordinates (x, y, z) to the vertex index.
        self.coordinates_to_vertex_index = dict()
        # set: pairs of vertex coordinates that are connected by an edge, kept
        # as tuples of the form ((x1, y1, z1), (x2, y2, z2)).
        self.coordinates_pair_connected = set()

        # graph_tool.Graph: stores the segmentation graph topology, geometry
        # and properties (initially empty).
        graph = Graph(directed=False)
        # "Internal property maps" of the graph:
        # - vertex property holding the xyz coordinates of each vertex
        graph.vertex_properties["xyz"] = graph.new_vertex_property(
            "vector<float>")
        # - edge property holding the distance between the connected vertices
        graph.edge_properties["distance"] = graph.new_edge_property("float")
        self.graph = graph

    @staticmethod
    def distance_between_voxels(voxel1, voxel2):
        """
        Calculates and returns the Euclidean distance between two voxels.

        Args:
            voxel1 (tuple): first voxel coordinates in form of a tuple of
                floats of length 3 (x1, y1, z1)
            voxel2 (tuple): second voxel coordinates in form of a tuple of
                floats of length 3 (x2, y2, z2)

        Returns:
            the Euclidean distance between two voxels (float)
        """
        if (isinstance(voxel1, tuple) and (len(voxel1) == 3)
                and isinstance(voxel2, tuple) and (len(voxel2) == 3)):
            sum_of_squared_differences = 0
            for i in range(3):  # for each dimension
                sum_of_squared_differences += (voxel1[i] - voxel2[i])**2
            return math.sqrt(sum_of_squared_differences)
        else:
            raise pexceptions.PySegInputError(
                expr='distance_between_voxels (SegmentationGraph)',
                msg=('Tuples of integers of length 3 required as first and '
                     'second input.'))

    def update_coordinates_to_vertex_index(self):
        """
        Updates graph's dictionary coordinates_to_vertex_index.

        The dictionary maps the vertex coordinates (x, y, z) to the vertex
        index. It has to be updated after purging the graph, because vertices
        are renumbered, as well as after reading a graph from a file (e.g.
        before density calculation).

        Returns:
            None
        """
        self.coordinates_to_vertex_index = {}
        for vd in self.graph.vertices():
            [x, y, z] = self.graph.vp.xyz[vd]
            self.coordinates_to_vertex_index[(x, y,
                                              z)] = self.graph.vertex_index[vd]

    def calculate_density(self,
                          size,
                          scale,
                          mask=None,
                          target_coordinates=None,
                          verbose=False):
        """
        Calculates ribosome density for each membrane graph vertex.

        Calculates shortest geodesic distances (d) for each vertex in the graph
        to each reachable ribosome center mapped on the membrane given by a
        binary mask with coordinates in pixels or an array of coordinates in
        given units.
        Then, calculates a density measure of ribosomes at each vertex or
        membrane voxel: D = sum {over all reachable ribosomes} (1 / (d + 1)).
        Adds the density as vertex PropertyMap to the graph. Returns an array
        with the same shape as the underlying segmentation with the densities
        plus 1, in order to distinguish membrane voxels with 0 density from the
        background.

        Args:
            size (tuple): size in voxels (X, Y, Z) of the original segmentation
            scale (tuple): pixel size (X, Y, Z) in given units of the original
                segmentation
            mask (numpy.ndarray, optional): a binary mask of the ribosome
                centers as 3D array where indices are coordinates in pixels
                (default None)
            target_coordinates (numpy.ndarray, optional): the ribosome centers
                coordinates in given units as 2D array in format
                [[x1, y1, z1], [x2, y2, z2], ...] (default None)
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            a 3D numpy ndarray with the densities + 1

        Raises:
            pexceptions.PySegInputError: if neither 'mask' nor
                'target_coordinates' is given, if the shape of 'mask' differs
                from 'size', or if no target coordinates are found
            pexceptions.PySegInputWarning: if a target coordinate is not a key
                of coordinates_to_vertex_index

        Note:
            One of the two parameters, mask or target_coordinates, has to be
            given. If both are given, mask takes precedence.
        """
        from . import ribosome_density as rd
        # If a mask is given, find the set of voxels of ribosome centers mapped
        # on the membrane, 'target_voxels', and rescale them to units,
        # 'target_coordinates':
        if mask is not None:
            if mask.shape != size:
                raise pexceptions.PySegInputError(
                    expr='calculate_density (SegmentationGraph)',
                    msg=("Size of the input 'mask' have to be equal to those "
                         "set during the generation of the graph."))
            # output as a list of tuples [(x1,y1,z1), (x2,y2,z2), ...] in pixels
            target_voxels = rd.get_foreground_voxels_from_mask(mask)
            # for rescaling have to convert to an ndarray
            target_ndarray_voxels = rd.tupel_list_to_ndarray_voxels(
                target_voxels)
            # rescale to units, output an ndarray [[x1,y1,z1], [x2,y2,z2], ...]
            target_ndarray_coordinates = (target_ndarray_voxels *
                                          np.asarray(scale))
            # convert to a list of tuples, which are in units now
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_ndarray_coordinates)
        # If target_coordinates are given (in units), convert them from a numpy
        # ndarray to a list of tuples:
        elif target_coordinates is not None:
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_coordinates)
        # Neither input was given: fail with the input error used for the
        # other input problems instead of a TypeError from len(None) below.
        if target_coordinates is None:
            raise pexceptions.PySegInputError(
                expr='calculate_density (SegmentationGraph)',
                msg="Either 'mask' or 'target_coordinates' has to be given.")
        # Exit if the target_voxels list is empty:
        if len(target_coordinates) == 0:
            raise pexceptions.PySegInputError(
                expr='calculate_density (SegmentationGraph)',
                msg="No target voxels were found! Check your input ('mask' or "
                "'target_coordinates').")
        print('{} target voxels'.format(len(target_coordinates)))
        if verbose:
            print(target_coordinates)

        # Check that all target coordinates exist in the graph (they should
        # already all be in the graph) and collect their vertex indices in the
        # same pass:
        target_coordinates_in_graph = []
        target_vertices_indices = []
        for target_xyz in target_coordinates:
            if target_xyz not in self.coordinates_to_vertex_index:
                raise pexceptions.PySegInputWarning(
                    expr='calculate_density (SegmentationGraph)',
                    msg=('Target ({}, {}, {}) not inside the membrane!'.format(
                        target_xyz[0], target_xyz[1], target_xyz[2])))
            target_coordinates_in_graph.append(target_xyz)
            target_vertices_indices.append(
                self.coordinates_to_vertex_index[target_xyz])

        print('{} target coordinates in graph'.format(
            len(target_coordinates_in_graph)))
        if verbose:
            print(target_coordinates_in_graph)

        # Density calculation
        # Add a new vertex property to the graph, density:
        self.graph.vp.density = self.graph.new_vertex_property("float")
        # Dictionary mapping voxel coordinates (for the volume returned later)
        # to a list of density values falling within that voxel:
        voxel_to_densities = {}

        # Loop invariants:
        # - if unreachable, the maximum float64 is stored in the distance map
        unreachable_dist = np.finfo(np.float64).max
        # - if there is only one target voxel, dist_map is a single value and
        #   has to be wrapped into a list
        single_target = len(target_coordinates_in_graph) == 1

        # For each vertex in the graph:
        for v_membrane in self.graph.vertices():
            # Get its coordinates:
            membrane_xyz = self.graph.vp.xyz[v_membrane]
            if verbose:
                print('Membrane vertex ({}, {}, {})'.format(
                    membrane_xyz[0], membrane_xyz[1], membrane_xyz[2]))
            # Get a distance map with all pairs of distances between current
            # graph vertex (membrane_xyz) and target vertices (ribosome
            # coordinates):
            dist_map = shortest_distance(self.graph,
                                         source=v_membrane,
                                         target=target_vertices_indices,
                                         weights=self.graph.ep.distance)
            if single_target:
                dist_map = [dist_map]

            # Iterate over all shortest distances from the membrane vertex to
            # the target vertices, while calculating the density:
            # Initializing: membrane coordinates with no reachable ribosomes
            # will have a value of 0, those with reachable ribosomes > 0.
            density = 0
            for d in dist_map:
                if verbose:
                    print('\tTarget vertex ...')
                if d == unreachable_dist:
                    if verbose:
                        print('\t\tunreachable')
                else:
                    if verbose:
                        print('\t\td = {}'.format(d))
                    density += 1 / (d + 1)

            # Add the density of the membrane vertex as a property of the
            # current vertex in the graph:
            self.graph.vp.density[v_membrane] = density

            # Calculate the corresponding voxel of the vertex and add the
            # density to the list keyed by the voxel in the dictionary:
            # Scaling the coordinates back from units to voxels. (Without round
            # float coordinates are truncated to the next lowest integer.)
            voxel = (int(round(membrane_xyz[0] / scale[0])),
                     int(round(membrane_xyz[1] / scale[1])),
                     int(round(membrane_xyz[2] / scale[2])))
            voxel_to_densities.setdefault(voxel, []).append(density)

            if verbose:
                print('\tdensity = {}'.format(density))
            # Progress report every 1000 vertices:
            num_processed = self.graph.vertex_index[v_membrane] + 1
            if num_processed % 1000 == 0:
                now = datetime.now()
                print('{} membrane vertices processed on: {}-{}-{} {}:{}:{}'.
                      format(num_processed, now.year, now.month, now.day,
                             now.hour, now.minute, now.second))

        # Initialize an array scaled like the original segmentation, which will
        # hold in each membrane voxel the maximal density among the
        # corresponding vertex coordinates in the graph plus 1 and 0 in each
        # background (non-membrane) voxel:
        densities = np.zeros(size, dtype=np.float16)
        # The densities array membrane voxels are initialized with 1 in order to
        # distinguish membrane voxels from the background.
        for voxel, voxel_densities in voxel_to_densities.items():
            densities[voxel[0], voxel[1], voxel[2]] = 1 + max(voxel_densities)
        if verbose:
            print('densities:\n{}'.format(densities))
        return densities

    def graph_to_points_and_lines_polys(self,
                                        vertices=True,
                                        edges=True,
                                        verbose=False):
        """
        Generates a VTK PolyData object from the graph with vertices as
        vertex-cells (containing 1 point) and edges as line-cells (containing 2
        points).

        All vertex properties except 'xyz' and all edge properties are copied
        into cell data arrays, skipping properties of value type 'string' or
        'python::object'. Vector edge properties are assumed to have 3
        components.

        Args:
            vertices (boolean, optional): if True (default) vertices are stored
                a VTK PolyData object as vertex-cells
            edges (boolean, optional): if True (default) edges are stored a VTK
                PolyData object as line-cells
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            - vtk.vtkPolyData with vertex-cells
            - vtk.vtkPolyData with edges as line-cells
        """
        graph = self.graph
        converter = TypesConverter()
        skipped_types = ('string', 'python::object')

        # Initialization
        poly_verts = vtk.vtkPolyData()
        poly_lines = vtk.vtkPolyData()
        points = vtk.vtkPoints()
        vertex_arrays = list()
        edge_arrays = list()
        # Vertex property arrays
        for prop_key in list(graph.vp.keys()):
            data_type = graph.vp[prop_key].value_type()
            if data_type in skipped_types or prop_key == 'xyz':
                continue
            if verbose:
                print('\nvertex property key: {}'.format(prop_key))
                print('value type: {}'.format(data_type))
            if data_type[0:6] != 'vector':  # scalar
                num_components = 1
            else:  # vector: take the length from the first vertex
                num_components = len(graph.vp[prop_key][graph.vertex(0)])
            array = converter.gt_to_vtk(data_type)
            array.SetName(prop_key)
            if verbose:
                print('number of components: {}'.format(num_components))
            array.SetNumberOfComponents(num_components)
            vertex_arrays.append(array)
        # Edge property arrays
        for prop_key in list(graph.ep.keys()):
            data_type = graph.ep[prop_key].value_type()
            if data_type in skipped_types:
                continue
            if verbose:
                print('\nedge property key: {}'.format(prop_key))
                print('value type: {}'.format(data_type))
            if data_type[0:6] != 'vector':  # scalar
                num_components = 1
            else:  # vector (all edge properties so far are scalars)
                num_components = 3
                if verbose:
                    print('Sorry, not implemented yet, assuming a vector '
                          'with 3 components.')
            array = converter.gt_to_vtk(data_type)
            array.SetName(prop_key)
            if verbose:
                print('number of components: {}'.format(num_components))
            array.SetNumberOfComponents(num_components)
            edge_arrays.append(array)
        if verbose:
            print('\nvertex arrays length: {}'.format(len(vertex_arrays)))
            print('edge arrays length: {}'.format(len(edge_arrays)))

        # Geometry
        # Lookup table from vertex index to VTK point id. The builtin int is
        # used as dtype because the alias np.int was removed from NumPy.
        lut = np.zeros(shape=graph.num_vertices(), dtype=int)
        for i, vd in enumerate(graph.vertices()):
            [x, y, z] = graph.vp.xyz[vd]
            points.InsertPoint(i, x, y, z)
            lut[graph.vertex_index[vd]] = i
        if verbose:
            print('number of points: {}'.format(points.GetNumberOfPoints()))

        # Topology
        # Vertices
        verts = vtk.vtkCellArray()
        if vertices:
            # Name, number of components and numpy type do not change per
            # vertex, so resolve them once per array.
            vertex_specs = [
                (array, array.GetName(), array.GetNumberOfComponents(),
                 converter.gt_to_numpy(
                     graph.vp[array.GetName()].value_type()))
                for array in vertex_arrays]
            for vd in graph.vertices():  # vd = vertex descriptor
                verts.InsertNextCell(1)
                verts.InsertCellPoint(lut[graph.vertex_index[vd]])
                for array, prop_key, n_comp, numpy_type in vertex_specs:
                    array.InsertNextTuple(
                        self.get_vertex_prop_entry(prop_key, vd, n_comp,
                                                   numpy_type))
            if verbose:
                print('number of vertex cells: {}'.format(
                    verts.GetNumberOfCells()))
        # Edges
        lines = vtk.vtkCellArray()
        if edges:
            # Same per-array hoisting as for the vertex arrays.
            edge_specs = [
                (array, array.GetName(), array.GetNumberOfComponents(),
                 converter.gt_to_numpy(
                     graph.ep[array.GetName()].value_type()))
                for array in edge_arrays]
            for ed in graph.edges():  # ed = edge descriptor
                lines.InsertNextCell(2)
                lines.InsertCellPoint(lut[graph.vertex_index[ed.source()]])
                lines.InsertCellPoint(lut[graph.vertex_index[ed.target()]])
                for array, prop_key, n_comp, numpy_type in edge_specs:
                    array.InsertNextTuple(
                        self.get_edge_prop_entry(prop_key, ed, n_comp,
                                                 numpy_type))
            if verbose:
                print('number of line cells: {}'.format(
                    lines.GetNumberOfCells()))

        # vtkPolyData construction
        poly_verts.SetPoints(points)
        poly_lines.SetPoints(points)
        if vertices:
            poly_verts.SetVerts(verts)
        if edges:
            poly_lines.SetLines(lines)
        for array in vertex_arrays:
            poly_verts.GetCellData().AddArray(array)
        for array in edge_arrays:
            poly_lines.GetCellData().AddArray(array)

        return poly_verts, poly_lines

    def get_vertex_prop_entry(self, prop_key, vertex_descriptor, n_comp,
                              data_type):
        """
        Gets a property value of a vertex for inserting into a VTK vtkDataArray
        object.

        This function is used by the methods graph_to_points_and_lines_polys and
        graph_to_triangle_poly (the latter of the derived classes PointGraph and
        TriangleGraph (in surface_graphs).

        Args:
            prop_key (str): name of the desired vertex property
            vertex_descriptor (graph_tool.Vertex): vertex descriptor of the
                current vertex
            n_comp (int): number of components of the array (length of the
                output tuple)
            data_type: numpy data type converted from a graph-tool property
                value type by TypesConverter().gt_to_numpy

        Returns:
            a tuple (with length like n_comp) with the property value of the
            vertex converted to a numpy data type
        """
        prop = list()
        if n_comp == 1:
            prop.append(data_type(self.graph.vp[prop_key][vertex_descriptor]))
        else:
            for i in range(n_comp):
                prop.append(
                    data_type(self.graph.vp[prop_key][vertex_descriptor][i]))
        return tuple(prop)

    def get_edge_prop_entry(self, prop_key, edge_descriptor, n_comp,
                            data_type):
        """
        Gets a property value of an edge for inserting into a VTK vtkDataArray
        object.

        This private function is used by the method
        graph_to_points_and_lines_polys.

        Args:
            prop_key (str): name of the desired vertex property
            edge_descriptor (graph_tool.Edge): edge descriptor of the current
                edge
            n_comp (int): number of components of the array (length of the
                output tuple)
            data_type: numpy data type converted from a graph-tool property
                value type by TypesConverter().gt_to_numpy

        Returns:
            a tuple (with length like n_comp) with the property value of the
            edge converted to a numpy data type
        """
        prop = list()
        if n_comp == 1:
            prop.append(data_type(self.graph.ep[prop_key][edge_descriptor]))
        else:
            for i in range(n_comp):
                prop.append(
                    data_type(self.graph.ep[prop_key][edge_descriptor][i]))
        return tuple(prop)

    # * The following SegmentationGraph methods are needed for the normal vector
    # voting algorithm. *

    def calculate_average_edge_length(self,
                                      prop_e=None,
                                      value=1,
                                      verbose=False):
        """
        Calculates the average edge length in the graph.

        If a special edge property is specified, includes only the edges where
        this property equals the given value. If there are no edges in the
        graph, the given property does not exist or there are no edges with the
        given property equaling the given value, None is returned.

        Args:
            prop_e (str, optional): edge property, if specified only edges where
                this property equals the given value will be considered
            value (int, optional): value of the specified edge property an edge
                has to have in order to be considered (default 1)
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            the average edge length in the graph (float) or None
        """
        total_edge_length = 0
        average_edge_length = None
        if prop_e is None:
            if verbose:
                print("Considering all edges:")
            if self.graph.num_edges() > 0:
                if verbose:
                    print("{} edges".format(self.graph.num_edges()))
                average_edge_length = np.mean(self.graph.ep.distance.a)
            else:
                print("There are no edges in the graph!")
        elif prop_e in self.graph.edge_properties:
            if verbose:
                print("Considering only edges with property {} equaling value "
                      "{}!".format(prop_e, value))
            num_special_edges = 0
            for ed in self.graph.edges():
                if self.graph.edge_properties[prop_e][ed] == value:
                    num_special_edges += 1
                    total_edge_length += self.graph.ep.distance[ed]
            if num_special_edges > 0:
                if verbose:
                    print("{} such edges".format(num_special_edges))
                average_edge_length = total_edge_length / num_special_edges
            else:
                print("There are no edges with the property {} equaling value "
                      "{}!".format(prop_e, value))
        if verbose:
            print("Average length: {}".format(average_edge_length))
        return average_edge_length

    def find_geodesic_neighbors(self,
                                v,
                                g_max,
                                full_dist_map=None,
                                only_surface=False,
                                verbose=False):
        """
        Finds geodesic neighbor vertices of a given vertex v in the graph that
        are within a given maximal geodesic distance g_max from it.

        Also finds the corresponding geodesic distances. All edges are
        considered. The distances are calculated with Dijkstra's algorithm.

        Args:
            v (graph_tool.Vertex): the source vertex
            g_max: maximal geodesic distance (in the units of the graph)
            full_dist_map (graph_tool.PropertyMap, optional): the full distance
                map for the whole graph; if None, a local distance map is
                calculated for each vertex (default)
            only_surface (boolean, optional): if True (default False), only
                neighbors classified as surface patch (class 1) are considered
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            a dictionary mapping a neighbor vertex index to the geodesic
            distance from vertex v
        """
        if full_dist_map is not None:
            dist_v = full_dist_map[v].get_array()
        else:
            dist_v = shortest_distance(self.graph,
                                       source=v,
                                       target=None,
                                       weights=self.graph.ep.distance,
                                       max_dist=g_max)
            dist_v = dist_v.get_array()
        # numpy array of distances from v to all vertices, in vertex index order

        vertex = self.graph.vertex
        orientation_class = self.graph.vp.orientation_class
        neighbor_id_to_dist = dict()

        idxs = np.where(dist_v <= g_max)[0]  # others are INF
        for idx in idxs:
            dist = dist_v[idx]
            if dist != 0:  # ignore the source vertex itself
                v_i = vertex(idx)
                if (not only_surface) or orientation_class[v_i] == 1:
                    neighbor_id_to_dist[idx] = dist

        if verbose:
            print("{} neighbors".format(len(neighbor_id_to_dist)))
        return neighbor_id_to_dist

    def find_geodesic_neighbors_exact(self,
                                      o,
                                      g_max,
                                      only_surface=False,
                                      verbose=False,
                                      debug=False):
        """
        Finds geodesic neighbor vertices of the origin vertex o in the graph
        that are within a given maximal geodesic distance g_max from it.

        Also finds the corresponding geodesic distances. All edges and faces are
        considered. The distances are calculated with Sun's and Abidi's
        algorithm, a simplification of Kimmels' and Sethian's fast marching
        algorithm.

        Args:
            o (graph_tool.Vertex): the source vertex
            g_max: maximal geodesic distance (in the units of the graph)
            only_surface (boolean, optional): if True (default False), only
                neighbors classified as surface patch (class 1) are considered
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out
            debug (boolean, optional): if True (default False), some more extra
                information will be printed out

        Returns:
            a dictionary mapping a neighbor vertex index to the geodesic
            distance from vertex o
        """
        # Shortcuts
        xyz = self.graph.vp.xyz
        vertex = self.graph.vertex
        orientation_class = self.graph.vp.orientation_class
        distance_between_voxels = self.distance_between_voxels
        calculate_geodesic_distance = self._calculate_geodesic_distance
        insert_geo_dist_vertex_id = self._insert_geo_dist_vertex_id
        # Initialization
        geo_dist_heap = []  # heap has the smallest geodesic distance first
        # dictionaries to keep track which geodesic distance belongs to which
        # vertex or vertices and vice versa
        geo_dist_to_vertex_ids = {}
        vertex_id_to_geo_dist = {}
        neighbor_id_to_dist = {}  # output dictionary
        # Tag the center point (o) as Alive:
        self.graph.vp.tag = self.graph.new_vertex_property("string")
        tag = self.graph.vp.tag  # shortcut
        tag[o] = "Alive"
        if debug:
            print("Vertex o={}: Alive".format(int(o)))
        vertex_id_to_geo_dist[int(o)] = 0  # need it for geo. dist. calculation
        xyz_o = tuple(xyz[o])
        for n in o.all_neighbours():
            # Tag all neighboring points of the center point (n) as Close
            tag[n] = "Close"
            # Geodesic distance in this case = Euclidean between o and n
            xyz_n = tuple(xyz[n])
            on = distance_between_voxels(xyz_o, xyz_n)
            if debug:
                print("Vertex n={}: Close with distance {}".format(int(n), on))
            heappush(geo_dist_heap, on)
            insert_geo_dist_vertex_id(geo_dist_to_vertex_ids, on, int(n))
            vertex_id_to_geo_dist[int(n)] = on

        # Repeat while the smallest distance is <= g_max
        while len(geo_dist_heap) >= 1 and geo_dist_heap[0] <= g_max:
            if debug:
                print("\n{} distances in heap, first={}".format(
                    len(geo_dist_heap), geo_dist_heap[0]))
            # 1. Change the tag of the point in Close with the smallest
            # geodesic distance (a) from Close to Alive
            smallest_geo_dist = heappop(geo_dist_heap)
            closest_vertices_ids = geo_dist_to_vertex_ids[smallest_geo_dist]
            a = vertex(closest_vertices_ids[0])
            if len(closest_vertices_ids) > 1:  # move the first one (a) to the
                # back, so it's not taken again next time
                closest_vertices_ids.pop(0)
                closest_vertices_ids.append(int(a))
            tag[a] = "Alive"
            # only proceed if a is a surface patch:
            if only_surface and orientation_class[a] != 1:
                continue
            neighbor_id_to_dist[int(a)] = smallest_geo_dist  # add a to output
            if debug:
                print("Vertex a={}: Alive".format(int(a)))
            neighbors_a = set(a.all_neighbours())  # actually don't have
            # duplicates, but like this can use fast sets' intersection method
            for c in neighbors_a:
                # 2. Tag all neighboring points (c) of this point as Close,
                # but skip those which are Alive already
                if tag[c] == "Alive":
                    if debug:
                        print("Skipping Alive neighbor {}".format(int(c)))
                    continue
                tag[c] = "Close"
                if debug:
                    print("Vertex c={}: Close".format(int(c)))
                # 3. Recompute the geodesic distance of these neighboring
                # points, using only values of points that are Alive, and renew
                # it only if the recomputed result is smaller
                # Find Alive point b, belonging to the same triangle as a and c:
                # iterate over an intersection of the neighbors of a and c
                neighbors_c = set(c.all_neighbours())
                common_neighbors_a_c = neighbors_a.intersection(neighbors_c)
                for b in common_neighbors_a_c:
                    # check if b is tagged Alive
                    if tag[b] == "Alive":
                        if debug:
                            print("\tUsing vertex b={}".format(int(b)))
                        new_geo_dist_c = calculate_geodesic_distance(
                            a,
                            b,
                            xyz[c].a,
                            vertex_id_to_geo_dist,
                            verbose=verbose)
                        if int(c) not in vertex_id_to_geo_dist:  # add c
                            if debug:
                                print("\tadding new distance {}".format(
                                    new_geo_dist_c))
                            vertex_id_to_geo_dist[int(c)] = new_geo_dist_c
                            heappush(geo_dist_heap, new_geo_dist_c)
                            insert_geo_dist_vertex_id(geo_dist_to_vertex_ids,
                                                      new_geo_dist_c, int(c))
                        else:
                            old_geo_dist_c = vertex_id_to_geo_dist[int(c)]
                            if new_geo_dist_c < old_geo_dist_c:  # update c
                                if debug:
                                    print(
                                        "\tupdating distance {} to {}".format(
                                            old_geo_dist_c, new_geo_dist_c))
                                vertex_id_to_geo_dist[int(c)] = new_geo_dist_c
                                if old_geo_dist_c in geo_dist_heap:
                                    # check because it is sometimes not there
                                    geo_dist_heap.remove(old_geo_dist_c)
                                heappush(geo_dist_heap, new_geo_dist_c)
                                old_geo_dist_vertex_ids = geo_dist_to_vertex_ids[
                                    old_geo_dist_c]
                                if len(old_geo_dist_vertex_ids) == 1:
                                    del geo_dist_to_vertex_ids[old_geo_dist_c]
                                else:
                                    old_geo_dist_vertex_ids.remove(int(c))
                                insert_geo_dist_vertex_id(
                                    geo_dist_to_vertex_ids, new_geo_dist_c,
                                    int(c))
                            elif debug:
                                print("\tkeeping the old distance={}, because "
                                      "it's <= the new={}".format(
                                          old_geo_dist_c, new_geo_dist_c))
                        # if debug:
                        #     print(geo_dist_heap)
                        #     print(geo_dist_to_vertex_ids)
                        #     print(vertex_id_to_geo_dist)
                        #     print(neighbor_id_to_dist)
                        break  # one Alive b is expected, stop iteration
                else:
                    if debug:
                        print("\tNo common neighbors of a and c are Alive")

        del self.graph.vertex_properties["tag"]
        if debug:
            print("Vertex o={} has {} geodesic neighbors".format(
                int(o), len(neighbor_id_to_dist)))
        if verbose:
            print("{} neighbors".format(len(neighbor_id_to_dist)))
        return neighbor_id_to_dist

    def _calculate_geodesic_distance(self,
                                     a,
                                     b,
                                     xyz_c,
                                     vertex_id_to_geo_dist,
                                     verbose=False):
        """
        Computes the geodesic distance of a point c from the already known
        geodesic distances of two vertices a and b.

        The angles of the triangle abc at a and at b are obtained from the
        three Euclidean side lengths (law of cosines). If both are below 90
        degrees, the distance of c follows from the law of cosines using the
        geodesic distance of a and the angle alpha + theta; otherwise the
        shorter of the two detours over a or over b is taken.

        Args:
            a: vertex with a known geodesic distance
            b: vertex with a known geodesic distance
            xyz_c: coordinates of the point c
            vertex_id_to_geo_dist (dict): maps vertex index to its geodesic
                distance; has to contain int(a) and int(b)
            verbose (boolean, optional): if True (default False), the triangle
                type is printed out

        Returns:
            the geodesic distance of the point c
        """
        dist_a = vertex_id_to_geo_dist[int(a)]
        dist_b = vertex_id_to_geo_dist[int(b)]
        coords = self.graph.vp.xyz
        xyz_a = coords[a].a
        xyz_b = coords[b].a

        # Euclidean side lengths of the triangle abc
        ab = euclidean_distance(xyz_a, xyz_b)
        ac = euclidean_distance(xyz_a, xyz_c)
        bc = euclidean_distance(xyz_b, xyz_c)

        # inner angles at a (alpha) and at b (beta)
        alpha = nice_acos((ab**2 + ac**2 - bc**2) / (2 * ab * ac))
        beta = nice_acos((ab**2 + bc**2 - ac**2) / (2 * ab * bc))
        right_angle = math.pi / 2

        if not (alpha < right_angle and beta < right_angle):
            if verbose:
                print("\ttriangle abc is obtuse")
            return min(dist_a + ac, dist_b + bc)

        if verbose:
            print("\ttriangle abc is acute")
        theta = nice_acos((dist_a**2 + ab**2 - dist_b**2) /
                          (2 * ab * dist_a))
        return math.sqrt(ac**2 + dist_a**2 - 2 * ac *
                         dist_a * math.cos(alpha + theta))

    @staticmethod
    def _insert_geo_dist_vertex_id(geo_dist_to_vertices, geo_dist, vertex_ind):
        if geo_dist in geo_dist_to_vertices:
            geo_dist_to_vertices[geo_dist].append(vertex_ind)
        else:
            geo_dist_to_vertices[geo_dist] = [vertex_ind]

    def get_vertex_property_array(self, property_name):
        """
        Returns all values of one vertex property of the graph as a numpy
        array and prints the number of values together with their minimum,
        maximum and mean.

        Args:
            property_name (str): vertex property name

        Returns:
            an array (numpy.ndarray) with all values of the vertex property

        Raises:
            pexceptions.PySegInputError: if property_name is not a str or is
                not a vertex property of the graph
        """
        is_known_property = (
            isinstance(property_name, str)
            and property_name in self.graph.vertex_properties)
        if not is_known_property:
            raise pexceptions.PySegInputError(
                expr='get_vertex_property_array (SegmentationGraph)',
                msg=('The input "{}" is not a str object or is not found in '
                     'vertex properties of the graph.'.format(property_name)))

        property_map = self.graph.vertex_properties[property_name]
        values = np.array(property_map.get_array())
        count_line = '{} "{}" values'.format(len(values), property_name)
        print(count_line)
        stats_line = 'min = {}, max = {}, mean = {}'.format(
            min(values), max(values), np.mean(values))
        print(stats_line)
        return values
Example #8
0
    def makeGraphFast(self,img,dia,xScale,yScale):
        """
        Build an undirected graph-tool graph from a 2-D image, creating
        vertices for foreground pixels (pixels equal to True) and edges to
        foreground neighbours in the same and in the next image row.

        Every vertex stores a dict ('imgIdx', 'coord', 'nrOfPaths',
        'diameter'); every edge stores a dict ('coord1', 'coord2', 'weight',
        'RTP') plus a float property derived from that weight.  Progress and
        diagnostics are printed while the image is swept.

        Args:
            img: 2-D image indexed as img[row][column]
            dia: per-pixel values indexed as dia[row][column]; stored
                (multiplied by the average scale) as the vertex 'diameter'
            xScale: factor applied to the column index to get the x coordinate
            yScale: factor applied to the row index to get the y coordinate;
                if xScale or yScale is not > 0, both scales and their
                average fall back to 1

        Returns:
            a tuple (u, n): u is a GraphView of the graph filtered to its
            largest component, n is the number of vertices in u.  If u has
            fewer vertices than the full graph, the fraction of dropped
            vertices is stored in self.__fail.
        """
        print('Building Graph Data Structure'),
        start=time.time()
        G = Graph(directed=False)
        sumAddVertices=0
        
        # vprop / eprop hold one dict per vertex / edge, epropW a float per edge
        vprop=G.new_vertex_property('object')
        eprop=G.new_edge_property('object')
        epropW=G.new_edge_property("float")
        h, w = np.shape(img)
        if xScale>0 and yScale>0: avgScale=(xScale+yScale)/2
        else: 
            avgScale=1.
            xScale=1.
            yScale=1.
        # columns / vertices of the NEXT row that were already created while
        # processing the current row
        addedVerticesLine2=[]
        vListLine2=[]
        percentOld=0
        counter=0
        '''
        Sweep over each line in the image except the last line
        '''
        # NOTE(review): the slice below stops two rows before the end, although
        # the note above speaks of only the last line -- confirm which is meant.
        for idx,i in enumerate(img[:len(img)-2]):
            '''
            Get foreground indices in the current line of the image and make vertices
            '''
            counter+=1
            percent=(float(counter)/float(h))*100
            # report progress only after it advanced by more than 10 percent
            if percentOld+10< percent: 
                print (str(np.round(percent,1))+'% '),
                percentOld=percent

            line1=np.where(i==True)
            if len(line1[0])>0:
                # foreground columns of this row that did not get a vertex yet
                line1=set(line1[0]).difference(set(addedVerticesLine2))
                vL=G.add_vertex(len(list(line1)))
                
                
                # vL is treated as iterable only when more than one vertex was
                # requested, otherwise as a single vertex
                if len(line1)>1 : 
                    vList=vListLine2+list(vL)
                else: vList=vListLine2+[vL]
                # from here on line1[k] is the column of vertex vList[k]
                line1=addedVerticesLine2+list(line1)
                for jdx,j in enumerate(line1):
                    vprop[vList[jdx]]={'imgIdx':(j,idx),'coord': (float(j)*xScale,float(idx)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx][j])*avgScale}
                '''
                keep order of the inserted vertices
                '''
                sumAddVertices+=len(line1)
                
                addedVerticesLine2=[]
                vListLine2=[]
                '''
                Connect foreground indices to neighbours in the next line
                '''
                for v1 in line1:
                    va=vList[line1.index(v1)]
                    diagonalLeft = diagonalRight = True
                    # left neighbour in the same row
                    # NOTE(review): for v1 == 0 the index v1-1 is -1, which
                    # indexes the last column instead of raising -- confirm
                    # this wrap-around is intended.
                    # NOTE(review): a pair of horizontal neighbours seems to get
                    # an edge from each side (here and in the right-neighbour
                    # block below) -- confirm the duplicate edge is wanted.
                    try:
                        if img[idx][v1-1]==True:
                            diagonalLeft=False
                            vb=vList[line1.index(v1-1)]
                            e=G.add_edge(va,vb)
                            eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vb]['coord'],'weight':((vprop[va]['diameter']+vprop[vb]['diameter'])/2),'RTP':False}
                            epropW[e]=2./(eprop[e]['weight']**2)
                    except:
                        print 'Boundary vertex at: '+str([v1,idx-1])+' image size: '+ str([w,h])
                        pass
                    
                    # right neighbour in the same row
                    try:
                        if img[idx][v1+1]==True:
                            diagonalRight=False
                            vb=vList[line1.index(v1+1)]
                            e=G.add_edge(va,vb)
                            eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vb]['coord'],'weight':((vprop[va]['diameter']+vprop[vb]['diameter'])/2),'RTP':False}
                            epropW[e]=2./(eprop[e]['weight']**2)
                    except:
                        print 'Boundary vertex at: '+str([v1+1,idx])+' image size: '+ str([w,h])
                        pass # just if we are out of bounds
                    
                    # pixel directly below: a new vertex is created for it and
                    # both diagonal connections are disabled
                    try:
                        if img[idx+1][v1]==True:
                            diagonalRight=False
                            diagonalLeft=False
                            vNew=G.add_vertex()
                            vprop[vNew]={'imgIdx':(v1,idx+1),'coord': (float(v1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1])*avgScale}
                            vListLine2.append(vNew)
                            e=G.add_edge(vList[line1.index(v1)],vNew)
                            eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                            epropW[e]=1./(eprop[e]['weight']**2)
                            if v1 not in addedVerticesLine2: addedVerticesLine2.append(v1)
                    except:
                        print 'Boundary vertex at: '+str([v1,idx+1])+' image size: '+ str([w,h])
                        pass
                    
                    # lower-right diagonal, only if neither the right nor the
                    # lower pixel was foreground
                    try:    
                        if diagonalRight == True and img[idx+1][v1+1]==True:
                            vNew=G.add_vertex()
                            vprop[vNew]={'imgIdx':(v1+1,idx+1),'coord': (float(v1+1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1+1])*avgScale}
                            vListLine2.append(vNew)
                            e=G.add_edge(vList[line1.index(v1)],vNew)
                            eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                            epropW[e]=1.41/(eprop[e]['weight']**2)
                            if v1+1 not in addedVerticesLine2: addedVerticesLine2.append(v1+1)
                    except:
                        print 'Boundary vertex at: '+str([v1+1,idx+1])+' image size: '+ str([w,h])
                        pass
                    
                    # lower-left diagonal, only if neither the left nor the
                    # lower pixel was foreground
                    try:
                        if diagonalLeft  == True and img[idx+1][v1-1]==True:
                            vNew=G.add_vertex()
                            vprop[vNew]={'imgIdx':(v1-1,idx+1),'coord': (float(v1-1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1-1])*avgScale}
                            vListLine2.append(vNew)
                            e=G.add_edge(vList[line1.index(v1)],vNew)
                            eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                            epropW[e]=1.41/(eprop[e]['weight']**2)
                            if v1-1 not in addedVerticesLine2: addedVerticesLine2.append(v1-1)
                    except:
                        print 'Boundary vertex at: '+str([v1-1,idx+1])+' image size: '+ str([w,h])
                        pass
                    # purely diagnostic output; nothing is added to the graph here
                    try:
                        if img[idx][v1+1]==False and img[idx][v1-1]==False and img[idx+1][v1]==False and diagonalLeft==False and diagonalRight==False:
                            print 'tip detected'
                            if img[idx-1][v1-1]==False and img[idx-1][v1+1]==False and img[idx-1][v1]==False:
                                print 'floating pixel'
                    except:
                        pass
        
        print'done!'                               
        # register the property maps on the graph under the names "ep", "w", "vp"
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop            
        print 'graph build in '+str(time.time()-start)
        # keep only the largest component as a filtered view of G
        l = gt.label_largest_component(G)
        u = gt.GraphView(G, vfilt=l)
        print '# vertices'
        print(u.num_vertices())
        print(G.num_vertices())
        # fraction of vertices that are not part of the largest component
        if u.num_vertices()!=G.num_vertices(): self.__fail=float((G.num_vertices()-u.num_vertices()))/float(G.num_vertices())
        return u,u.num_vertices()

# Attach the question id of every vertex of ``g`` as an int vertex property;
# the ids are taken from ``id2q_map`` in its iteration order.
prop_question_id = g.new_vertex_property('int')
prop_question_id.a = np.array(list(id2q_map.values()))

# focus on largest CC
g.set_vertex_filter(vfilt)

# re-index the graph so that the remaining vertices get consecutive ids
# SO question: https://stackoverflow.com/questions/46264296/graph-tool-re-index-vertex-ids-to-be-consecutive-integers
n2i = {n: i for i, n in enumerate(g.vertices())}
i2n = {i: n for n, i in n2i.items()}

new_g = Graph()
new_g.add_edge_list([(n2i[e.source()], n2i[e.target()]) for e in g.edges()])


# update question ids: carry the ids over from the old to the new vertex ids
new_prop_question_id = new_g.new_vertex_property('int')
new_prop_question_id.a = [prop_question_id[i2n[i]] for i in range(new_g.num_vertices())]
new_g.vertex_properties['question_id'] = new_prop_question_id


print('saving largest CC in graph')
new_g.save('{}/question_graph.gt'.format(data_dir))


print('saving connected_question_ids')
# use a context manager so the file is flushed and closed deterministically
with open('{}/connected_question_ids.pkl'.format(data_dir), 'wb') as pkl_file:
    pkl.dump(list(new_prop_question_id.a), pkl_file)
class BoardGraphGraphtool(BoardGraphBase):
    """
    Board graph implementation backed by a graph-tool ``Graph``.

    Vertices are board positions (integer indices). Each vertex carries a
    "cell" object property; each edge carries a "direction" object property
    and an integer "weight" property.
    """

    def __init__(self, number_of_vertices, graph_type):
        """
        Creates a graph with ``number_of_vertices`` vertices and no edges.

        Args:
            number_of_vertices (int): number of board positions
            graph_type: forwarded unchanged to the base class initializer
        """
        super().__init__(number_of_vertices, graph_type)
        # Graph tool creates directed multigraph by default.
        self._graph = Graph()
        self._graph.add_vertex(number_of_vertices)
        # Build one BoardCell per vertex. ``n * [BoardCell()]`` would put the
        # very same instance into every vertex, so mutating one cell in place
        # would silently change all of them.
        self._graph.vertex_properties["cell"] = self._graph.new_vertex_property(
            "object", [BoardCell() for _ in range(number_of_vertices)]
        )
        self._graph.edge_properties["direction"] = (
            self._graph.new_edge_property("object")
        )
        self._graph.edge_properties["weight"] = (
            self._graph.new_edge_property("int")
        )

    def __getitem__(self, position):
        """Returns the cell object stored at vertex ``position``."""
        return self._graph.vp.cell[self._graph.vertex(position)]

    def __setitem__(self, position, board_cell):
        """Stores ``board_cell`` at vertex ``position``."""
        self._graph.vp.cell[self._graph.vertex(position)] = board_cell

    def __contains__(self, position):
        """Returns True if ``position`` is a valid vertex index."""
        return position in range(0, self.vertices_count())

    def vertices_count(self):
        """Returns the number of vertices in the graph."""
        return self._graph.num_vertices()

    def edges_count(self):
        """Returns the number of edges in the graph."""
        return self._graph.num_edges()

    def has_edge(self, source_vertice, target_vertice, direction):
        """
        Returns True if there is an edge from ``source_vertice`` to
        ``target_vertice`` whose "direction" property equals ``direction``.
        """
        return any(
            int(e.target()) == target_vertice and
            self._graph.ep.direction[e] == direction
            for e in self._graph.vertex(source_vertice).out_edges()
        )

    def out_edges_count(self, source_vertice, target_vertice):
        """
        Returns the number of (parallel) edges going from ``source_vertice``
        to ``target_vertice``.
        """
        return sum(
            1 for e in self._graph.vertex(source_vertice).out_edges()
            if int(e.target()) == target_vertice
        )

    def reconfigure_edges(self, width, height, tessellation):
        """
        Uses tessellation object to create all edges in graph.

        All existing edges are removed first. Then, for every vertex and
        every direction in ``tessellation.legal_directions``, an edge tagged
        with that direction is added whenever
        ``tessellation.neighbor_position`` returns a position (not None).
        """
        self._graph.clear_edges()
        for source_vertice in self._graph.vertices():
            for direction in tessellation.legal_directions:
                neighbor_vertice = tessellation.neighbor_position(
                    int(source_vertice),
                    direction,
                    board_width=width,
                    board_height=height
                )
                if neighbor_vertice is not None:
                    e = self._graph.add_edge(
                        source_vertice, neighbor_vertice, add_missing=False
                    )
                    self._graph.ep.direction[e] = direction

    # TODO: Faster version?
    # def reconfigure_edges(self, width, height, tessellation):
    #     """
    #     Uses tessellation object to create all edges in graph.
    #     """
    #     self._graph.clear_edges()
    #     edges_to_add = []
    #     directions_to_add = dict()
    #     for source_vertice in self._graph.vertices():
    #         for direction in tessellation.legal_directions:
    #             neighbor_vertice = tessellation.neighbor_position(
    #                 int(source_vertice), direction,
    #                 board_width=width, board_height=height
    #             )
    #             if neighbor_vertice is not None:
    #                 edge = (int(source_vertice), neighbor_vertice,)

    #                 edges_to_add.append(edge)

    #                 if edge not in directions_to_add:
    #                     directions_to_add[edge] = deque()

    #                 directions_to_add[edge].append(direction)

    #     self._graph.add_edge_list(edges_to_add) if edges_to_add else None

    #     for e in edges_to_add:
    #         e_descriptors = self._graph.edge(
    #             s = self._graph.vertex(e[0]),
    #             t = self._graph.vertex(e[1]),
    #             all_edges = True
    #         )

    #         for e_descriptor in e_descriptors:
    #             if len(directions_to_add[e]) > 0:
    #                 self._graph.ep.direction[e_descriptor] = directions_to_add[e][0]
    #                 directions_to_add[e].popleft()

    def calculate_edge_weights(self):
        """
        Sets the "weight" of every edge to ``self.out_edge_weight`` of the
        edge's target position (``out_edge_weight`` is not defined in this
        class).
        """
        for e in self._graph.edges():
            self._graph.ep.weight[e] = self.out_edge_weight(int(e.target()))

    def neighbor(self, from_position, direction):
        """
        Returns the position reached from ``from_position`` by following the
        first out edge tagged with ``direction``, or None if there is no such
        edge.

        Raises:
            IndexError: if looking up ``from_position`` raises ValueError
        """
        try:
            for e in self._graph.vertex(from_position).out_edges():
                if self._graph.ep.direction[e] == direction:
                    return int(e.target())
        except ValueError as exc:
            raise IndexError(exc.args) from exc

        return None

    def wall_neighbors(self, from_position):
        """
        Returns positions of all out neighbors of ``from_position`` whose
        cell has a truthy ``is_wall``.
        """
        return [
            int(n) for n in self._graph.vertex(from_position).out_neighbours()
            if self[int(n)].is_wall
        ]

    def all_neighbors(self, from_position):
        """Returns positions of all out neighbors of ``from_position``."""
        return [
            int(n) for n in self._graph.vertex(from_position).out_neighbours()
        ]

    def shortest_path(self, start_position, end_position):
        """
        Returns the list of positions on an unweighted shortest path from
        ``start_position`` to ``end_position``; an empty list if a ValueError
        is raised while looking the path up.
        """
        try:
            # the bare name refers to the module-level ``shortest_path``
            # function, not to this method
            return [
                int(v)
                for v in shortest_path(
                    g=self._graph,
                    source=self._graph.vertex(start_position),
                    target=self._graph.vertex(end_position),
                )[0]
            ]
        except ValueError:
            return []

    def dijkstra_path(self, start_position, end_position):
        """
        Same as ``shortest_path`` but uses the "weight" edge property, which
        is recomputed through ``calculate_edge_weights`` on every call.
        """
        try:
            self.calculate_edge_weights()
            return [
                int(v)
                for v in shortest_path(
                    g=self._graph,
                    source=self._graph.vertex(start_position),
                    target=self._graph.vertex(end_position),
                    weights=self._graph.ep.weight,
                )[0]
            ]
        except ValueError:
            return []

    def position_path_to_direction_path(self, position_path):
        """
        Converts a path given as a sequence of positions into directions.

        Returns:
            dict with 'source_position' (first position of the path or None
            for an empty path) and 'path' (list of directions).
        """
        retv = []
        # walk over consecutive (source, target) pairs of the path
        for source_vertice, target_vertice in zip(
            position_path, position_path[1:]
        ):
            # NOTE(review): with parallel edges between the two positions one
            # direction per edge is appended -- confirm this is intended.
            for out_edge in self._graph.vertex(source_vertice).out_edges():
                if int(out_edge.target()) == target_vertice:
                    retv.append(self._graph.ep.direction[out_edge])

        return {
            'source_position': position_path[0] if position_path else None,
            'path': retv
        }
Example #11
0
class FullGraphBuilder:
    """
    Incrementally builds one undirected graph from several node tables and
    edge tables, collecting per-vertex codes, source codes and labels that
    are written to the graph as vertex properties by ``finish``.
    """

    def __init__(self):
        self.graph = Graph(directed=False)
        # one array per add_nodes call; concatenated in finish()
        self.codes = []
        self.sources = []
        # one label per vertex, in vertex order
        self.labels = []
        # names of the extra string vertex properties created so far
        self.attrs = set()

    def add_nodes(self, df, ns):
        """
        Adds one vertex per row of ``df``.

        Args:
            df: data frame with an 'id' column, optionally a 'label' column;
                every other column is stored as a string vertex property
            ns: object providing ``offset`` (added to the ids to form the
                codes) and ``code`` (stored as the source of every new vertex)

        Returns:
            a pandas Series indexed by ``df['id']`` whose values are the
            indices of the newly added vertices
        """
        n = len(df)
        _log.info('adding %d nodes to graph', n)
        start = self.graph.num_vertices()
        vs = self.graph.add_vertex(n)
        end = self.graph.num_vertices()
        assert end - start == n
        nodes = pd.Series(np.arange(start, end, dtype='i4'), index=df['id'])
        self.codes.append(df['id'].values + ns.offset)
        self.sources.append(np.full(n, ns.code, dtype='i2'))
        # fall back to the stringified id when there is no label column
        if 'label' in df.columns:
            self.labels += list(df['label'].values)
        else:
            self.labels += list(df['id'].astype('str').values)

        for c in df.columns:
            if c in ['id', 'label']:
                continue
            # create the property map on first use, reuse it afterwards
            if c not in self.attrs:
                vp = self.graph.new_vp('string')
                self.graph.vp[c] = vp
                self.attrs.add(c)
            else:
                vp = self.graph.vp[c]

            for v, val in zip(vs, df[c].values):
                vp[v] = val

        return nodes

    def add_edges(self, f, src, dst):
        """
        Adds one edge per row of ``f``.

        Args:
            f: data frame whose first column holds source ids and whose
                second column holds destination ids
            src: Series mapping source ids to vertex indices (as returned by
                ``add_nodes``)
            dst: Series mapping destination ids to vertex indices
        """
        _log.info('adding %d edges to graph', len(f))
        edges = np.zeros((len(f), 2), dtype='i4')
        edges[:, 0] = src.loc[f.iloc[:, 0]]
        edges[:, 1] = dst.loc[f.iloc[:, 1]]
        self.graph.add_edge_list(edges)

    def finish(self):
        """
        Writes the collected codes, sources and labels to the graph as the
        vertex properties 'code', 'source' and 'label'.

        Returns:
            the finished graph
        """
        _log.info('setting code attributes')
        code_a = self.graph.new_vp('int64_t')
        code_a.a[:] = np.concatenate(self.codes)
        self.graph.vp['code'] = code_a

        _log.info('setting source attributes')
        source_a = self.graph.new_vp('string')
        # src_label_rev (defined elsewhere) translates a source code to its name
        for v, s in zip(self.graph.vertices(), np.concatenate(self.sources)):
            source_a[v] = src_label_rev[s]
        self.graph.vp['source'] = source_a

        # this step sets labels; the message used to repeat the one for sources
        _log.info('setting label attributes')
        label_a = self.graph.new_vp('string')
        for v, l in zip(self.graph.vertices(), self.labels):
            label_a[v] = l
        self.graph.vp['label'] = label_a

        return self.graph
def gen_fs(dicProperties):
	"""
	Generate a directed graph whose in- and out-degrees are drawn from
	Pareto distributions, with a given fraction of reciprocal connections.

	Keys read from ``dicProperties``: "Reciprocity", "InDeg", "OutDeg",
	"InhibFrac", "Weighted" (plus "Distribution" when weighted) and two of
	"Nodes", "Edges", "Density"; the missing one of the three is computed
	and written back into ``dicProperties``.

	Returns the graph. It carries an int edge property "type" (+1 for
	excitatory, -1 for inhibitory edges) and, if "Weighted" is truthy, a
	double edge property "weight". Self-loops, parallel edges and isolated
	vertices are removed before the properties are created.
	"""
	np.random.seed()
	graphFS = Graph()
	# fraction of arcs that get a reciprocal counterpart
	f = dicProperties["Reciprocity"]
	rFracRecip =  f/(2.0-f)
	# basic quantities
	rInDeg = dicProperties["InDeg"]
	rOutDeg = dicProperties["OutDeg"]
	nNodes = 0
	nEdges = 0
	rDens = 0.0
	if "Nodes" in dicProperties:
		nNodes = dicProperties["Nodes"]
		graphFS.add_vertex(nNodes)
		if "Edges" in dicProperties:
			nEdges = dicProperties["Edges"]
			rDens = nEdges / float(nNodes**2)
			dicProperties["Density"] = rDens
		else:
			rDens = dicProperties["Density"]
			nEdges = int(np.floor(rDens*nNodes**2))
			dicProperties["Edges"] = nEdges
	else:
		nEdges = dicProperties["Edges"]
		rDens = dicProperties["Density"]
		nNodes = int(np.floor(np.sqrt(nEdges/rDens)))
		graphFS.add_vertex(nNodes)
		dicProperties["Nodes"] = nNodes
	# number of arcs to create
	nArcs = int(np.floor(rDens*nNodes**2)/(1+rFracRecip))
	# parameters of the associated probability functions F(x) = A x^{-tau}
	Ai = nArcs*(rInDeg-1)/(nNodes)
	Ao = nArcs*(rOutDeg-1)/(nNodes)
	# means of the Pareto II (Lomax) distributions
	rMi = 1/(rInDeg-2.)
	rMo = 1/(rOutDeg-2.)
	# in- and out-degree of every node i in range(nNodes), rescaled so that
	# the mean degree is Ai (resp. Ao)
	lstInDeg = np.random.pareto(rInDeg,nNodes)+1
	lstOutDeg = np.random.pareto(rOutDeg,nNodes)+1
	lstInDeg = np.floor(np.multiply(Ai/np.mean(lstInDeg), lstInDeg)).astype(int)
	lstOutDeg = np.floor(np.multiply(Ao/np.mean(lstOutDeg), lstOutDeg)).astype(int)
	# generate the required stubs and count them; each node index appears
	# once per stub it owns
	nInStubs = int(np.sum(lstInDeg))
	nOutStubs = int(np.sum(lstOutDeg))
	lstInStubs = np.zeros(np.sum(lstInDeg))
	lstOutStubs = np.zeros(np.sum(lstOutDeg))
	nStartIn = 0
	nStartOut = 0
	for vert in range(nNodes):
		nInDegVert = lstInDeg[vert]
		nOutDegVert = lstOutDeg[vert]
		for j in range(np.max([nInDegVert,nOutDegVert])):
			if j < nInDegVert:
				lstInStubs[nStartIn+j] += vert
			if j < nOutDegVert:
				lstOutStubs[nStartOut+j] += vert
		nStartOut+=nOutDegVert
		nStartIn+=nInDegVert
	# make sure we have roughly the requested number of edges: top up with
	# extra stubs on randomly chosen vertices
	while nInStubs*(1+rFracRecip)/float(nArcs) < 0.95 :
		vert = np.random.randint(0,nNodes)
		nAddInStubs = int(np.floor(Ai/rMi*(np.random.pareto(rInDeg)+1)))
		lstInStubs = np.append(lstInStubs,np.repeat(vert,nAddInStubs)).astype(int)
		nInStubs+=nAddInStubs
	while nOutStubs*(1+rFracRecip)/float(nArcs) < 0.95 :
		# draw a fresh vertex for every batch, as the in-stub loop does;
		# without this all extra out-stubs landed on one leftover vertex
		vert = np.random.randint(0,nNodes)
		nAddOutStubs = int(np.floor(Ao/rMo*(np.random.pareto(rOutDeg)+1)))
		lstOutStubs = np.append(lstOutStubs,np.repeat(vert,nAddOutStubs)).astype(int)
		nOutStubs+=nAddOutStubs
	# make sure we have the same number of in and out stubs (1.13 is an experimental correction)
	nMaxStubs = int(1.13*(2.0*nArcs)/(2*(1+rFracRecip)))
	if nInStubs > nMaxStubs and nOutStubs > nMaxStubs:
		np.random.shuffle(lstInStubs)
		np.random.shuffle(lstOutStubs)
		lstOutStubs.resize(nMaxStubs)
		lstInStubs.resize(nMaxStubs)
		nOutStubs = nInStubs = nMaxStubs
	elif nInStubs < nOutStubs:
		np.random.shuffle(lstOutStubs)
		lstOutStubs.resize(nInStubs)
		nOutStubs = nInStubs
	else:
		np.random.shuffle(lstInStubs)
		lstInStubs.resize(nOutStubs)
		nInStubs = nOutStubs
	# build the edge list from the stubs
	nRecip = int(np.floor(nInStubs*rFracRecip))
	# the reciprocal stubs: the first nRecip pairs are added a second time
	# with source and target swapped
	np.random.shuffle(lstInStubs)
	np.random.shuffle(lstOutStubs)
	lstInRecip = lstInStubs[0:nRecip]
	lstOutRecip = lstOutStubs[0:nRecip]
	lstEdges = np.array([np.concatenate((lstOutStubs,lstInRecip)),np.concatenate((lstInStubs,lstOutRecip))]).astype(int)
	# add edges
	graphFS.add_edge_list(np.transpose(lstEdges))
	remove_self_loops(graphFS)
	remove_parallel_edges(graphFS)
	lstIsolatedVert = find_vertex(graphFS, graphFS.degree_property_map("total"), 0)
	graphFS.remove_vertex(lstIsolatedVert)
	graphFS.reindex_edges()
	nNodes = graphFS.num_vertices()
	nEdges = graphFS.num_edges()
	rDens = nEdges / float(nNodes**2)
	# generate types
	rInhibFrac = dicProperties["InhibFrac"]
	lstTypesGen = np.random.uniform(0,1,nEdges)
	lstTypeLimit = np.full(nEdges,rInhibFrac)
	lstIsExcitatory = np.greater(lstTypesGen,lstTypeLimit)
	nExc = np.count_nonzero(lstIsExcitatory)
	epropType = graphFS.new_edge_property("int",np.multiply(2,lstIsExcitatory)-np.repeat(1,nEdges)) # excitatory (True) or inhibitory (False)
	graphFS.edge_properties["type"] = epropType
	# and weights
	if dicProperties["Weighted"]:
		lstWeights = dicGenWeights[dicProperties["Distribution"]](graphFS,dicProperties,nEdges,nExc) # generate the weights
		epropW = graphFS.new_edge_property("double",lstWeights) # property map storing the weights
		graphFS.edge_properties["weight"] = epropW
	return graphFS
Example #13
0
def build_graph(df_list, sens='ST', top=410, min_sens=0.01,
                edge_cutoff=0.0):
    """
    Initializes and constructs a graph where vertices are the parameters
    selected from the first dataframe in 'df_list', subject to the
    constraints set by 'sens', 'top', and 'min_sens'.  Edges are the second
    order sensitivities of the interactions between those vertices,
    with sensitivities greater than 'edge_cutoff'.

    Note that the second dataframe is modified in place: the columns
    'vertex1' and 'vertex2' are added to it.

    Parameters
    -----------
    df_list     : list
                  A list of two dataframes.  The first dataframe should be
                  the first/total order sensitivities collected by the
                  function data_processing.get_sa_data().
    sens        : str, optional
                  A string with the name of the sensitivity that you would
                  like to use for the vertices ('ST' or 'S1').
    top         : int, optional
                  An integer specifying the number of vertices to display (
                  the top sensitivity values).
    min_sens    : float, optional
                  A float with the minimum sensitivity to allow in the graph.
    edge_cutoff : float, optional
                  A float specifying the minimum second order sensitivity to
                  show as an edge in the graph.

    Returns
    --------
    g : graph-tool object
        a graph-tool graph object of the network described above.  Each
        vertex has properties 'param', 'sensitivity', and 'confidence'
        corresponding to the name of the parameter, value of the sensitivity
        index, and its confidence interval.  The only edge property is
        'second_sens', the second order sensitivity index for the
        interaction between the two vertices it connects.

    Raises
    -------
    ValueError
        If 'sens' is neither 'ST' nor 'S1', or if the second order
        dataframe is None.
    """

    # get the first/total index dataframe and second order dataframe
    df = df_list[0]
    df2 = df_list[1]

    # Make sure sens is ST or S1
    if sens not in {'ST', 'S1'}:
        raise ValueError('sens must be ST or S1')
    # Make sure that there is a second order index dataframe.  The truth
    # value of a DataFrame is ambiguous, so test against None explicitly.
    if df2 is None:
        raise ValueError('Missing second order dataframe!')

    # slice the dataframes so the resulting graph will only include the top
    # 'top' values of 'sens' greater than 'min_sens'.
    df = df.sort_values(sens, ascending=False)
    df = df.loc[df[sens] > min_sens, :].head(top)
    # after the reset the row labels are 0..n-1, matching enumerate() below
    df = df.reset_index()

    # initialize a graph
    g = Graph()

    vprop_sens = g.new_vertex_property('double')
    vprop_conf = g.new_vertex_property('double')
    vprop_name = g.new_vertex_property('string')
    eprop_sens = g.new_edge_property('double')

    g.vertex_properties['param'] = vprop_name
    g.vertex_properties['sensitivity'] = vprop_sens
    g.vertex_properties['confidence'] = vprop_conf
    g.edge_properties['second_sens'] = eprop_sens

    # keep a list of all the vertices
    v_list = []

    # Add the vertices to the graph
    for i, param in enumerate(df['Parameter']):
        v = g.add_vertex()
        vprop_sens[v] = df.loc[i, sens]
        vprop_conf[v] = 1 + df.loc[i, '%s_conf' % sens] / df.loc[i, sens]
        vprop_name[v] = param
        v_list.append(v)

    # Make two new columns in second order dataframe that point to the vertices
    # connected on each row.  -999 marks rows whose parameter has no vertex.
    df2['vertex1'] = -999
    df2['vertex2'] = -999
    for vertex in v_list:
        param = g.vp.param[vertex]
        df2.loc[df2['Parameter_1'] == param, 'vertex1'] = vertex
        df2.loc[df2['Parameter_2'] == param, 'vertex2'] = vertex

    # Only allow edges for vertices that we've defined
    df_edges = df2[(df2['vertex1'] != -999) & (df2['vertex2'] != -999)]
    # eliminate edges below a certain cutoff value
    pruned = df_edges[df_edges['S2'] > edge_cutoff].reset_index()
    # Add the edges for the graph
    for i, sensitivity in enumerate(pruned['S2']):
        v1 = pruned.loc[i, 'vertex1']
        v2 = pruned.loc[i, 'vertex2']
        e = g.add_edge(v1, v2)
        # multiply by a number to make the lines visible on the plot
        eprop_sens[e] = sensitivity * 150

    # These are ways you can reference properties of vertices or edges
    # g.vp.param[g.vertex(77)]
    # g.vp.param[v_list[0]]

    print ('Created a graph with %s vertices and %s edges.\nVertices are the '
           'top %s %s values greater than %s.\nOnly S2 values (edges) '
           'greater than %s are included.' %
           (g.num_vertices(), g.num_edges(), top, sens, min_sens, edge_cutoff))

    return g
Example #14
0
    def expansion_snowball_sample(graph: Graph, num_vertices: int,
                                  prev_state: ExpansionSnowballSampleState,
                                  args: argparse.Namespace) -> SampleState:
        """Expansion snowball sampling. At every iteration, picks a vertex adjacent to the current sample that
        contributes the most new neighbors.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : ExpansionSnowballSampleState
            the state of the previous sample in the stack. If there is no previous sample, an empty SampleState object
            should be passed in here.
        args : argparse.Namespace
            the command-line arguments provided by the user; ``sample_size`` (a percentage) and
            ``sample_iterations`` are read to size the sample

        Returns
        -------
        state : SampleState
            the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and have
            to be mapped back to the unfiltered graph)
        """
        state = ExpansionSnowballSampleState(graph.num_vertices(), prev_state)
        sample_num = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        sample_num += len(state.sample_idx)

        def absorb_unseen(candidates):
            # Hand every candidate that is neither sampled nor already a
            # known neighbor to Sample._add_neighbor.
            for candidate in candidates:
                if not (state.neighbors_flag[candidate]
                        or state.index_flag[candidate]):
                    Sample._add_neighbor(candidate, state.contribution,
                                         state.index_flag,
                                         state.neighbors_flag,
                                         graph.get_out_neighbors(candidate),
                                         graph.get_in_neighbors(candidate),
                                         state.neighbors)

        if not state.neighbors:  # If there are no neighbors, start with the state.start vertex
            state.index_flag[state.start] = True
            start_neighbors = graph.get_out_neighbors(state.start)
            state.neighbors = set(start_neighbors)
            # discard() instead of remove(): the start vertex may show up more
            # than once among its own out-neighbors (repeated self-loops), and
            # a second remove() would raise KeyError.
            state.neighbors.discard(state.start)
            for neighbor in start_neighbors:
                if neighbor == state.start:
                    continue
                state.neighbors_flag[neighbor] = True
                # Count the out-neighbors that are not yet sampled or flagged.
                new_neighbors = 0
                for _neighbor in graph.get_out_neighbors(neighbor):
                    if not (state.index_flag[_neighbor]
                            or state.neighbors_flag[_neighbor]):
                        new_neighbors += 1
                state.contribution[neighbor] += new_neighbors

        # Keep sampling until the index set holds a non-zero multiple of
        # sample_num vertices.
        while len(state.index_set) == 0 or len(
                state.index_set) % sample_num != 0:
            if len(state.neighbors) == 0:  # choose random vertex not in index set
                vertex = np.random.choice(
                    np.setxor1d(np.arange(graph.num_vertices()),
                                state.index_set))
                state.index_set.append(vertex)
                state.index_flag[vertex] = True
                absorb_unseen(graph.get_out_neighbors(vertex))
                continue
            elif np.max(state.contribution) == 0:  # choose random neighbors from neighbor set
                num_choices = min(len(state.neighbors),
                                  sample_num - len(state.index_set))
                vertices = np.random.choice(np.fromiter(
                    state.neighbors, int, len(state.neighbors)),
                                            num_choices,
                                            replace=False)
                for vertex in vertices:
                    state.index_set.append(vertex)
                    state.index_flag[vertex] = True
                    state.neighbors.remove(vertex)
                    absorb_unseen(graph.get_out_neighbors(vertex))
                continue
            # Regular step: take the vertex with the largest contribution.
            vertex = np.argmax(state.contribution)
            state.index_set.append(vertex)
            state.index_flag[vertex] = True
            state.neighbors.remove(vertex)
            state.contribution[vertex] = 0
            # NOTE(review): this branch walks the in-neighbors while the two
            # branches above walk the out-neighbors; kept as in the original.
            absorb_unseen(graph.get_in_neighbors(vertex))
        state.sample_idx = np.asarray(state.index_set)
        return state
Example #15
0
def shortest_path_cover_logn_apx(g: gt.Graph, weight: gt.EdgePropertyMap):
    """Cover all vertices of ``g`` with paths chosen round by round.

    An undirected graph is temporarily switched to directed mode and given
    the missing reverse edges, each carrying the weight of its forward edge
    (these values are written into ``weight``).  The temporary edges are
    removed and the graph is made undirected again before returning, also
    when an error occurs.

    Every edge weight is scaled to a multiple of ``num_vertices + 1`` and
    then lowered by one.  Whenever a vertex becomes covered, each of its
    in-edges is raised by one again, so edges into covered vertices are exact
    multiples of ``num_vertices + 1``.

    Args:
        g: graph whose vertices are to be covered.
        weight: edge weights of ``g``.

    Returns:
        list of all paths handed back by
        ``shortest_path_visiting_most_nodes`` over all rounds.

    Raises:
        ValueError: if the weights are non-integral, all equal and not
            positive, so no scaling step can be derived.
        RuntimeError: if an in-edge of a newly covered vertex is not a
            multiple of ``num_vertices + 1`` after its update.
    """
    started_with_directed = g.is_directed()
    # (source, target) of every reverse edge this call adds to the graph.
    added_reverse_edges = []
    try:
        if not started_with_directed:
            # Only the first two columns are source/target (older graph-tool
            # versions append the edge index as a third column).
            forward_edges = g.get_edges()[:, :2]
            g.set_directed(True)
            for s, t in forward_edges:
                if g.edge(t, s) is None:
                    weight[g.add_edge(t, s)] = weight[g.edge(s, t)]
                    # Remember only edges we really added, so that cleanup
                    # never deletes an edge of the original graph.
                    added_reverse_edges.append((t, s))

        scale = g.num_vertices() + 1
        if weight.value_type() not in (
                "bool", "int", "int16_t", "int32_t", "int64_t"):
            lowest = np.min(weight.a)
            larger = weight.a[weight.a > lowest]
            if larger.size:
                eps = np.min(larger) - lowest
            elif lowest > 0:
                # Uniform weights: scale every edge to exactly one unit.
                eps = lowest
            else:
                raise ValueError(
                    "cannot derive a scaling step from uniform non-positive "
                    "edge weights")
            scaled_weight = (np.ceil(weight.a / eps) *
                             scale).astype(int)  # ints >= 1
        else:
            scaled_weight = weight.a * scale

        summed_edge_weight = np.sum(scaled_weight)
        adjusted_weight = g.new_edge_property("long", vals=scaled_weight - 1)

        paths = []
        covered_vertices = set()

        while len(covered_vertices) != g.num_vertices():
            curr_paths = shortest_path_visiting_most_nodes(
                g, adjusted_weight, covered_vertices, summed_edge_weight)

            for path in curr_paths:
                paths.append(path)

                path_vertices = set(path)
                new_covered = path_vertices.difference(covered_vertices)
                for v in new_covered:
                    # Walk the in-edges themselves so that parallel edges
                    # are each raised exactly once.
                    for in_edge in g.vertex(v).in_edges():
                        adjusted_weight[in_edge] += 1
                        if adjusted_weight[in_edge] % scale != 0:
                            raise RuntimeError(
                                "adjusted weight of an edge into vertex %s "
                                "is not a multiple of %s" % (v, scale))

                covered_vertices = covered_vertices.union(path_vertices)
                print(len(new_covered), len(path), len(covered_vertices),
                      path)
        return paths
    finally:
        if not started_with_directed:
            # Remove while still directed: there (s, t) identifies exactly
            # the edge that was added above.
            for s, t in added_reverse_edges:
                g.remove_edge(g.edge(s, t))
            g.set_directed(False)
Example #16
0
class SegmentationGraph(object):
    """
    Class defining the abstract SegmentationGraph object, its attributes and
    implements methods common to all derived graph classes.

    The constructor requires the following parameters of the underlying
    segmentation that will be used to build the graph.

    Args:
        scale_factor_to_nm (float): pixel size in nanometers for scaling the
            graph
        scale_x (int): x axis length in pixels of the segmentation
        scale_y (int): y axis length in pixels of the segmentation
        scale_z (int): z axis length in pixels of the segmentation
    """

    def __init__(self, scale_factor_to_nm, scale_x, scale_y, scale_z):
        """
        Constructor.

        Args:
            scale_factor_to_nm (float): pixel size in nanometers for scaling the
                graph
            scale_x (int): x axis length in pixels of the segmentation
            scale_y (int): y axis length in pixels of the segmentation
            scale_z (int): z axis length in pixels of the segmentation

        Returns:
            None
        """
        self.graph = Graph(directed=False)
        """graph_tool.Graph: a graph object storing the segmentation graph
        topology, geometry and properties.
        """
        self.scale_factor_to_nm = scale_factor_to_nm
        """float: pixel size in nanometers for scaling the coordinates and
        distances in the graph
        """
        self.scale_x = scale_x
        """int: x axis length in pixels of the segmentation"""
        self.scale_y = scale_y
        """int: y axis length in pixels of the segmentation"""
        self.scale_z = scale_z
        """int: z axis length in pixels of the segmentation"""

        # Add "internal property maps" to the graph.
        # vertex property for storing the xyz coordinates in nanometers of the
        # corresponding vertex:
        self.graph.vp.xyz = self.graph.new_vertex_property("vector<float>")
        # edge property for storing the distance in nanometers between the
        # connected vertices:
        self.graph.ep.distance = self.graph.new_edge_property("float")

        self.coordinates_to_vertex_index = {}
        """dict: a dictionary mapping the vertex coordinates in nanometers
        (x, y, z) to the vertex index.
        """
        self.coordinates_pair_connected = {}
        """dict: a dictionary storing pairs of vertex coordinates in nanometers
        that are connected by an edge as a key in a tuple form
        ((x1, y1, z1), (x2, y2, z2)) with value True.
        """

    @staticmethod
    def distance_between_voxels(voxel1, voxel2):
        """
        Calculates and returns the Euclidean distance between two voxels.

        Args:
            voxel1 (tuple): first voxel coordinates in form of a tuple of
                integers of length 3 (x1, y1, z1)
            voxel2 (tuple): second voxel coordinates in form of a tuple of
                integers of length 3 (x2, y2, z2)

        Returns:
            the Euclidean distance between two voxels (float)

        Raises:
            pexceptions.PySegInputError: if either input is not a tuple of
                length 3
        """
        if not (isinstance(voxel1, tuple) and len(voxel1) == 3 and
                isinstance(voxel2, tuple) and len(voxel2) == 3):
            error_msg = ('Tuples of integers of length 3 required as first and '
                         'second input.')
            raise pexceptions.PySegInputError(
                expr='distance_between_voxels (SegmentationGraph)',
                msg=error_msg
            )
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(voxel1, voxel2)))

    def update_coordinates_to_vertex_index(self):
        """
        Updates graph's dictionary coordinates_to_vertex_index.

        The dictionary maps the vertex coordinates (x, y, z) scaled in
        nanometers to the vertex index. It has to be updated after purging the
        graph, because vertices are renumbered, as well as after reading a graph
        from a file (e.g. before density calculation).

        Returns:
            None
        """
        self.coordinates_to_vertex_index = {}
        for vd in self.graph.vertices():
            x, y, z = self.graph.vp.xyz[vd]
            self.coordinates_to_vertex_index[
                (x, y, z)] = self.graph.vertex_index[vd]

    def calculate_density(self, mask=None, target_coordinates=None,
                          verbose=False):
        """
        Calculates ribosome density for each membrane graph vertex.

        Calculates shortest geodesic distances (d) for each vertex in the graph
        to each reachable ribosome center mapped on the membrane given by a
        binary mask with coordinates in pixels or an array of coordinates in nm.
        Then, calculates a density measure of ribosomes at each vertex or
        membrane voxel: D = sum {over all reachable ribosomes} (1 / (d + 1)).
        Adds the density as vertex PropertyMap to the graph. Returns an array
        with the same shape as the underlying segmentation with the densities
        plus 1, in order to distinguish membrane voxels with 0 density from the
        background.

        Args:
            mask (numpy.ndarray, optional): a binary mask of the ribosome
                centers as 3D array where indices are coordinates in pixels
                (default None)
            target_coordinates (numpy.ndarray, optional): the ribosome centers
                coordinates in nm as 2D array in format
                [[x1, y1, z1], [x2, y2, z2], ...] (default None)
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            a 3D numpy ndarray with the densities + 1

        Raises:
            pexceptions.PySegInputError: if neither mask nor
                target_coordinates is given, if the mask shape differs from
                the scales of the graph, or if no target voxels are found
            pexceptions.PySegInputWarning: if a target coordinate is not a
                key of coordinates_to_vertex_index

        Note:
            One of the first two parameters, mask or target_coordinates, has to
            be given. If both are given, mask takes precedence.
        """
        import ribosome_density as rd
        # If a mask is given, find the set of voxels of ribosome centers mapped
        # on the membrane, 'target_voxels', and rescale them to nm,
        # 'target_coordinates':
        if mask is not None:
            if mask.shape != (self.scale_x, self.scale_y, self.scale_z):
                error_msg = ("Scales of the input 'mask' have to be equal to "
                             "those set during the generation of the graph.")
                raise pexceptions.PySegInputError(
                    expr='calculate_density (SegmentationGraph)', msg=error_msg
                )
            # output as a list of tuples [(x1,y1,z1), (x2,y2,z2), ...] in pixels
            target_voxels = rd.get_foreground_voxels_from_mask(mask)
            # for rescaling have to convert to an ndarray
            target_ndarray_voxels = rd.tupel_list_to_ndarray_voxels(
                target_voxels
            )
            # rescale to nm, output an ndarray [[x1,y1,z1], [x2,y2,z2], ...]
            target_ndarray_coordinates = (target_ndarray_voxels
                                          * self.scale_factor_to_nm)
            # convert to a list of tuples, which are in nm now
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_ndarray_coordinates
            )
        # If target_coordinates are given (in nm), convert them from a numpy
        # ndarray to a list of tuples:
        elif target_coordinates is not None:
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_coordinates
            )
        # Neither input was given: fail with a clear message instead of a
        # TypeError from len(None) below.
        else:
            error_msg = ("One of the inputs 'mask' or 'target_coordinates' "
                         "has to be given.")
            raise pexceptions.PySegInputError(
                expr='calculate_density (SegmentationGraph)', msg=error_msg
            )
        # Exit if the target_voxels list is empty:
        if len(target_coordinates) == 0:
            error_msg = ("No target voxels were found! Check your input "
                         "('mask' or 'target_coordinates').")
            raise pexceptions.PySegInputError(
                expr='calculate_density (SegmentationGraph)', msg=error_msg
            )
        print('%s target voxels' % len(target_coordinates))
        if verbose:
            print(target_coordinates)

        # Pre-filter the target coordinates to those existing in the graph
        # (should already all be in the graph, but just in case):
        target_coordinates_in_graph = []
        for target_xyz in target_coordinates:
            if target_xyz in self.coordinates_to_vertex_index:
                target_coordinates_in_graph.append(target_xyz)
            else:
                error_msg = ('Target (%s, %s, %s) not inside the membrane!'
                             % (target_xyz[0], target_xyz[1], target_xyz[2]))
                raise pexceptions.PySegInputWarning(
                    expr='calculate_density (SegmentationGraph)', msg=error_msg
                )

        print('%s target coordinates in graph' % len(
            target_coordinates_in_graph))
        if verbose:
            print(target_coordinates_in_graph)

        # Get all indices of the target coordinates:
        target_vertices_indices = []
        for target_xyz in target_coordinates_in_graph:
            v_target_index = self.coordinates_to_vertex_index[target_xyz]
            target_vertices_indices.append(v_target_index)

        # Density calculation
        # Add a new vertex property to the graph, density:
        self.graph.vp.density = self.graph.new_vertex_property("float")
        # Dictionary mapping voxel coordinates (for the volume returned later)
        # to a list of density values falling within that voxel:
        voxel_to_densities = {}
        # Distance value marking an unreachable target (the maximum float64):
        unreachable = np.finfo(np.float64).max

        # For each vertex in the graph:
        for v_membrane in self.graph.vertices():
            # Get its coordinates:
            membrane_xyz = self.graph.vp.xyz[v_membrane]
            if verbose:
                print('Membrane vertex (%s, %s, %s)'
                      % (membrane_xyz[0], membrane_xyz[1], membrane_xyz[2]))
            # Get a distance map with all pairs of distances between current
            # graph vertex (membrane_xyz) and target vertices (ribosome
            # coordinates):
            dist_map = shortest_distance(self.graph, source=v_membrane,
                                         target=target_vertices_indices,
                                         weights=self.graph.ep.distance)

            # Iterate over all shortest distances from the membrane vertex to
            # the target vertices, while calculating the density:
            # Initializing: membrane coordinates with no reachable ribosomes
            # will have a value of 0, those with reachable ribosomes > 0.
            density = 0
            # If there is only one target voxel, dist_map is a single value -
            # wrap it into a list.
            if len(target_coordinates_in_graph) == 1:
                dist_map = [dist_map]
            for d in dist_map:
                if verbose:
                    print('\tTarget vertex ...')
                # if unreachable, the maximum float64 is stored
                if d == unreachable:
                    if verbose:
                        print('\t\tunreachable')
                else:
                    if verbose:
                        print('\t\td = %s' % d)
                    density += 1 / (d + 1)

            # Add the density of the membrane vertex as a property of the
            # current vertex in the graph:
            self.graph.vp.density[v_membrane] = density

            # Calculate the corresponding voxel of the vertex and add the
            # density to the list keyed by the voxel in the dictionary:
            # Scaling the coordinates back from nm to voxels. (Without round
            # float coordinates are truncated to the next lowest integer.)
            voxel_x = int(round(membrane_xyz[0] / self.scale_factor_to_nm))
            voxel_y = int(round(membrane_xyz[1] / self.scale_factor_to_nm))
            voxel_z = int(round(membrane_xyz[2] / self.scale_factor_to_nm))
            voxel = (voxel_x, voxel_y, voxel_z)
            voxel_to_densities.setdefault(voxel, []).append(density)

            if verbose:
                print('\tdensity = %s' % density)
            if (self.graph.vertex_index[v_membrane] + 1) % 1000 == 0:
                now = datetime.now()
                print('%s membrane vertices processed on: %s-%s-%s %s:%s:%s'
                      % (self.graph.vertex_index[v_membrane] + 1, now.year,
                         now.month, now.day, now.hour, now.minute, now.second))

        # Initialize an array scaled like the original segmentation, which will
        # hold in each membrane voxel the maximal density among the
        # corresponding vertex coordinates in the graph plus 1 and 0 in each
        # background (non-membrane) voxel:
        densities = np.zeros((self.scale_x, self.scale_y, self.scale_z),
                             dtype=np.float16)
        # The densities array membrane voxels are initialized with 1 in order to
        # distinguish membrane voxels from the background.
        for voxel in voxel_to_densities:
            densities[voxel[0], voxel[1], voxel[2]] = 1 + max(
                voxel_to_densities[voxel])
        if verbose:
            print('densities:\n%s' % densities)
        return densities

    def graph_to_points_and_lines_polys(self, vertices=True, edges=True,
                                        verbose=False):
        """
        Generates a VTK PolyData object from the graph with vertices as
        vertex-cells (containing 1 point) and edges as line-cells (containing 2
        points).

        Args:
            vertices (boolean, optional): if True (default) vertices are stored
                a VTK PolyData object as vertex-cells
            edges (boolean, optional): if True (default) edges are stored a VTK
                PolyData object as line-cells
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            - vtk.vtkPolyData with vertex-cells
            - vtk.vtkPolyData with edges as line-cells
        """
        # Initialization
        poly_verts = vtk.vtkPolyData()
        poly_lines = vtk.vtkPolyData()
        points = vtk.vtkPoints()
        vertex_arrays = list()
        edge_arrays = list()
        # Vertex property arrays
        for prop_key in self.graph.vp.keys():
            data_type = self.graph.vp[prop_key].value_type()
            if (data_type != 'string' and data_type != 'python::object' and
                    prop_key != 'xyz'):
                if verbose:
                    print('\nvertex property key: %s' % prop_key)
                    print('value type: %s' % data_type)
                if data_type[0:6] != 'vector':  # scalar
                    num_components = 1
                else:  # vector
                    # the length is taken from the entry of the first vertex
                    num_components = len(
                        self.graph.vp[prop_key][self.graph.vertex(0)]
                    )
                array = TypesConverter().gt_to_vtk(data_type)
                array.SetName(prop_key)
                if verbose:
                    print('number of components: %s' % num_components)
                array.SetNumberOfComponents(num_components)
                vertex_arrays.append(array)
        # Edge property arrays
        for prop_key in self.graph.ep.keys():
            data_type = self.graph.ep[prop_key].value_type()
            if data_type != 'string' and data_type != 'python::object':
                if verbose:
                    print('\nedge property key: %s' % prop_key)
                    print('value type: %s' % data_type)
                if data_type[0:6] != 'vector':  # scalar
                    num_components = 1
                else:  # vector (all edge properties so far are scalars)
                    # num_components = len(
                    #     self.graph.ep[prop_key][self.graph.edge(0, 1)]
                    # )
                    num_components = 3
                    if verbose:
                        print('Sorry, not implemented yet, assuming a vector '
                              'with 3 components.')
                array = TypesConverter().gt_to_vtk(data_type)
                array.SetName(prop_key)
                if verbose:
                    print('number of components: %s' % num_components)
                array.SetNumberOfComponents(num_components)
                edge_arrays.append(array)
        if verbose:
            print('\nvertex arrays length: %s' % len(vertex_arrays))
            print('edge arrays length: %s' % len(edge_arrays))

        # Geometry
        # lut maps a vertex index to the id of its point in 'points'
        # (builtin int: the np.int alias no longer exists in current NumPy)
        lut = np.zeros(shape=self.graph.num_vertices(), dtype=int)
        for i, vd in enumerate(self.graph.vertices()):
            x, y, z = self.graph.vp.xyz[vd]
            points.InsertPoint(i, x, y, z)
            lut[self.graph.vertex_index[vd]] = i
        if verbose:
            print('number of points: %s' % points.GetNumberOfPoints())

        # Topology
        # Vertices
        verts = vtk.vtkCellArray()
        if vertices:
            for vd in self.graph.vertices():  # vd = vertex descriptor
                verts.InsertNextCell(1)
                verts.InsertCellPoint(lut[self.graph.vertex_index[vd]])
                for array in vertex_arrays:
                    prop_key = array.GetName()
                    n_comp = array.GetNumberOfComponents()
                    data_type = self.graph.vp[prop_key].value_type()
                    data_type = TypesConverter().gt_to_numpy(data_type)
                    array.InsertNextTuple(self.get_vertex_prop_entry(
                        prop_key, vd, n_comp, data_type))
            if verbose:
                print('number of vertex cells: %s' % verts.GetNumberOfCells())
        # Edges
        lines = vtk.vtkCellArray()
        if edges:
            for ed in self.graph.edges():  # ed = edge descriptor
                lines.InsertNextCell(2)
                lines.InsertCellPoint(lut[self.graph.vertex_index[ed.source()]])
                lines.InsertCellPoint(lut[self.graph.vertex_index[ed.target()]])
                for array in edge_arrays:
                    prop_key = array.GetName()
                    n_comp = array.GetNumberOfComponents()
                    data_type = self.graph.ep[prop_key].value_type()
                    data_type = TypesConverter().gt_to_numpy(data_type)
                    array.InsertNextTuple(self.get_edge_prop_entry(
                        prop_key, ed, n_comp, data_type))
            if verbose:
                print('number of line cells: %s' % lines.GetNumberOfCells())

        # vtkPolyData construction
        poly_verts.SetPoints(points)
        poly_lines.SetPoints(points)
        if vertices:
            poly_verts.SetVerts(verts)
        if edges:
            poly_lines.SetLines(lines)
        for array in vertex_arrays:
            poly_verts.GetCellData().AddArray(array)
        for array in edge_arrays:
            poly_lines.GetCellData().AddArray(array)

        return poly_verts, poly_lines

    def get_vertex_prop_entry(self, prop_key, vertex_descriptor, n_comp,
                              data_type):
        """
        Gets a property value of a vertex for inserting into a VTK vtkDataArray
        object.

        This private function is used by the methods
        graph_to_points_and_lines_polys and graph_to_triangle_poly (the latter
        of the derived class surface_graphs.TriangleGraph).

        Args:
            prop_key (str): name of the desired vertex property
            vertex_descriptor (graph_tool.Vertex): vertex descriptor of the
                current vertex
            n_comp (int): number of components of the array (length of the
                output tuple)
            data_type: numpy data type converted from a graph-tool property
                value type by TypesConverter().gt_to_numpy

        Returns:
            a tuple (with length like n_comp) with the property value of the
            vertex converted to a numpy data type
        """
        if n_comp == 1:
            # scalar property: the value itself is the single component
            return (data_type(self.graph.vp[prop_key][vertex_descriptor]),)
        return tuple(
            data_type(self.graph.vp[prop_key][vertex_descriptor][i])
            for i in range(n_comp))

    def get_edge_prop_entry(self, prop_key, edge_descriptor, n_comp, data_type):
        """
        Gets a property value of an edge for inserting into a VTK vtkDataArray
        object.

        This private function is used by the method
        graph_to_points_and_lines_polys.

        Args:
            prop_key (str): name of the desired edge property
            edge_descriptor (graph_tool.Edge): edge descriptor of the current
                edge
            n_comp (int): number of components of the array (length of the
                output tuple)
            data_type: numpy data type converted from a graph-tool property
                value type by TypesConverter().gt_to_numpy

        Returns:
            a tuple (with length like n_comp) with the property value of the
            edge converted to a numpy data type
        """
        if n_comp == 1:
            # scalar property: the value itself is the single component
            return (data_type(self.graph.ep[prop_key][edge_descriptor]),)
        return tuple(
            data_type(self.graph.ep[prop_key][edge_descriptor][i])
            for i in range(n_comp))

    # * The following SegmentationGraph methods are needed for the normal vector
    # voting algorithm. *

    def calculate_average_edge_length(self, prop_e=None, value=1):
        """
        Calculates the average edge length in the graph.

        If a special edge property is specified, includes only the edges where
        this property equals the given value. If there are no edges in the
        graph, the given property does not exist or there are no edges with the
        given property equaling the given value, None is returned.

        Args:
            prop_e (str, optional): edge property, if specified only edges where
                this property equals the given value will be considered
            value (int, optional): value of the specified edge property an edge
                has to have in order to be considered (default 1)

        Returns:
            the average edge length in the graph (float) or None
        """
        total_edge_length = 0
        average_edge_length = None
        if prop_e is None:
            print("Considering all edges:")
            for ed in self.graph.edges():
                total_edge_length += self.graph.ep.distance[ed]
            if self.graph.num_edges() > 0:
                average_edge_length = total_edge_length / self.graph.num_edges()
            else:
                print("There are no edges in the graph!")
        elif prop_e in self.graph.edge_properties:
            print("Considering only edges with property %s equaling value %s "
                  % (prop_e, value))
            num_special_edges = 0
            for ed in self.graph.edges():
                if self.graph.edge_properties[prop_e][ed] == value:
                    num_special_edges += 1
                    total_edge_length += self.graph.ep.distance[ed]
            if num_special_edges > 0:
                average_edge_length = total_edge_length / num_special_edges
            else:
                print("There are no edges with the property %s equaling value "
                      "%s!" % (prop_e, value))
        print("Average length: %s" % average_edge_length)
        return average_edge_length

    def find_geodesic_neighbors(self, v, g_max, verbose=False):
        """
        Finds geodesic neighbor vertices of a given vertex v in the graph that
        are within a given maximal geodesic distance g_max from it.

        Also finds the corresponding geodesic distances. All edges are
        considered.

        Args:
            v (graph_tool.Vertex): the source vertex
            g_max: maximal geodesic distance (in nanometers, if the graph was
                scaled)
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            a dictionary mapping a neighbor vertex index to the geodesic
            distance from vertex v
        """
        dist_v = shortest_distance(self.graph, source=v, target=None,
                                   weights=self.graph.ep.distance,
                                   max_dist=g_max)
        dist_v = dist_v.get_array()

        neighbor_id_to_dist = dict()

        idxs = np.where(dist_v <= g_max)[0]
        for idx in idxs:
            dist = dist_v[idx]
            if dist != 0:  # ignore the source vertex itself
                neighbor_id_to_dist[idx] = dist

        if verbose:
            print("%s neighbors" % len(neighbor_id_to_dist))
        return neighbor_id_to_dist

    def get_vertex_property_array(self, property_name):
        """
        Gets a numpy array with all values of a vertex property of the graph,
        printing out the number of values, the minimal and the maximal value.

        Args:
            property_name (str): vertex property name

        Returns:
            an array (numpy.ndarray) with all values of the vertex property

        Raises:
            pexceptions.PySegInputError: if property_name is not a str or is
                not among the vertex properties of the graph
        """
        if (isinstance(property_name, str) and
                property_name in self.graph.vertex_properties):
            values = self.graph.vertex_properties[property_name].get_array()
            print('%s "%s" values' % (len(values), property_name))
            print('min = %s, max = %s' % (min(values), max(values)))
            return values
        else:
            error_msg = ('The input "%s" is not a str object or is not found '
                         'in vertex properties of the graph.' % property_name)
            raise pexceptions.PySegInputError(
                expr='get_vertex_property_array (SegmentationGraph)',
                msg=error_msg)
Example #17
0
def shortest_path_visiting_most_nodes(g: gt.Graph,
                                      adjusted_weight: gt.EdgePropertyMap,
                                      covered_vertices, summed_edge_weight):
    """
    Collect pairwise vertex-disjoint shortest paths that cover as many not yet
    covered vertices as possible.

    All-pairs shortest distances are computed with ``adjusted_weight`` and
    turned into a per-pair score. The sanity check inside the main loop
    requires that the maximal score equals the number of vertices on the
    corresponding shortest path that are not in ``covered_vertices``.

    :param g: the graph
    :param adjusted_weight: edge weights used for all shortest path queries
    :param covered_vertices: set of vertex indices that are already covered
        (it is used both as an index list and with ``set.union``)
    :param summed_edge_weight: distances greater than this value (and negative
        distances) are treated as 0
    :return: list of paths, each path being a list of int vertex indices. If
        the best score is <= 2, a "fast mode" returns vertex-disjoint edges
        between uncovered vertices followed by one single-vertex path for
        every vertex that is still uncovered.
    """
    n = g.num_vertices()
    dist_map = gt.topology.shortest_distance(g, weights=adjusted_weight)

    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    not_visited_source_vertex = np.ones(n, dtype=bool)
    not_visited_source_vertex[list(covered_vertices)] = False
    not_visited_source_vertex = not_visited_source_vertex.reshape(n, 1)

    all_dists = dist_map.get_2d_array(range(n)).T
    all_dists[(all_dists > summed_edge_weight) | (all_dists < 0)] = 0
    all_dists = (n + 1 - all_dists) % (n + 1)

    # The shortest path only counts the edges, so one is added if the starting
    # vertex was not visited. Neither operand changes below, so the score
    # matrix and its maximum are computed once instead of on every iteration.
    path_scores = all_dists + not_visited_source_vertex
    max_value = path_scores.max()

    shortest_paths = []
    all_currently_covered_vertices = set()
    current_length = -1

    # If the longest shortest path covers only <= 2 new nodes go to fast mode:
    # simply add edges covering two vertices until not possible and then the
    # remaining vertices.
    if max_value <= 2:
        covered_now = np.zeros(n, dtype=bool)
        for e in g.edges():
            s, t = int(e.source()), int(e.target())
            if s == t:
                continue
            if (s not in covered_vertices and t not in covered_vertices
                    and not covered_now[s] and not covered_now[t]):
                shortest_paths.append([s, t])
                all_currently_covered_vertices.add(s)
                all_currently_covered_vertices.add(t)
                covered_now[s] = True
                covered_now[t] = True
        single_vertices = set(range(n)).difference(
            covered_vertices.union(all_currently_covered_vertices))

        for i in single_vertices:
            shortest_paths.append([i])

        return shortest_paths

    had_source = np.zeros(n, dtype=bool)

    for source, target in np.array(np.where(path_scores == max_value)).T:
        if (had_source[source]
                or source in all_currently_covered_vertices
                or target in all_currently_covered_vertices):
            continue
        shortest_path, _ = gt.topology.shortest_path(g, source, target,
                                                     adjusted_weight)
        shortest_path = [int(v) for v in shortest_path]
        # Sanity check: the score must equal the number of newly covered
        # vertices on the path.
        # NOTE(review): exit() terminates the whole interpreter from library
        # code; kept as in the original, consider raising an exception.
        if max_value != len(set(shortest_path).difference(covered_vertices)):
            exit(10)

        if len(all_currently_covered_vertices.intersection(
                shortest_path)) != 0:
            continue
        if len(shortest_path) > 1 and len(shortest_path) < current_length:
            return shortest_paths

        shortest_paths.append(shortest_path)
        all_currently_covered_vertices = all_currently_covered_vertices.union(
            shortest_path)
        if current_length < 0:
            current_length = len(shortest_path)

        if len(shortest_path) <= 2:
            break

        had_source[source] = True

    return shortest_paths
Example #18
0
def compute_hull(g: gt.Graph,
                 S,
                 weight=None,
                 dist_map=None,
                 comps=None,
                 hist=None,
                 compute_closure=True,
                 already_closed=None):
    """
    Mark every vertex that lies on a shortest path between vertices of S.

    A vertex x is added when d(v, x) + d(x, s) == d(v, s) for a processed
    vertex v and some start vertex s. With ``compute_closure`` the newly
    added vertices are processed as well, until nothing changes.

    :param g: the graph
    :param S: indices of the initial vertex set (used to index numpy arrays)
    :param weight: edge weights, only used if ``dist_map`` is None. If None,
        unit distance is used, which is faster.
    :param dist_map: n*n array with the pairwise shortest distances, indexed
        as ``dist_map[v, x]``. If None, the function computes it itself.
    :param comps: optional vertex property map with a component label per
        vertex. Only used for undirected graphs, to skip vertices whose
        component is already completely marked and to stop early.
    :param hist: array indexed by component label; it is compared against the
        number of marked vertices for early stopping, so it is presumably the
        component size histogram. Must be given if ``comps`` is given and the
        graph is undirected.
    :param compute_closure: if True the closure is computed (marked vertices
        are used as start vertices and re-queued); if False only S is used as
        start vertices (geodetic set), which is faster.
    :param already_closed: optional container; vertices of S contained in it
        are not queued for processing
    :return: boolean numpy array of length n, True for every vertex in the hull
    """
    n = g.num_vertices()
    # Component-based shortcuts are only applied to undirected graphs.
    use_components = comps is not None and not g.is_directed()

    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    I_S = np.zeros(n, dtype=bool)
    I_S[S] = True

    q = queue.Queue()
    for v in S:
        if already_closed is None or v not in already_closed:
            q.put(v)

    if dist_map is None:
        dist_map = gt.topology.shortest_distance(
            g, weights=weight).get_2d_array(range(n)).T

    all_vertices = np.arange(n)

    while not q.empty():
        v = q.get()

        if use_components:
            # Nothing to gain if the whole component of v is already marked.
            vs_comp = np.where(comps.a == comps.a[v])[0]
            if np.all(I_S[vs_comp]):
                continue

        if compute_closure:
            starting_nodes = all_vertices[I_S]
        else:
            starting_nodes = all_vertices[S]

        # All vertices x s.t. d(v,x)+d(x,s)=d(v,s) for some start vertex s.
        # These are exactly the ones on any shortest v-s-path. Careful, this
        # is not linear runtime. The same test is used for directed and
        # undirected graphs.
        visited_nodes = np.any(
            dist_map[v, :] + dist_map[:, starting_nodes].T
            == dist_map[v, starting_nodes][:, np.newaxis],
            axis=0)

        if compute_closure:
            # Queue the newly reached vertices in ascending index order.
            for i in np.flatnonzero(visited_nodes & ~I_S):
                q.put(int(i))

        I_S[visited_nodes] = True

        # Early stopping if all connected components of S are already covered.
        if use_components:
            if np.sum(I_S) == np.sum(hist[np.unique(comps.get_array()[I_S])]):
                break
        elif np.sum(I_S) == n:
            break

    return I_S
Example #19
0
    def makeGraph(self,img,dia,xScale,yScale):
        """
        Build an undirected 4-neighbourhood pixel graph from a binary image
        and return a view on its largest connected component.

        Every pixel with img[row, column] == True becomes a vertex; vertices
        of horizontally or vertically adjacent pixels are joined by exactly
        one edge. Neighbours outside the image are ignored (negative indices
        no longer wrap around to the opposite image border).

        Args:
            img: 2D array indexed as img[row, column]
            dia: per-pixel diameter values indexed as dia[row][column]
            xScale: factor applied to the row index for the 2nd coordinate
            yScale: factor applied to the column index for the 1st coordinate

        Returns:
            a GraphView of the largest component with the properties
            "vp" (vertex dict: imgIdx, coord, nrOfPaths, diameter),
            "ep" (edge dict: coord1, coord2, weight, RTP) and
            "w" (int32 edge weight)
        """
        # Single-argument print calls behave the same in Python 2 and 3.
        print('Building Graph Data Structure')
        start=time.time()
        G = Graph(directed=False)
        vprop=G.new_vertex_property('object')
        eprop=G.new_edge_property('object')
        epropW=G.new_edge_property("int32_t")
        avgScale=(xScale+yScale)/2
        nRows, nCols = np.shape(img)[:2]
        # Maps (row, column) to the vertex created for that pixel, replacing
        # the repeated linear find_vertex scans over all vertices.
        vertexAt = {}

        def getVertex(j, i):
            """Return (vertex, created) for the pixel in row j, column i."""
            v = vertexAt.get((j, i))
            if v is not None:
                return v, False
            v = G.add_vertex()
            vprop[v] = {'imgIdx':(j,i),
                        'coord':(float(i)*yScale,float(j)*xScale),
                        'nrOfPaths':0,
                        'diameter':float(dia[j][i])*avgScale}
            vertexAt[(j, i)] = v
            return v, True

        def connect(v1, j2, i2):
            """Join v1 with the pixel (j2, i2) if it is set and not yet joined."""
            if not (0 <= j2 < nRows and 0 <= i2 < nCols):
                return
            # '== True' kept on purpose: it matches the pixel selection below.
            if not (img[j2, i2] == True):
                return
            v2, created = getVertex(j2, i2)
            if not created and G.edge(v1, v2) is not None:
                return
            meanDia = (vprop[v1]['diameter']+vprop[v2]['diameter'])/2
            e = G.add_edge(v1, v2)
            epropW[e] = (meanDia/avgScale)**4
            eprop[e] = {'coord1':vprop[v1]['coord'],
                        'coord2':vprop[v2]['coord'],
                        'weight':meanDia**4,
                        'RTP':False}

        test=np.where(img==True)
        total = len(test[0])
        percentOld=0.0
        print(str(np.round(percentOld,1))+'%')
        for cccc, (i, j) in enumerate(zip(test[1], test[0]), 1):
            # Progress is reported whenever it advanced by more than 10%.
            percent=(float(cccc)/float(total))*100
            if percentOld+10< percent:
                print(str(np.round(percent,1))+'%')
                percentOld=percent
            v1, _ = getVertex(j, i)
            # Same neighbour order as before: right, left, below, above.
            connect(v1, j, i+1)
            connect(v1, j, i-1)
            connect(v1, j+1, i)
            connect(v1, j-1, i)

        print('100.0%')
        print('selecting largest connected component')
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop
        l = gt.label_largest_component(G)
        print(l.a)
        u = gt.GraphView(G, vfilt=l)
        print('# vertices')
        print(u.num_vertices())
        print(G.num_vertices())
        print('# edges')
        print(u.num_edges())
        print('building graph finished in: '+str(time.time()-start)+'s')
        return u
Example #20
0
    def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                          full_partition: BlockState,
                          sampled_graph_partition: BlockState,
                          block_mapping: Dict[int, int],
                          vertex_mapping: Dict[int,
                                               int], assignment: np.ndarray):
        """Evaluates the goodness of the samples.

        All results are stored as attributes of this object. Every metric is
        recomputed from scratch, so calling the method again overwrites the
        previous values (including the island vertex count).

        Parameters
        ----------
        full_graph : Graph
            the full, unsampled Graph object
        sampled_graph : Graph
            the sampled graph
        full_partition : Partition
            the partitioning results on the full graph
        sampled_graph_partition : Partition
            the partitioning results on the sampled graph
        block_mapping : Dict[int, int]
            the mapping of blocks from the full graph to the sampled graph
            (not read by this method)
        vertex_mapping : Dict[int, int]
            the mapping of vertices from the full graph to the sampled graph
        assignment : np.ndarray[int]
            the true vertex-to-community mapping
        """
        #####
        # General
        #####
        self.sampled_graph_num_vertices = sampled_graph.num_vertices()
        self.sampled_graph_num_edges = sampled_graph.num_edges()
        self.blocks_retained = sampled_graph_partition.get_B(
        ) / full_partition.get_B()
        # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
        self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
        self.full_graph_diameter = pseudo_diameter(full_graph)[0]
        # Assigned (not incremented) so that repeated calls do not add up.
        self.sampled_graph_island_vertices = sum(
            1 for vertex in sampled_graph.vertices()
            if (vertex.in_degree() + vertex.out_degree()) == 0)
        self.sampled_graph_largest_component = extract_largest_component(
            sampled_graph, directed=False).num_vertices()
        self.full_graph_largest_component = extract_largest_component(
            full_graph, directed=False).num_vertices()

        ######
        # Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
        ######
        # Expansion factor = Neighbors of sample / size of sample
        # Maximum expansion factor = (size of graph - size of sample) / size of sample
        # Expansion quality = Neighbors of sample / (size of graph - size of sample)
        # Expansion quality = 1 means sample is at most 1 edge away from entire graph
        sampled_graph_vertices = set(vertex_mapping.keys())
        neighbors = set()
        for vertex in sampled_graph_vertices:
            neighbors.update(full_graph.get_out_neighbors(vertex))
        neighbors = neighbors - sampled_graph_vertices
        # NOTE(review): divides by zero if the sample contains as many
        # vertices as the full graph - confirm this cannot happen.
        self.expansion_quality = len(neighbors) / (
            full_graph.num_vertices() - sampled_graph.num_vertices())

        ######
        # Clustering coefficient
        ######
        self.sampled_graph_clustering_coefficient = global_clustering(
            sampled_graph)[0]
        self.full_graph_clustering_coefficient = global_clustering(
            full_graph)[0]

        ######
        # Info on communities
        ######
        self.get_community_details(
            assignment,
            full_partition.get_blocks().get_array(),
            sampled_graph_partition.get_blocks().get_array(), vertex_mapping)

        # Cannot compute below metrics if no true partition is provided
        if np.unique(assignment).size == 1:
            return

        #####
        # % difference in ratio of within-block to between-block edges
        #####
        sample_assignment = assignment[np.fromiter(vertex_mapping.keys(),
                                                   dtype=np.int32)]
        true_sampled_graph_partition = partition_from_truth(
            sampled_graph, sample_assignment)
        sampled_graph_blockmatrix = true_sampled_graph_partition.get_matrix()
        self.sampled_graph_edge_ratio = sampled_graph_blockmatrix.diagonal(
        ).sum() / sampled_graph_blockmatrix.sum()
        true_full_partition = partition_from_truth(full_graph, assignment)
        full_blockmatrix = true_full_partition.get_matrix()
        self.graph_edge_ratio = full_blockmatrix.diagonal().sum(
        ) / full_blockmatrix.sum()

        #####
        # Normalized difference from ideal-block membership
        #####
        membership_size = max(np.max(assignment),
                              np.max(sample_assignment)) + 1
        # np.add.at is the unbuffered equivalent of "nums[b] += 1 for b in ..."
        # and counts repeated block ids correctly.
        full_graph_membership_nums = np.zeros(membership_size)
        np.add.at(full_graph_membership_nums, assignment, 1)
        sampled_graph_membership_nums = np.zeros(membership_size)
        np.add.at(sampled_graph_membership_nums, sample_assignment, 1)
        ideal_block_membership_nums = full_graph_membership_nums * \
            (sampled_graph.num_vertices() / full_graph.num_vertices())
        difference_from_ideal_block_membership_nums = np.abs(
            ideal_block_membership_nums - sampled_graph_membership_nums)
        self.difference_from_ideal_sample = np.sum(
            difference_from_ideal_block_membership_nums /
            sampled_graph.num_vertices())
Example #21
0
def _report_hull_quality(name, seeds, hull, labels, label, num_vertices):
    """Print size and correctness statistics of the hull of one class.

    :param name: text printed in front of the number of seed vertices
    :param seeds: indices of the vertices the hull was computed from
    :param hull: boolean array marking the vertices inside the hull
    :param labels: ground truth label per vertex
    :param label: the label value the hull is supposed to contain
    :param num_vertices: number of vertices of the graph
    """
    print(name, seeds.size)
    print("hull size: ", np.sum(hull))
    has_label = labels == label
    print("hull correctness overall", np.sum(hull & has_label))
    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    mask = np.ones(num_vertices, dtype=bool)
    mask[seeds] = False
    print("hull correctness on new vertices",
          np.sum(hull[mask] & has_label[mask]))


def spc_semi_supervised_experiments(g: gt.Graph,
                                    weight_prop: gt.EdgePropertyMap, labels):
    """
    Run and print semi-supervised labelling experiments on a graph.

    For each budget in [10, 20, 50, 100] the accuracy of the s2 querying
    strategy is printed. Then, five times, ``budget`` random vertices get their
    true label and label propagation is evaluated three times: on the known
    labels alone, after extending them by the interval (non-closed hull) of
    each class, and after additionally extending them by the closure of each
    class.

    :param g: the graph
    :param weight_prop: edge weights used for the shortest distances
    :param labels: array with the ground truth label per vertex; it must
        contain exactly two distinct values
    :return: None, all results are printed
    """
    np.random.seed(1)
    n = g.num_vertices()
    dist_map = gt.topology.shortest_distance(g, weights=weight_prop)
    W = dist_map.get_2d_array(range(n))  # original distance map
    new_labels = np.zeros(n)
    new_labels[labels == np.unique(labels)[1]] = 1

    def print_accuracy(predicted):
        print("accuracy after label_prop: ",
              np.sum(predicted == new_labels) / n)

    for budget in [10, 20, 50, 100]:
        print("========================================================")
        print("budget: ", budget, "|V|=", n)
        print("==================s2=====================")
        overall_labelling = shortest_shortest_path_querying.s2(
            g, weight_prop, labels, budget)
        print_accuracy(overall_labelling)
        for _ in range(5):
            starting_vertices = np.random.choice(range(n),
                                                 budget,
                                                 replace=False)

            # -inf marks vertices whose label is unknown.
            known_labels = -np.ones(n) * np.inf
            known_labels[starting_vertices] = labels[starting_vertices]

            pos_label, neg_label = np.unique(labels)

            pos = np.where(known_labels == pos_label)[0]
            neg = np.where(known_labels == neg_label)[0]
            print("=============without hull===================")
            print("label propagation")
            overall_labelling = label_propagation(W, known_labels,
                                                  np.unique(labels))
            print_accuracy(overall_labelling)

            # NOTE(review): dist_map is passed as the property map returned by
            # shortest_distance, not as a 2d array - confirm that this is what
            # the compute_hull in use expects.
            print("=============interval============")
            pos_hull = compute_hull(g,
                                    pos,
                                    weight_prop,
                                    dist_map,
                                    compute_closure=False)
            neg_hull = compute_hull(g,
                                    neg,
                                    weight_prop,
                                    dist_map,
                                    compute_closure=False)
            _report_hull_quality("pos", pos, pos_hull, labels, pos_label, n)
            known_labels[pos_hull] = pos_label
            _report_hull_quality("neg", neg, neg_hull, labels, neg_label, n)
            known_labels[neg_hull] = neg_label

            print("label propagation")
            overall_labelling = label_propagation(W, known_labels,
                                                  np.unique(labels))
            print_accuracy(overall_labelling)

            print("==============closure=================")
            pos_hull = compute_hull(g, pos, weight_prop, dist_map)
            neg_hull = compute_hull(g, neg, weight_prop, dist_map)
            _report_hull_quality("pos", pos, pos_hull, labels, pos_label, n)
            known_labels[pos_hull] = pos_label
            _report_hull_quality("neg", neg, neg_hull, labels, neg_label, n)
            print("label propagation")
            known_labels[neg_hull] = neg_label

            overall_labelling = label_propagation(W, known_labels,
                                                  np.unique(labels))
            print_accuracy(overall_labelling)
Example #22
0
    def __init__(self, args: Namespace, graph: Graph) -> None:
        """Sets up an Evaluation object with all result fields at defaults.

        Values taken from the command line and the graph are stored directly;
        every measured quantity starts at 0 / 0.0, every container is empty.

        Parameters
        ----------
        args : Namespace
            the command-line arguments
        graph : Graph
            the loaded graph to be partitioned
        """
        self.args = args
        # CSV files into which the results are written
        csv_stem = args.csv
        self.csv_file = csv_stem + ".csv"
        self.csv_details_file = csv_stem + "_details.csv"
        # Dataset parameters
        self.block_size_variation = args.blockSizeVar
        self.block_overlap = args.overlap
        self.streaming_type = args.type
        self.num_nodes = graph.num_vertices()
        self.num_edges = graph.num_edges()

        # Sampling evaluation (attribute name, initial value), in order
        sampling_defaults = (
            ("blocks_retained", 0.0),
            ("graph_edge_ratio", 0.0),
            ("difference_from_ideal_sample", 0.0),
            ("expansion_quality", 0.0),
            ("sampled_graph_clustering_coefficient", 0.0),
            ("full_graph_clustering_coefficient", 0.0),
            ("sampled_graph_diameter", 0),
            ("full_graph_diameter", 0),
            ("sampled_graph_largest_component", 0),
            ("full_graph_largest_component", 0),
            ("sampled_graph_island_vertices", 0),
            ("sampled_graph_num_vertices", 0),
            ("sampled_graph_num_edges", 0),
            ("sampled_graph_edge_ratio", 0.0),
            ("sampled_graph_num_blocks_algorithm", 0),
            ("sampled_graph_num_blocks_truth", 0),
            ("sampled_graph_accuracy", 0.0),
            ("sampled_graph_rand_index", 0.0),
            ("sampled_graph_adjusted_rand_index", 0.0),
            ("sampled_graph_pairwise_recall", 0.0),
            ("sampled_graph_pairwise_precision", 0.0),
            ("sampled_graph_entropy_algorithm", 0.0),
            ("sampled_graph_entropy_truth", 0.0),
            ("sampled_graph_entropy_algorithm_given_truth", 0.0),
            ("sampled_graph_entropy_truth_given_algorithm", 0.0),
            ("sampled_graph_mutual_info", 0.0),
            ("sampled_graph_missed_info", 0.0),
            ("sampled_graph_erroneous_info", 0.0),
        )
        for attribute, initial in sampling_defaults:
            setattr(self, attribute, initial)

        # Algorithm parameters (attribute name, command-line option name)
        algorithm_options = (
            ("num_block_proposals", "blockProposals"),
            ("beta", "beta"),
            ("sample_size", "sample_size"),
            ("sampling_iterations", "sample_iterations"),
            ("sampling_algorithm", "sample_type"),
            ("delta_entropy_threshold", "threshold"),
            ("nodal_update_threshold_strategy", "nodal_update_strategy"),
            ("nodal_update_threshold_factor", "factor"),
            ("nodal_update_threshold_direction", "direction"),
        )
        for attribute, option in algorithm_options:
            setattr(self, attribute, getattr(args, option))

        # Goodness of partition measures
        self.num_blocks_algorithm = 0
        self.num_blocks_truth = 0
        partition_measures = (
            "accuracy",
            "rand_index",
            "adjusted_rand_index",
            "pairwise_recall",
            "pairwise_precision",
            "entropy_algorithm",
            "entropy_truth",
            "entropy_algorithm_given_truth",
            "entropy_truth_given_algorithm",
            "mutual_info",
            "missed_info",
            "erroneous_info",
            "sampled_graph_description_length",
            "max_sampled_graph_description_length",
            "full_graph_description_length",
            "max_full_graph_description_length",
            "sampled_graph_modularity",
            "full_graph_modularity",
        )
        for attribute in partition_measures:
            setattr(self, attribute, 0.0)

        # Algorithm runtime measures
        runtime_measures = (
            "loading",
            "sampling",
            "sampled_graph_partition_time",
            "total_partition_time",
            "merge_sample",
            "propagate_membership",
            "finetune_membership",
        )
        for attribute in runtime_measures:
            setattr(self, attribute, 0.0)
        self.prepare_next_partitions = []  # type: List[float]

        # Community details
        self.real_communities = {}  # type: Dict[int, int]
        self.algorithm_communities = {}  # type: Dict[int, int]
        self.sampled_graph_real_communities = {}  # type: Dict[int, int]
        self.sampled_graph_algorithm_communities = {}  # type: Dict[int, int]
        self.contingency_table = None  # type: np.ndarray
        self.sampled_graph_contingency_table = None  # type: np.ndarray
Example #23
0
def spc_querying_with_shadow(g: graph_tool.Graph, paths, weights, y):
    '''
    Query vertex labels along a set of paths, using hulls and shadows of the
    already labelled vertices to infer further labels.

    The two endpoints of one randomly chosen path (with more than one vertex)
    are queried first. Afterwards, in every round, the path candidate with the
    largest average of positive and negative shadow gain is queried. The loop
    stops once the hulls of the positively and of the negatively labelled
    vertices together contain at least as many vertices as the graph has.

    The numpy random generator is re-seeded with a fixed seed on every call.

    :param g: graph whose vertices are labelled
    :param paths: list of paths; each path is a sequence of vertex indices of g.
        At least one path must have more than one vertex.
    :param weights: edge weights, forwarded to the shortest-distance, hull and
        shadow computations
    :param y: ground truth labels indexed by vertex; must contain exactly two
        distinct values (the smaller one is treated as the "positive" value)
    :return: tuple ``(known_labels, budget)``: the label array (``-1`` where no
        label was assigned) and the number of ground-truth look-ups performed
    '''
    np.random.seed(55)
    # these two lines make repetitive closure computation a lot faster
    dist_map = graph_tool.topology.shortest_distance(g, weights=weights).get_2d_array(range(g.num_vertices())).T
    comps, hist = graph_tool.topology.label_components(g)

    n = g.num_vertices()
    known_labels = -np.ones(n)
    budget = 0

    # np.unique returns the sorted distinct values; unpacking enforces that
    # there are exactly two of them.
    pos_value, neg_value = np.unique(y)

    # NOTE: the builtin int/bool are used as dtypes because the np.int and
    # np.bool aliases were removed from NumPy (1.24).
    next_candidate_queues = [Queue() for _ in paths]
    left = np.zeros(len(paths), dtype=int)
    right = np.array([len(p)-1 for p in paths], dtype=int)
    queue_idxs = list(range(len(paths)))

    # every path starts with its endpoints as the first candidates
    for i, path in enumerate(paths):
        next_candidate_queues[i].put(0)
        if len(path) > 1:
            next_candidate_queues[i].put(len(path)-1)

    starting_idx = np.random.choice(np.where(right>0)[0])
    starting_path = paths[starting_idx]

    # query both endpoints of the starting path
    budget += 2
    first = next_candidate_queues[starting_idx].get()
    last = next_candidate_queues[starting_idx].get()
    known_labels[starting_path[first]] = y[starting_path[first]]
    known_labels[starting_path[last]] = y[starting_path[last]]

    if known_labels[starting_path[0]] == known_labels[starting_path[-1]]:
        # color the hull of the path in the color of the endpoints
        path_closure = np.where(compute_hull(g, starting_path, weights, dist_map, comps, hist))[0]
        known_labels[path_closure] = known_labels[starting_path[0]]
        num_of_known_labels = len(path_closure)
        # queue_idxs is still the identity list here, so index == path index
        del queue_idxs[starting_idx]
    else:
        if len(starting_path) >= 3:
            # endpoints disagree: continue with the midpoint of the path
            next_candidate_queues[starting_idx].put(first + (last - first)//2)
        else:
            del queue_idxs[starting_idx]
        num_of_known_labels = 2

    pos = np.where(known_labels==pos_value)[0]
    neg = np.where(known_labels==neg_value)[0]

    candidates = np.zeros(len(paths), dtype=int)

    candidates[queue_idxs] = [next_candidate_queues[queue_idx].get() for queue_idx in queue_idxs] #this is always relative to the path

    candidate_pos_hulls = np.zeros((len(paths),n), dtype=bool)
    temp_pos_hulls = np.zeros((n,n), dtype=bool)
    if len(pos) > 0:
        candidate_pos_hulls[queue_idxs] = [closure.compute_hull(g, np.append(pos, paths[idx][candidates[idx]]), weights, dist_map, comps, hist) for idx in queue_idxs]
    else:
        for idx in queue_idxs:
            candidate_pos_hulls[idx][paths[idx][candidates[idx]]] = True
    candidate_neg_hulls = np.zeros((len(paths),n), dtype=bool)
    temp_neg_hulls = np.zeros((n, n), dtype=bool)
    if len(neg) > 0:
        candidate_neg_hulls[queue_idxs] = [closure.compute_hull(g, np.append(neg, paths[idx][candidates[idx]]), weights, dist_map, comps, hist) for idx in queue_idxs]
    else:
        for idx in queue_idxs:
            candidate_neg_hulls[idx][paths[idx][candidates[idx]]] = True
    pos_gains = np.zeros(len(paths))
    neg_gains = np.zeros(len(paths))

    while num_of_known_labels < n:
        to_remove = []
        changed = []
        # advance every path's candidate past vertices that already have a label
        for idx in queue_idxs:
            while known_labels[paths[idx][candidates[idx]]] >= 0:
                if not next_candidate_queues[idx].empty():
                    candidates[idx] = next_candidate_queues[idx].get()
                else:
                    maybe_remove = refill_queue_for_candidate(idx, candidates[idx], candidates, known_labels, left, next_candidate_queues, paths, queue_idxs, right)
                    if maybe_remove is not None:
                        to_remove.append(maybe_remove)
                        break
                    else:
                        candidates[idx] = next_candidate_queues[idx].get()
                changed.append(idx)

        # per-vertex hulls of "current class members plus vertex i", handed to
        # compute_shadow below
        for i in range(n):
            temp_pos_hulls[i] = closure.compute_hull(g, np.append(pos, i), weights, dist_map, comps, hist, True, pos if len(pos) > 0 else None)
            temp_neg_hulls[i] = closure.compute_hull(g, np.append(neg, i), weights, dist_map, comps, hist, True, neg if len(neg) > 0 else None)

        for i in changed:
            candidate_pos_hulls[i] = closure.compute_shadow(g, np.append(pos, paths[i][candidates[i]]), neg, weights, dist_map, comps, hist, B_hulls=temp_neg_hulls)
            candidate_neg_hulls[i] = closure.compute_shadow(g, np.append(neg, paths[i][candidates[i]]), pos, weights, dist_map, comps, hist, B_hulls=temp_pos_hulls)

        for i in to_remove:
            queue_idxs.remove(i)
            # sanity check: a path may only be dropped once it is fully labelled
            if np.sum(known_labels[paths[i]] >= 0) != len(paths[i]):
                exit(555)

        pos_gains[queue_idxs] = np.sum(candidate_pos_hulls[queue_idxs], axis=1) - len(pos)
        neg_gains[queue_idxs] = np.sum(candidate_neg_hulls[queue_idxs], axis=1) - len(neg)

        heuristic = np.average(np.array([pos_gains[queue_idxs], neg_gains[queue_idxs]]), axis=0)

        candidate_idx = queue_idxs[np.argmax(heuristic)]
        candidate_vertex = candidates[candidate_idx]

        # sanity check: the chosen candidate must not already carry its true label
        if known_labels[paths[candidate_idx][candidate_vertex]] == y[paths[candidate_idx][candidate_vertex]]:
            exit(9)
        known_labels[paths[candidate_idx][candidate_vertex]] = y[paths[candidate_idx][candidate_vertex]]

        budget += 1

        # extend the class of the queried vertex by that candidate's shadow
        if known_labels[paths[candidate_idx][candidate_vertex]] == pos_value:
            pos = np.where(candidate_pos_hulls[candidate_idx])[0]
            known_labels[pos] = pos_value
        else:
            neg = np.where(candidate_neg_hulls[candidate_idx])[0]
            known_labels[neg] = neg_value

        # Both shadow families take pos and neg as input, so both are
        # recomputed regardless of which class just grew.
        candidate_pos_hulls[queue_idxs] = [closure.compute_shadow(g, np.append(pos, paths[idx][candidates[idx]]), neg, weights, dist_map, comps, hist, temp_neg_hulls) for idx in queue_idxs]
        candidate_neg_hulls[queue_idxs] = [closure.compute_shadow(g, np.append(neg, paths[idx][candidates[idx]]), pos, weights, dist_map, comps, hist, temp_pos_hulls) for idx in queue_idxs]

        # pick the next candidate on the path that was just queried
        if next_candidate_queues[candidate_idx].empty():
            maybe_remove = refill_queue_for_candidate(candidate_idx, candidate_vertex, candidates, known_labels, left, next_candidate_queues, paths, queue_idxs, right)
            if maybe_remove is None:
                candidates[candidate_idx] = next_candidate_queues[candidate_idx].get()
            else:
                queue_idxs.remove(candidate_idx)
        else:
            candidates[candidate_idx] = next_candidate_queues[candidate_idx].get()

        candidate_pos_hulls[candidate_idx] = closure.compute_shadow(g, np.append(pos, paths[candidate_idx][candidates[candidate_idx]]), neg, weights, dist_map, comps, hist, temp_neg_hulls)
        candidate_neg_hulls[candidate_idx] = closure.compute_shadow(g, np.append(neg, paths[candidate_idx][candidates[candidate_idx]]), pos, weights, dist_map, comps, hist, temp_pos_hulls)

        # progress is measured on the hulls of the labelled vertices, not on
        # the raw label counts
        pos = np.where(compute_hull(g, np.where(known_labels==pos_value)[0], weights, dist_map, comps, hist))[0]
        neg = np.where(compute_hull(g, np.where(known_labels==neg_value)[0], weights, dist_map, comps, hist))[0]
        num_of_known_labels = len(pos) + len(neg)

        print(num_of_known_labels, n)

    return known_labels, budget
Example #24
0
    def makeGraphFast(self, img, dia, xScale, yScale):
        '''
        Build an undirected graph from the foreground pixels of ``img`` by
        sweeping over the image rows, and return its largest connected component.

        Every foreground pixel becomes a vertex whose property dict stores its
        image index, scaled coordinate, a path counter and a scaled diameter.
        Pixels are connected to their foreground neighbours in the same row and
        in the next row; diagonal edges are only added when no horizontal or
        vertical neighbour made them redundant.

        :param img: 2-D image indexed as img[row][column]; entries equal to
            True are treated as foreground
        :param dia: per-pixel diameter values, indexed like img
        :param xScale: scale factor applied to the column index
        :param yScale: scale factor applied to the row index
        :return: tuple (view on the largest connected component, number of
            vertices in that view)

        Side effects: prints progress information; if the largest component
        does not contain all vertices, stores the fraction of vertices outside
        of it in self.__fail.
        '''
        print('Building Graph Data Structure'),
        start = time.time()
        G = Graph(directed=False)
        sumAddVertices = 0

        # vprop/eprop hold one python dict per vertex/edge, epropW the scalar
        # edge weight
        vprop = G.new_vertex_property('object')
        eprop = G.new_edge_property('object')
        epropW = G.new_edge_property("float")
        h, w = np.shape(img)
        avgScale = (xScale + yScale) / 2

        # Column indices / vertices of the NEXT row that were already created
        # while processing the current row.
        addedVerticesLine2 = []
        vListLine2 = []
        percentOld = 0
        counter = 0
        '''
        Sweep over each line in the image except the last line
        '''
        # NOTE(review): the slice below stops two rows before the end, although
        # the text above says only the last line is skipped - confirm intent.
        for idx, i in enumerate(img[:len(img) - 2]):
            '''
            Get foreground indices in the current line of the image and make vertices
            '''
            counter += 1
            percent = (float(counter) / float(h)) * 100
            # report progress roughly every 10 percent
            if percentOld + 10 < percent:
                print(str(np.round(percent, 1)) + '% '),
                percentOld = percent

            line1 = np.where(i == True)
            if len(line1[0]) > 0:
                # only create vertices for pixels not already added from the
                # previous row
                line1 = set(line1[0]).difference(set(addedVerticesLine2))
                vL = G.add_vertex(len(list(line1)))

                # presumably add_vertex returns an iterable only when more than
                # one vertex is requested - hence the two cases; TODO confirm
                if len(line1) > 1:
                    vList = vListLine2 + list(vL)
                else:
                    vList = vListLine2 + [vL]
                # line1 and vList are meant to be parallel: column index at
                # position k belongs to vertex vList[k]
                line1 = addedVerticesLine2 + list(line1)
                for jdx, j in enumerate(line1):
                    vprop[vList[jdx]] = {
                        'imgIdx': (j, idx),
                        'coord': (float(j) * xScale, float(idx) * yScale),
                        'nrOfPaths': 0,
                        'diameter': float(dia[idx][j]) * avgScale
                    }
                '''
                keep order of the inserted vertices
                '''
                sumAddVertices += len(line1)

                addedVerticesLine2 = []
                vListLine2 = []
                '''
                Connect foreground indices to neighbours in the next line
                '''
                for v1 in line1:
                    va = vList[line1.index(v1)]
                    # diagonal edges are suppressed once a horizontal or
                    # vertical neighbour has been found
                    diagonalLeft = diagonalRight = True
                    # left neighbour in the same row
                    # NOTE(review): for v1 == 0 the index v1 - 1 is -1, which
                    # addresses the last column instead of raising - verify.
                    try:
                        if img[idx][v1 - 1] == True:
                            diagonalLeft = False
                            vb = vList[line1.index(v1 - 1)]
                            e = G.add_edge(va, vb)
                            eprop[e] = {
                                'coord1':
                                vprop[va]['coord'],
                                'coord2':
                                vprop[vb]['coord'],
                                'weight': ((vprop[va]['diameter'] +
                                            vprop[vb]['diameter']) / 2),
                                'RTP':
                                False
                            }
                            epropW[e] = 2. / (eprop[e]['weight']**2)
                    # bare except: any error here is reported as a boundary vertex
                    except:
                        print 'Boundary vertex at: ' + str(
                            [v1, idx - 1]) + ' image size: ' + str([w, h])
                        pass

                    # right neighbour in the same row
                    try:
                        if img[idx][v1 + 1] == True:
                            diagonalRight = False
                            vb = vList[line1.index(v1 + 1)]
                            e = G.add_edge(va, vb)
                            eprop[e] = {
                                'coord1':
                                vprop[va]['coord'],
                                'coord2':
                                vprop[vb]['coord'],
                                'weight': ((vprop[va]['diameter'] +
                                            vprop[vb]['diameter']) / 2),
                                'RTP':
                                False
                            }
                            epropW[e] = 2. / (eprop[e]['weight']**2)
                    except:
                        print 'Boundary vertex at: ' + str(
                            [v1 + 1, idx]) + ' image size: ' + str([w, h])
                        pass  # just if we are out of bounds

                    # neighbour directly below: a new vertex is created for it
                    # and remembered for the next row
                    # NOTE(review): vListLine2 grows on every hit while
                    # addedVerticesLine2 only records each column once, so the
                    # two lists can get out of step - verify.
                    try:
                        if img[idx + 1][v1] == True:
                            diagonalRight = False
                            diagonalLeft = False
                            vNew = G.add_vertex()
                            vprop[vNew] = {
                                'imgIdx': (v1, idx + 1),
                                'coord':
                                (float(v1) * xScale, float(idx + 1) * yScale),
                                'nrOfPaths':
                                0,
                                'diameter':
                                float(dia[idx + 1][v1]) * avgScale
                            }
                            vListLine2.append(vNew)
                            e = G.add_edge(vList[line1.index(v1)], vNew)
                            eprop[e] = {
                                'coord1':
                                vprop[va]['coord'],
                                'coord2':
                                vprop[vNew]['coord'],
                                'weight': ((vprop[va]['diameter'] +
                                            vprop[vNew]['diameter']) / 2),
                                'RTP':
                                False
                            }
                            epropW[e] = 1. / (eprop[e]['weight']**2)
                            if v1 not in addedVerticesLine2:
                                addedVerticesLine2.append(v1)
                    except:
                        print 'Boundary vertex at: ' + str(
                            [v1, idx + 1]) + ' image size: ' + str([w, h])
                        pass

                    # lower-right diagonal neighbour (only if still allowed)
                    try:
                        if diagonalRight == True and img[idx + 1][v1 +
                                                                  1] == True:
                            vNew = G.add_vertex()
                            vprop[vNew] = {
                                'imgIdx': (v1 + 1, idx + 1),
                                'coord': (float(v1 + 1) * xScale,
                                          float(idx + 1) * yScale),
                                'nrOfPaths':
                                0,
                                'diameter':
                                float(dia[idx + 1][v1 + 1]) * avgScale
                            }
                            vListLine2.append(vNew)
                            e = G.add_edge(vList[line1.index(v1)], vNew)
                            eprop[e] = {
                                'coord1':
                                vprop[va]['coord'],
                                'coord2':
                                vprop[vNew]['coord'],
                                'weight': ((vprop[va]['diameter'] +
                                            vprop[vNew]['diameter']) / 2),
                                'RTP':
                                False
                            }
                            epropW[e] = 1.41 / (eprop[e]['weight']**2)
                            if v1 + 1 not in addedVerticesLine2:
                                addedVerticesLine2.append(v1 + 1)
                    except:
                        print 'Boundary vertex at: ' + str(
                            [v1 + 1, idx + 1]) + ' image size: ' + str([w, h])
                        pass

                    # lower-left diagonal neighbour (only if still allowed)
                    try:
                        if diagonalLeft == True and img[idx + 1][v1 -
                                                                 1] == True:
                            vNew = G.add_vertex()
                            vprop[vNew] = {
                                'imgIdx': (v1 - 1, idx + 1),
                                'coord': (float(v1 - 1) * xScale,
                                          float(idx + 1) * yScale),
                                'nrOfPaths':
                                0,
                                'diameter':
                                float(dia[idx + 1][v1 - 1]) * avgScale
                            }
                            vListLine2.append(vNew)
                            e = G.add_edge(vList[line1.index(v1)], vNew)
                            eprop[e] = {
                                'coord1':
                                vprop[va]['coord'],
                                'coord2':
                                vprop[vNew]['coord'],
                                'weight': ((vprop[va]['diameter'] +
                                            vprop[vNew]['diameter']) / 2),
                                'RTP':
                                False
                            }
                            epropW[e] = 1.41 / (eprop[e]['weight']**2)
                            if v1 - 1 not in addedVerticesLine2:
                                addedVerticesLine2.append(v1 - 1)
                    except:
                        print 'Boundary vertex at: ' + str(
                            [v1 - 1, idx + 1]) + ' image size: ' + str([w, h])
                        pass
                    # diagnostic output only: report pixels without a
                    # horizontal or lower neighbour; the graph is not modified
                    try:
                        if img[idx][v1 + 1] == False and img[idx][
                                v1 - 1] == False and img[idx + 1][
                                    v1] == False and diagonalLeft == False and diagonalRight == False:
                            print 'tip detected'
                            if img[idx - 1][v1 - 1] == False and img[idx - 1][
                                    v1 + 1] == False and img[idx -
                                                             1][v1] == False:
                                print 'floating pixel'
                    except:
                        pass

        print 'done!'
        # expose the property maps on the graph under the names "ep", "w", "vp"
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop
        print 'graph build in ' + str(time.time() - start)
        # restrict the result to the largest connected component
        l = gt.label_largest_component(G)
        u = gt.GraphView(G, vfilt=l)
        print '# vertices'
        print(u.num_vertices())
        print(G.num_vertices())
        if u.num_vertices() != G.num_vertices():
            # fraction of vertices that are not part of the largest component
            self.__fail = float((G.num_vertices() - u.num_vertices())) / float(
                G.num_vertices())
        return u, u.num_vertices()
Example #25
0
def budgeted_heuristic_querying(g: graph_tool.Graph, y, weights=None, budget=50, compute_hulls_between_queries=False,
                          hull_as_optimization=False, use_adjacency=False):
    '''
    Query ``budget`` vertex labels one at a time and report the accuracy of a
    label propagation after every query.

    In each round every unlabelled vertex is scored with ``helper_sum_sizes``
    on its candidate hulls. Among the best-scoring vertices those contained in
    the fewest current class hulls are kept, and one of them is drawn at random
    and labelled with its ground truth. The candidate hulls of the remaining
    unlabelled vertices are then recomputed for the class that was just queried.

    :param g: graph whose vertices are labelled
    :param y: ground truth labels, indexed by vertex
    :param weights: edge weights, forwarded to the shortest-distance and hull
        computations
    :param budget: number of query rounds
    :param compute_hulls_between_queries: if True, vertices lying in the hull
        of exactly one class are additionally labelled with that class in a
        copy of the labels before label propagation
    :param hull_as_optimization: forwarded positionally to ``compute_hull``
        when candidate hulls are recomputed
    :param use_adjacency: if True, label propagation receives the distance
        matrix with all entries greater than 1 set to 0 instead of the
        distance matrix itself
    :return: array of queried labels, ``-inf`` for vertices never queried
    '''
    n = g.num_vertices()

    if use_adjacency:
        dist_map = graph_tool.topology.shortest_distance(g, weights=weights).get_2d_array(range(n)).T

        adjacency = dist_map.copy()
        adjacency[adjacency > 1] = 0
    else:
        # to prevent overflow etc.
        dist_map = graph_tool.topology.shortest_distance(g, weights=weights).get_2d_array(
            range(n)).T.astype(np.double)
        dist_map[dist_map > n] = np.inf

    comps, hist = graph_tool.topology.label_components(g)
    classes = np.unique(y)
    known_labels = -np.ones(n) * np.inf

    # NOTE: the builtin object/bool are used as dtypes because the np.object
    # and np.bool aliases were removed from NumPy (1.24).
    candidate_hulls = np.zeros(n, dtype=object)
    candidate_hull_sizes = np.zeros(n)
    known_classes = dict()
    classes_hulls = dict()
    for j in range(n):
        candidate_hulls[j] = dict()

    for c in classes:
        known_classes[c] = set()
        classes_hulls[c] = np.zeros(n, dtype=bool)
        for j in range(n):
            one_hot = np.zeros(n, dtype=bool)
            one_hot[j] = True
            candidate_hulls[j][c] = one_hot  # singleton hull

    for z in range(budget):
        # compute most promising vertex
        for p in range(n):
            if known_labels[p] == -np.inf:
                candidate_hull_sizes[p] = helper_sum_sizes(candidate_hulls[p], classes_hulls)
            else:
                # already labelled vertices must never win
                candidate_hull_sizes[p] = -1

        maximizers = np.where(candidate_hull_sizes == np.max(candidate_hull_sizes))[0]

        # tie-break: prefer vertices contained in the fewest class hulls
        classes_hulls_overlap = np.sum(np.array(list(classes_hulls.values())), axis=0)
        maximizers = maximizers[np.where(classes_hulls_overlap[maximizers] == np.min(classes_hulls_overlap[maximizers]))[0]]

        p_star = np.random.choice(maximizers)

        # query it
        known_labels[p_star] = y[p_star]
        queried_class = known_labels[p_star]

        # update data structures
        known_classes[queried_class].add(p_star)
        classes_hulls[queried_class] = candidate_hulls[p_star][queried_class]

        # only the hulls of the class that just grew can have changed
        for j in range(n):
            if known_labels[j] == -np.inf:
                candidate_hulls[j][queried_class] = compute_hull(
                    g, list(known_classes[queried_class].union([j])), weights,
                    dist_map, comps, hist, hull_as_optimization)

        if compute_hulls_between_queries:
            known_labels_augmented = known_labels.copy()
            known_classes_hulls_temp = np.zeros((n, len(classes)), dtype=bool)
            for i, c in enumerate(classes):
                known_classes_hulls_temp[:, i] = compute_hull(g, np.where(known_labels_augmented == c)[0], weights,
                                                              dist_map, comps, hist, compute_closure=False)

            # label a vertex only if it lies in the hull of exactly one class
            for i, c in enumerate(classes):
                only_c = known_classes_hulls_temp[:, i] & ~(
                    np.sum(known_classes_hulls_temp[:, np.arange(len(classes)) != i], axis=1).astype(bool))
                known_labels_augmented[only_c] = c

        else:
            known_labels_augmented = known_labels

        if use_adjacency:
            prediction = label_propagation(adjacency, known_labels_augmented, y, use_adjacency=use_adjacency)
        else:
            prediction = label_propagation(dist_map, known_labels_augmented, y, use_adjacency=use_adjacency)
        print("=====")
        print(z + 1, np.sum(known_labels > -np.inf))
        # size of every class hull
        print(np.sum(np.array(list(classes_hulls.values())), axis=1))
        print("accuracy", np.sum(prediction == y) / y.size)

    return known_labels
Example #26
0
    def makeGraph(self, img, dia, xScale, yScale):
        print 'Building Graph Data Structure'
        start = time.time()
        G = Graph(directed=False)
        vprop = G.new_vertex_property('object')
        eprop = G.new_edge_property('object')
        epropW = G.new_edge_property("int32_t")
        avgScale = (xScale + yScale) / 2

        test = np.where(img == True)
        ss = np.shape(test)
        cccc = 0
        percentOld = 0.0
        print str(np.round(percentOld, 1)) + '%'
        for (i, j) in zip(test[1], test[0]):
            cccc += 1
            percent = (float(cccc) / float(ss[1])) * 100
            if percentOld + 10 < percent:
                print str(np.round(percent, 1)) + '%'
                percentOld = percent
            nodeNumber1 = (float(i) * yScale, float(j) * xScale)
            if gu.find_vertex(
                    G, vprop, {
                        'imgIdx': (j, i),
                        'coord': nodeNumber1,
                        'nrOfPaths': 0,
                        'diameter': float(dia[j][i]) * avgScale
                    }):
                v1 = gu.find_vertex(
                    G, vprop, {
                        'imgIdx': (j, i),
                        'coord': nodeNumber1,
                        'nrOfPaths': 0,
                        'diameter': float(dia[j][i]) * avgScale
                    })[0]
            else:
                v1 = G.add_vertex()
                vprop[G.vertex(v1)] = {
                    'imgIdx': (j, i),
                    'coord': nodeNumber1,
                    'nrOfPaths': 0,
                    'diameter': float(dia[j][i]) * avgScale
                }
            try:

                if img[j, i + 1] == True:
                    nodeNumber2 = (float(i + 1) * yScale, float(j) * xScale)
                    if gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j, i + 1),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j][i + 1]) * avgScale
                            }):
                        v2 = gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j, i + 1),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j][i + 1]) * avgScale
                            })[0]
                        if gu.find_edge(
                                G, eprop, {
                                    'coord1':
                                    vprop[v2]['coord'],
                                    'coord2':
                                    vprop[v1]['coord'],
                                    'weight': ((vprop[v1]['diameter'] +
                                                vprop[v2]['diameter']) / 2)**4,
                                    'RTP':
                                    False
                                }):
                            pass
                        else:
                            e = G.add_edge(v1, v2)
                            epropW[e] = (((vprop[v1]['diameter'] +
                                           vprop[v2]['diameter']) / 2) /
                                         avgScale)**4
                            eprop[e] = {
                                'coord1':
                                vprop[v1]['coord'],
                                'coord2':
                                vprop[v2]['coord'],
                                'weight': ((vprop[v1]['diameter'] +
                                            vprop[v2]['diameter']) / 2)**4,
                                'RTP':
                                False
                            }
                    else:
                        v2 = G.add_vertex()
                        vprop[G.vertex(v2)] = {
                            'imgIdx': (j, i + 1),
                            'coord': nodeNumber2,
                            'nrOfPaths': 0,
                            'diameter': float(dia[j][i + 1]) * avgScale
                        }
                        e = G.add_edge(v1, v2)
                        epropW[e] = (
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2) / avgScale)**4
                        eprop[e] = {
                            'coord1':
                            vprop[v1]['coord'],
                            'coord2':
                            vprop[v2]['coord'],
                            'weight':
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2)**4,
                            'RTP':
                            False
                        }
            except:
                pass
            try:
                if img[j, i - 1] == True:
                    nodeNumber2 = (float(i - 1) * yScale, float(j) * xScale)
                    if gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j, i - 1),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j][i - 1]) * avgScale
                            }):
                        v2 = gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j, i - 1),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j][i - 1]) * avgScale
                            })[0]
                        if gu.find_edge(
                                G, eprop, {
                                    'coord1':
                                    vprop[v2]['coord'],
                                    'coord2':
                                    vprop[v1]['coord'],
                                    'weight': ((vprop[v1]['diameter'] +
                                                vprop[v2]['diameter']) / 2)**4,
                                    'RTP':
                                    False
                                }):
                            pass
                        else:
                            e = G.add_edge(v1, v2)
                            epropW[e] = (((vprop[v1]['diameter'] +
                                           vprop[v2]['diameter']) / 2) /
                                         avgScale)**4
                            eprop[e] = {
                                'coord1':
                                vprop[v1]['coord'],
                                'coord2':
                                vprop[v2]['coord'],
                                'weight': ((vprop[v1]['diameter'] +
                                            vprop[v2]['diameter']) / 2)**4,
                                'RTP':
                                False
                            }
                    else:
                        v2 = G.add_vertex()
                        vprop[G.vertex(v2)] = {
                            'imgIdx': (j, i - 1),
                            'coord': nodeNumber2,
                            'nrOfPaths': 0,
                            'diameter': float(dia[j][i - 1]) * avgScale
                        }
                        e = G.add_edge(v1, v2)
                        epropW[e] = (
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2) / avgScale)**4
                        eprop[e] = {
                            'coord1':
                            vprop[v1]['coord'],
                            'coord2':
                            vprop[v2]['coord'],
                            'weight':
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2)**4,
                            'RTP':
                            False
                        }
            except:
                pass
            try:
                if img[j + 1, i] == True:
                    nodeNumber2 = (float(i) * yScale, float(j + 1) * xScale)
                    if gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j + 1, i),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j + 1][i]) * avgScale
                            }):
                        v2 = gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j + 1, i),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j + 1][i]) * avgScale
                            })[0]
                        if gu.find_edge(
                                G, eprop, {
                                    'coord1':
                                    vprop[v2]['coord'],
                                    'coord2':
                                    vprop[v1]['coord'],
                                    'weight': ((vprop[v1]['diameter'] +
                                                vprop[v2]['diameter']) / 2)**4,
                                    'RTP':
                                    False
                                }):
                            pass
                        else:
                            e = G.add_edge(v1, v2)
                            epropW[e] = (((vprop[v1]['diameter'] +
                                           vprop[v2]['diameter']) / 2) /
                                         avgScale)**4
                            eprop[e] = {
                                'coord1':
                                vprop[v1]['coord'],
                                'coord2':
                                vprop[v2]['coord'],
                                'weight': ((vprop[v1]['diameter'] +
                                            vprop[v2]['diameter']) / 2)**4,
                                'RTP':
                                False
                            }
                    else:
                        v2 = G.add_vertex()
                        vprop[G.vertex(v2)] = {
                            'imgIdx': (j + 1, i),
                            'coord': nodeNumber2,
                            'nrOfPaths': 0,
                            'diameter': float(dia[j + 1][i]) * avgScale
                        }
                        e = G.add_edge(v1, v2)
                        epropW[e] = (
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2) / avgScale)**4
                        eprop[e] = {
                            'coord1':
                            vprop[v1]['coord'],
                            'coord2':
                            vprop[v2]['coord'],
                            'weight':
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2)**4,
                            'RTP':
                            False
                        }
            except:
                pass
            try:
                if img[j - 1, i] == True:
                    nodeNumber2 = (float(i) * yScale, float(j - 1) * xScale)
                    if gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j - 1, i),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j - 1][i]) * avgScale
                            }):
                        v2 = gu.find_vertex(
                            G, vprop, {
                                'imgIdx': (j - 1, i),
                                'coord': nodeNumber2,
                                'nrOfPaths': 0,
                                'diameter': float(dia[j - 1][i]) * avgScale
                            })[0]
                        if gu.find_edge(
                                G, eprop, {
                                    'coord1':
                                    vprop[v2]['coord'],
                                    'coord2':
                                    vprop[v1]['coord'],
                                    'weight': ((vprop[v1]['diameter'] +
                                                vprop[v2]['diameter']) / 2)**4,
                                    'RTP':
                                    False
                                }):
                            pass
                        else:
                            e = G.add_edge(v1, v2)
                            epropW[e] = (((vprop[v1]['diameter'] +
                                           vprop[v2]['diameter']) / 2) /
                                         avgScale)**4
                            eprop[e] = {
                                'coord1':
                                vprop[v1]['coord'],
                                'coord2':
                                vprop[v2]['coord'],
                                'weight': ((vprop[v1]['diameter'] +
                                            vprop[v2]['diameter']) / 2)**4,
                                'RTP':
                                False
                            }
                    else:
                        v2 = G.add_vertex()
                        vprop[G.vertex(v2)] = {
                            'imgIdx': (j - 1, i),
                            'coord': nodeNumber2,
                            'nrOfPaths': 0,
                            'diameter': float(dia[j - 1][i]) * avgScale
                        }
                        e = G.add_edge(v1, v2)
                        epropW[e] = (
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2) / avgScale)**4
                        eprop[e] = {
                            'coord1':
                            vprop[v1]['coord'],
                            'coord2':
                            vprop[v2]['coord'],
                            'weight':
                            ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                             2)**4,
                            'RTP':
                            False
                        }
            except:
                pass
#
        print '100.0%'
        print 'selecting largest connected component'
        G.edge_properties["ep"] = eprop
        G.edge_properties["w"] = epropW
        G.vertex_properties["vp"] = vprop
        l = gt.label_largest_component(G)
        print(l.a)
        u = gt.GraphView(G, vfilt=l)
        print '# vertices'
        print(u.num_vertices())
        print(G.num_vertices())
        print '# edges'
        print(u.num_edges())
        print 'building graph finished in: ' + str(time.time() - start) + 's'
        return u
Example #27
0
class GraphClass:
	''' Wrapper around a graph_tool ``Graph`` that stores a dictionary of
	graph properties and cached betweenness values next to the graph. '''

	#------------#
	# Initialize #
	#------------#

	def __init__ (self, dicProp=None, graph=None):
		''' init from properties

		dicProp: dictionary of properties; when no graph is furnished its
			"Type" entry selects the generator ("None" gives an empty graph).
			Defaults to {"Name": "Graph", "Type": "None", "Weighted": False}.
		graph: existing graph to wrap instead of generating a new one.
		'''
		if dicProp is None:
			dicProp = {"Name": "Graph", "Type": "None", "Weighted": False}
		self.dicProperties = deepcopy(dicProp)
		self.dicGetProp = { "Reciprocity": get_reciprocity, "Clustering": get_clustering, "Assortativity": get_assortativity,
							"Diameter": get_diameter, "SCC": get_num_scc, #"Spectral radius": get_spectral_radius,
							"WCC": get_num_wcc, "InhibFrac": get_inhib_frac }
		self.dicGenGraph = { "Erdos-Renyi": gen_er, "Free-scale": gen_fs, "EDR": gen_edr }
		# cache state: always defined so that the getters never hit a
		# missing attribute
		self.bPropToDate = False
		self.bBetwToDate = False
		self.bWBetwToDate = False
		self.betweenness = None
		self.wBetweeness = None
		# create a graph
		if graph is not None:
			# use the one furnished
			self.__graph = graph
			self.update_prop()
			self.bPropToDate = True
		elif dicProp["Type"] == "None":
			# create an empty graph
			self.__graph = Graph()
			self.bPropToDate = False
		else:
			# generate a graph of the requested type
			self.__graph = self.dicGenGraph[dicProp["Type"]](self.dicProperties)
			self.update_prop()
			self.set_name()
			self.bPropToDate = True

	@classmethod
	def from_graph_class(cls, graphToCopy):
		''' create new GraphClass instance as a deepcopy of another '''
		dicProperties = deepcopy(graphToCopy.get_dict_properties())
		# NOTE: get_graph() marks the caches of graphToCopy as out of date
		gtGraph = graphToCopy.get_graph().copy()
		# create
		graphClass = cls(dicProperties, gtGraph)
		# set state of properties
		graphClass.bPropToDate = graphToCopy.bPropToDate
		graphClass.bBetwToDate = graphToCopy.bBetwToDate
		return graphClass

	def copy(self):
		''' returns a deepcopy of the graphClass instance '''
		graphCopy = GraphClass()
		graphCopy.set_graph(self.__graph.copy())
		graphCopy.update_prop()
		graphCopy.set_name(self.dicProperties["Name"]+'_copy')
		return graphCopy

	#---------------------------#
	# Manipulating the gt graph #
	#---------------------------#

	def set_graph(self, gtGraph):
		''' acquire a graph_tool graph as its own

		Raises TypeError when gtGraph is not exactly a ``Graph`` instance.
		'''
		if gtGraph.__class__ == Graph:
			self.__graph = gtGraph
			# anything cached was computed for the previous graph
			self.bPropToDate = False
			self.bBetwToDate = False
			self.bWBetwToDate = False
		else:
			raise TypeError("The object passed to 'set_graph' is not a < class 'graph_tool.Graph' > but a {}".format(gtGraph.__class__))

	def inhibitory_subgraph(self):
		''' create a GraphClass instance which graph contains only
		the inhibitory connections of the current instance's graph '''
		graph = self.__graph.copy()
		# -type+1 is non-zero (kept) for every edge whose "type" is not 1
		# NOTE(review): presumably "type" is +1/-1 per edge -- confirm
		epropType = graph.new_edge_property("bool",-graph.edge_properties["type"].a+1)
		graph.set_edge_filter(epropType)
		inhibGraph = GraphClass()
		inhibGraph.set_graph(Graph(graph,prune=True))
		inhibGraph.dicProperties["Weighted"] = True
		return inhibGraph

	def excitatory_subgraph(self):
		''' create a GraphClass instance which graph contains only
		the excitatory connections of the current instance's graph '''
		graph = self.__graph.copy()
		# type+1 is non-zero (kept) for every edge whose "type" is not -1
		epropType = graph.new_edge_property("bool",graph.edge_properties["type"].a+1)
		graph.set_edge_filter(epropType)
		excGraph = GraphClass()
		excGraph.set_graph(Graph(graph,prune=True))
		excGraph.dicProperties["Weighted"] = True
		return excGraph

	#-------------------------#
	# Set or update functions #
	#-------------------------#

	def set_name(self,name=""):
		''' set graph name

		With an empty name, one is built from the "Type" property followed
		by the "Nodes", "Edges", "Distribution" and "Clustering" entries
		found in the properties. The resulting name is printed.
		'''
		if name != "":
			self.dicProperties["Name"] = name
		else:
			strName = self.dicProperties["Type"]
			tplUse = ("Nodes", "Edges", "Distribution")
			for key,value in self.dicProperties.items():
				if key in tplUse and not isinstance(value, dict):
					strName += '_' + key[0] + str(value)
				if key == "Clustering":
					strName += '_' + key[0] + str(around(value,4))
			self.dicProperties["Name"] = strName
		print(self.dicProperties["Name"])

	def update_prop(self, lstProp=None):
		''' update part or all of the graph properties

		lstProp: names of the properties to recompute; every property of
			dicGetProp is recomputed when it is empty or omitted.
		'''
		if lstProp:
			for strPropName in lstProp:
				if strPropName in self.dicGetProp:
					self.dicProperties[strPropName] = self.dicGetProp[strPropName](self.__graph)
				else:
					print("Ignoring unknown property '{}'".format(strPropName))
		else:
			self.dicProperties.update({ strPropName: getProp(self.__graph) for strPropName, getProp in self.dicGetProp.items() })
			self.bPropToDate = True

	#---------------#
	# Get functions #
	#---------------#

	## basic properties

	def get_name(self):
		''' name stored in the properties '''
		return self.dicProperties["Name"]

	def num_vertices(self):
		''' number of vertices of the wrapped graph '''
		return self.__graph.num_vertices()

	def num_edges(self):
		''' number of edges of the wrapped graph '''
		return self.__graph.num_edges()

	def get_density(self):
		''' number of edges divided by the squared number of vertices '''
		return self.__graph.num_edges()/float(self.__graph.num_vertices()**2)

	def is_weighted(self):
		''' value of the "Weighted" property '''
		return self.dicProperties["Weighted"]

	## graph and adjacency matrix

	def get_graph(self):
		''' return the wrapped graph; since the caller may modify it,
		all cached values are marked as out of date '''
		self.bPropToDate = False
		self.bBetwToDate = False
		self.bWBetwToDate = False
		return self.__graph

	def get_mat_adjacency(self):
		''' adjacency matrix built from the graph and get_weights() '''
		return adjacency(self.__graph, self.get_weights())

	## complex properties

	def get_prop(self, strPropName):
		''' return a property, recomputing it first when it is computable
		and either missing or out of date; an unknown name is reported and
		None is returned '''
		bComputable = strPropName in self.dicGetProp
		bStored = strPropName in self.dicProperties
		if not (bComputable or bStored):
			print("Ignoring request for unknown property '{}'".format(strPropName))
			return None
		# entries such as "Name" have no getter and are returned as stored
		if bComputable and (not bStored or not self.bPropToDate):
			self.dicProperties[strPropName] = self.dicGetProp[strPropName](self.__graph)
		return self.dicProperties[strPropName]

	def get_dict_properties(self):
		''' the property dictionary itself (no copy is made) '''
		return self.dicProperties

	def get_degrees(self, strType="total", bWeights=True):
		''' degrees of type "in", "out" or "total"; None for another type '''
		lstValidTypes = ["in", "out", "total"]
		if strType in lstValidTypes:
			return degree_list(self.__graph, strType, bWeights)
		else:
			print("Ignoring invalid degree type '{}'".format(strType))
			return None

	def get_betweenness(self, bWeights=True):
		''' betweenness values, computed on first request and then cached;
		the weighted and unweighted results are cached separately '''
		if bWeights:
			if not self.bWBetwToDate:
				self.wBetweeness = betweenness_list(self.__graph, bWeights)
				self.bWBetwToDate = True
			return self.wBetweeness
		if not self.bBetwToDate:
			self.betweenness = betweenness_list(self.__graph, bWeights)
			self.bBetwToDate = True
		return self.betweenness

	def get_types(self):
		''' array of the "type" edge property, or ones when it is absent '''
		if "type" in self.__graph.edge_properties.keys():
			return self.__graph.edge_properties["type"].a
		else:
			return repeat(1, self.__graph.num_edges())

	def get_weights(self):
		''' copy of the "weight" edge property multiplied by "type" when the
		graph is weighted, else a copy of the "type" edge property '''
		if self.dicProperties["Weighted"]:
			epropW = self.__graph.edge_properties["weight"].copy()
			epropW.a = multiply(epropW.a, self.__graph.edge_properties["type"].a)
			return epropW
		else:
			return self.__graph.edge_properties["type"].copy()
Example #28
0
class Interactome:
    r'''
    Attributes:
        interactome_path (str):
            the path to the tsv file containing the interactome per se
        namecode (str):
            the name used to recover the (sub)interactome later
        G (:class:`graph_tool.Graph`):
            the internal representation of the interactome as a graph
        genes2vertices (dict):
            mapping Entrez gene :math:`\rightarrow` set of vertices in ``self.G``
        genes (set):
            set of Entrez names of genes present in ``self.G``
        lcc_cache (dict):
            mapping a number of genes to the LCC size of the uniformly sampled subgraphs of this size
        density_cache (dict):
            mapping a number of genes to the density of the uniformly sampled subgraphs of this size
        clustering_cache (dict):
            mapping a number of genes to the clustering coefficient of the uniformly sampled subgraphs of this size
        distances (2D :class:`np.ndarray`):
            matrix of shortest paths from gene :math:`i` to gene :math:`j`
    '''
    def __init__(self, path, namecode=None):
        '''
        Record the file location and namecode, then load the network
        when a path is given.

        Args:
            path (str): location of the tsv interactome file; nothing is
                loaded when it is `None`
            namecode (str): optional name identifying this interactome
        '''
        self.interactome_path, self.namecode = path, namecode
        # Reset before loading: compute_spls() only fills it when it is None.
        self.distances = None
        log('Loading interactome')
        if path is not None:
            self.load_network(path)
        log('interactome loaded')
        # The simulation caches are loaded lazily by the load_*_cache methods.
        self.lcc_cache = None
        self.density_cache = None
        self.clustering_cache = None

    def get_gene_degree(self, gene):
        '''
        Get the degree of a given gene within the interactome.

        Args:
            gene (int): Entrez ID of the gene

        Return:
            int:
                `None` if the gene is not in :math:`\mathscr I` else the number of associated genes within the interactome
        '''
        if gene not in self.genes:
            return None
        vert_id = self.vert_id(gene)
        return self.G.vertex(vert_id).out_degree()

    def set_namecode(self, namecode):
        assert isinstance(namecode, str)
        self.namecode = namecode

    def get_lcc_cache(self):
        '''
        Return the cache of LCC sizes. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_lcc_cache()
        return self.lcc_cache

    def load_lcc_cache(self):
        '''Load the cache of LCC sizes simulations if exists, else creates an empty one.'''
        if self.lcc_cache is None:
            self.lcc_cache = IO.load_lcc_cache(self)

    def get_density_cache(self):
        '''
        Return the cache of density. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_density_cache()
        return self.density_cache

    def load_density_cache(self):
        '''Load the cache of density simulations if exists, else creates an empty one.'''
        if self.density_cache is None:
            self.density_cache = IO.load_density_cache(self)

    def get_clustering_cache(self):
        '''
        Return the cache of clustering coefficients. WARNING: no copy is made.
        Modifying the returned cache can result in undefined behaviour.
        '''
        self.load_clustering_cache()
        return self.clustering_cache

    def load_clustering_cache(self):
        '''Load the cache of clustering coefficient simulations if exists, else creates an empty one.'''
        if self.clustering_cache is None:
            self.clustering_cache = IO.load_clustering_cache(self)

    def load_network(self, path):
        '''
        Build the undirected graph from a tab-separated file in which each
        row holds the two integer gene ids of one interaction, then fill
        the gene/vertex mappings and the distance matrix.

        Args:
            path: the path of the interactome file
        '''
        self.G = Graph(directed=False)
        self.genes2vertices = {}
        with open(path) as f:
            for row in csv.reader(f, delimiter='\t'):
                gene1, gene2 = map(int, row)
                # Register both endpoints before asking for their vertex ids.
                for gene in (gene1, gene2):
                    self.add_vertex(gene)
                source = self.vert_id(gene1)
                target = self.vert_id(gene2)
                self.G.add_edge(source, target)
        self.genes = set(self.genes2vertices)
        self.vertices2genes = dict(
            (vertex, gene) for gene, vertex in self.genes2vertices.items())
        self.compute_spls()

    def add_vertex(self, gene):
        '''
        Create new vertex for `gene` in the graph if not yet present

        Args:
            gene: the name of the gene to ad in the interactome
        '''
        if gene not in self.genes2vertices:
            self.genes2vertices[gene] = len(self.genes2vertices)
            self.G.add_vertex()

    def vert_id(self, gene):
        '''
        Return the id of the desired gene

        Args:
            gene: the gene to retrieve

        Returns:
            the id of the desired gene

        Raises:
            KeyError: if no such gene is in the interactome
        '''
        return self.genes2vertices[gene]

    def verts_id(self, genes, gene_to_ignore=None):
        '''
        Return a list of Vertex instances of the desired genes

        Args:
            genes: an iterable of desired genes
            gene_to_ignore: gene in `genes` that is not desired

        Returns:
            a list of Vertex instances of the desired genes

        Raises:
            KeyError: if any of the genes is not in the interactome
        '''
        return np.array(
            [self.vert_id(gene) for gene in genes if gene != gene_to_ignore])

    def compute_spls(self):
        '''Compute the shortest path between each pair of genes.'''
        if self.distances is not None:
            return
        dists = shortest_distance(self.G)
        self.distances = np.empty(
            (self.G.num_vertices(), self.G.num_vertices()), dtype=np.int)
        for idx, array in enumerate(dists):
            self.distances[idx, :] = array.a[:]

    def get_all_dists(self, A, B):
        '''
        Get a list containing all the distances from a gene in A to the gene set B

        Args:
            A: a source gene set
            B: a destination gene set

        Returns:
            a list of distances [d(a, B) s.t. a in A]
        '''
        insert_self = A is B
        all_dists = list()
        for gene1 in A:
            if insert_self:
                for idx, el in enumerate(B):
                    if el == gene1:
                        indices = np.delete(B, idx)
                        break
            else:
                indices = B
            if not indices.any():
                continue
            indices = np.asarray(indices)
            self.compute_spls()
            dists = self.distances[gene1, indices]
            min_dist = np.min(dists)
            if min_dist > self.G.num_vertices():  # if gene is isolated
                continue  # go to next gene
            all_dists.append(min_dist)
        return all_dists

    def get_d_A(self, A):
        '''
        Return the inner distance of the disease module A as defined in [1].

        Args:
            A: a gene set

        Returns:
            :math:`d_A`

        References
        ----------

        [1] J. Menche et al., Science 347 , 1257601 (2015). DOI: 10.1126/science.1257601 http://science.sciencemag.org/content/347/6224/1257601
        '''
        return np.mean(self.get_all_dists(A, A))

    def get_d_AB(self, A, B):
        '''
        Return the graph-based distance between A and B as defined in [1].

        Args:
            A: a gene set
            B: a gene set

        Returns:
            :math:`d_{AB}`

        References
        ----------

        [1] J. Menche et al., Science 347 , 1257601 (2015). DOI: 10.1126/science.1257601 http://science.sciencemag.org/content/347/6224/1257601
        '''
        values = self.get_all_dists(A, B)
        values.extend(self.get_all_dists(B, A))
        return np.mean(values, dtype=np.float32)

    def get_random_subgraph(self, size):
        '''
        Draw `size` distinct vertex indices uniformly among
        `range(len(self.genes))` and return the subgraph they induce.

        Args:
            size: number of genes to sample

        Returns:
            A subgraph of self of given size
        '''
        nb_genes = len(self.genes)
        sampled = np.random.choice(nb_genes, size=size, replace=False)
        return self.get_subgraph(sampled)

    def get_subgraph(self, vertices, genes=False):
        r'''
        Build a filtered view of the graph keeping only the given vertices.

        Args:
            vertices: a set of vertex IDs (or a set of genes)
            genes: a boolean with value `True` if `vertices` is a set of genes
                and `False` if it is a set of vertex IDs.

        Returns:
            :math:`\Delta_{\text{vertices}}(G)`
        '''
        # Genes are first translated into vertex ids.
        vertex_ids = self.verts_id(vertices) if genes else vertices
        mask = self.G.new_vertex_property('bool')
        mask.a[vertex_ids] = True
        return GraphView(self.G, vfilt=mask)

    def get_genes_lcc_size(self, genes):
        r'''
        Compute the LCC size of the graph induced by given genes.

        Args:
            genes: an iterable containing genes; it is converted to an
                array and handed to `get_subgraph` as is

        Returns:
            :math:`|LCC(\Delta_{\text{genes}}(G))|`
        '''
        induced = self.get_subgraph(np.asarray(genes))
        return _get_lcc_size(induced)

    def get_random_genes_lcc(self, size):
        r'''
        Compute the LCC size of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`|LCC(\mathcal G(\text{size}, G))|`
        '''
        sampled = self.get_random_subgraph(size)
        return _get_lcc_size(sampled)

    def get_random_genes_density(self, size):
        r'''
        Compute the density of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`d(\mathcal G(\text{size}, G))`
        '''
        sampled = self.get_random_subgraph(size)
        return _get_density(sampled)

    def get_genes_density(self, genes):
        r'''
        Compute the density of the subgraph induced by given genes.

        Args:
            genes: an iterable of genes; it is converted to an array and
                handed to `get_subgraph` as is

        Returns:
            :math:`d(\Delta_{\text{genes}}(G))`
        '''
        induced = self.get_subgraph(np.asarray(genes))
        return _get_density(induced)

    def get_random_genes_clustering(self, size):
        r'''
        Compute the clustering coefficient of a random subgraph of given size.

        Args:
            size (int): number of genes to sample

        Returns:
            :math:`C(\mathcal G(\text{size}, G))`
        '''
        return _get_clustering_coefficient(self.get_random_subgraph(size))

    def get_genes_clustering(self, genes, entrez=False):
        r'''
        Compute the clustering coefficient of the subgraph induced by given genes.

        Args:
            genes: an iterable of genes
            entrez: when true, `genes` is first translated into vertex
                ids with `verts_id`

        Returns:
            :math:`C(\Delta_{\text{genes}}(G))`
        '''
        vertex_ids = self.verts_id(genes) if entrez else genes
        induced = self.get_subgraph(np.asarray(vertex_ids))
        return _get_clustering_coefficient(induced)

    def get_lcc_score(self,
                      genes,
                      nb_sims,
                      shapiro=False,
                      shapiro_threshold=.05):
        r'''
        Get the z-score and the empirical p-value of the LCC size of given genes.

        Args:
            genes (set): gene set
            nb_sims (int): minimum number of simulations for probability distribution estimation
            shapiro (bool): True if normality test is needed, False otherwise (default False)
            shapiro_threshold (float): statistical threshold for normality test

        Returns:
            tuple:
                :math:`(z, p_e, N)` if shapiro is True and :math:`(z, p_e)` otherwise;
                where z is the z-score of the LCC size, :math:`p_e` is the associated
                empirical p-value and N is True if Shapiro-Wilk normality test
                p-value >= shapiro_threshold and False otherwise

        Raises:
            ValueError: if not enough simulations have been performed
        '''
        genes = genes & self.genes
        genes = self.verts_id(genes)
        nb_seeds = len(genes)
        if nb_seeds == 0:
            print('\n\t[Warning: get_lcc_score found no matching gene]')
            return None
        genes_lcc = self.get_genes_lcc_size(genes)
        try:
            lccs = self.get_lcc_cache()[nb_seeds]
            assert len(lccs) >= nb_sims
        except AssertionError:
            raise ValueError(('Only {} simulations found. Expected >= {}. ' + \
                              'fill_lcc_cache has not been called properly') \
                             .format(len(lccs), nb_sims))
        std = lccs.std()
        mean = lccs.mean()
        z = None if std == 0 else float((genes_lcc - mean) / std)
        empirical_p = (lccs >= genes_lcc).sum() / len(lccs)
        if shapiro:
            is_normal = stats.shapiro(np.random.choice(
                lccs, size=5000))[1] >= shapiro_threshold
            return z, empirical_p, is_normal
        return z, empirical_p

    def where_density_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose density hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{density_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_density_cache()
        return {size for size in sizes \
                     if size not in self.density_cache.keys() \
                     or len(self.density_cache[size]) < nb_sims}

    def where_lcc_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose LCC hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{lcc_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_lcc_cache()
        return {size for size in sizes \
                     if size not in self.lcc_cache.keys() \
                     or len(self.lcc_cache[size]) < nb_sims}

    def where_clustering_cache_nb_sims_lower_than(self, sizes, nb_sims):
        r'''
        Get the sizes whose clustering coefficient hasn't been simulated enough.

        Args:
            sizes (iterable): iterable of int values corresponding to sizes to test
            nb_sims (int): minimal number of simulations

        Returns:
            set:
                set of int values corresponding to sizes that haven't been simulated enough:

                .. math::
                    \{s \in \text{sizes} : |\text{clustering_cache}[s]| < \text{nb_sims}\}
        '''
        self.load_clustering_cache()
        return {size for size in sizes \
                     if size not in self.clustering_cache.keys() \
                     or len(self.clustering_cache[size]) < nb_sims}

    def fill_lcc_cache(self, nb_sims, sizes):
        r'''
        Run the LCC simulations for every requested size, logging the
        progress and an estimated remaining time, then write the cache.
        The intent is that afterwards:

        .. math::
            \forall s \in \text{sizes} : |\text{lcc_cache}[s]| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which LCC size shall be tested
        '''
        self.load_lcc_cache()
        start = time()
        for done, size in enumerate(sizes, 1):
            self._compute_lcc_distribution(nb_sims, size)
            prop = done / len(sizes)
            # remaining time extrapolated from the time spent so far
            eta = sec2date((time() - start) / prop * (1 - prop))
            progress = '{} out of {}  ({:3.2f}%)    eta: {}' \
                .format(done, len(sizes), 100 * prop, eta)
            log(progress, end='\r')
        print('')
        self._write_lcc_cache()

    def fill_density_cache(self, nb_sims, sizes):
        r'''
        Run the density simulations for every requested size, logging the
        progress and an estimated remaining time, then write the cache.
        The intent is that afterwards:

        .. math::
            \forall s \in \text{sizes} : |\text{density_cache}[s]| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which density shall be tested
        '''
        self.load_density_cache()
        start = time()
        for done, size in enumerate(sizes, 1):
            self._compute_disease_module_density(nb_sims, size)
            prop = done / len(sizes)
            # remaining time extrapolated from the time spent so far
            eta = sec2date((time() - start) / prop * (1 - prop))
            progress = '{} out of {}  ({:3.2f}%)    eta: {}' \
                .format(done, len(sizes), 100 * prop, eta)
            log(progress, end='\r')
        print('')
        self._write_density_cache()

    def fill_clustering_cache(self, nb_sims, sizes):
        r'''
        Run the clustering coefficient simulations for every requested
        size, logging the progress and an estimated remaining time, then
        write the cache. The intent is that afterwards:

        .. math::
            \forall s \in \text{sizes} : |\text{clustering_cache}[s]| \geq \text{nb_sims}

        Args:
            nb_sims (int): minimal number of simulations to be performed
            sizes (set): set of number of genes for which clustering coefficient shall be tested
        '''
        self.load_clustering_cache()
        start = time()
        for done, size in enumerate(sizes, 1):
            self._compute_disease_modules_clustering(nb_sims, size)
            prop = done / len(sizes)
            # remaining time extrapolated from the time spent so far
            eta = sec2date((time() - start) / prop * (1 - prop))
            progress = '{} out of {}  ({:3.2f}%)    eta: {}' \
                .format(done, len(sizes), 100 * prop, eta)
            log(progress, end='\r')
        print('')
        self._write_clustering_cache()

    def get_subinteractome(self,
                           genes,
                           neighbourhood='none',
                           namecode=None,
                           neighb_count=1):
        r'''
        Extract a subinteractome and return it as an :class:`Interactome`
        object which is then usable for analyses.

        Only the genes of ``genes`` that also belong to ``self.genes`` are
        considered; the set given by the caller is left untouched. A
        subinteractome is first looked up through ``IO.load_interactome``
        under ``self.interactome_path`` followed by the MD5 of the sorted gene
        ids, and returned as is when found. Otherwise a deep copy of ``self``
        is restricted to the extracted subgraph (genes left without any edge
        are dropped), ``compute_spls()`` is run on it and it is handed to
        ``IO.save_interactome``.

        For :math:`H` a subgraph of :math:`G`, the first neighbourhood of :math:`H`
        within :math:`G` is defined by the graph:

        .. math::
            \mathcal N_G(H) = \Delta_{\mathcal N_G(V(H))}(G),

        where for every :math:`W \subset V(G)`:

        .. math::
            \mathcal N_G(W) = W \cup \left\{v \in V(G) : \exists w \in W \text{ s.t. } \{v, w\} \in E(G)\right\} \subset V(G).

        Args:
            genes (set): the gene set inducing the subinteractome
            neighbourhood (str):
                one of the following: `'none'`, `'first'`, `'first-joined'` where:

                * `'none'` for no neighbouring gene
                * `'first'` for the first neighbourhood :math:`\mathcal N_G(H)` with :math:`G` being `self` and :math:`H` being `genes`
                * `'first-joined'` for the first neighbourhood with restriction that every neighbourhood gene must be associated to at least `neighb_count` genes.
            namecode (str): the namecode to be given to the subinteractome
            neighb_count (int): (only if `neighbourhood == 'first-joined'`) determines the minimum number of adjacent genes to be extracted:

                .. math::
                    \mathcal N_G^{(k)}(H) := \Delta_{\mathcal N_G^{(k)}(V(H))}(G),

                with:

                .. math::
                    \mathcal N_G^{(k)}(W) := W \cup \left\{v \in V(G) : \exists \{v_1, \ldots, v_k\} \in \binom {W}k \text{ s.t. } \{v, v_i\} \in E(G)
                        \quad (i=1, \ldots, k)\right\} \subset V(G).

        Return:
            :class:`Interactome`:
                the subinteractome

        Raises:
            NotImplementedError: currently for any `neighbourhood` other than
                `None`/`'none'` when no saved subinteractome is found, since
                neighbourhood extraction is not implemented yet.
        '''
        #TODO: implement neighbourhood extraction
        # Build a new set instead of ``genes &= self.genes``, which would
        # silently shrink the set owned by the caller.
        genes = set(genes) & self.genes
        # NOTE(review): the key only depends on the gene set, not on
        # ``neighbourhood``/``neighb_count``; to be revisited once neighbourhood
        # extraction is implemented.
        genes_hash = md5(''.join(sorted(map(
            str, genes))).encode('utf-8')).hexdigest()
        path = self.interactome_path + genes_hash
        ret = IO.load_interactome(path, False, namecode)
        if ret is not None:
            return ret
        ret = deepcopy(self)
        ret.namecode = namecode
        ret.interactome_path = path

        ret.genes, ret.G = self._get_subinteractome_graph(
            genes, neighbourhood, neighb_count)
        nb_vertices = ret.G.num_vertices()
        nb_edges = ret.G.num_edges()
        # The density is undefined below two vertices: report 0 rather than
        # dividing by zero.
        nb_ordered_pairs = nb_vertices * (nb_vertices - 1)
        density = 2*nb_edges/nb_ordered_pairs if nb_ordered_pairs > 0 else 0.0
        print('So {} vertices, {} edges (density == {})' \
              .format(nb_vertices, nb_edges, density))
        # Compute the mappings gene -> idx from the 'genes' vertex property
        # registered by _get_subinteractome_graph
        vp = ret.G.vp['genes']
        ret.genes2vertices = {
            vp[vertex]: int(vertex)
            for vertex in ret.G.vertices()
        }
        print('...  {}'.format(len(ret.genes2vertices)))
        # The property was only needed to build the mapping: remove it from
        # both the new graph and the graph of ``self``.
        del ret.G.vertex_properties['genes']
        del self.G.vertex_properties['genes']
        ret.genes = set(ret.genes2vertices)
        # Values inherited from ``self`` through deepcopy do not describe the
        # subinteractome.
        # NOTE(review): clustering_cache is not reset here -- confirm whether
        # that is intended.
        ret.lcc_cache = ret.density_cache = None
        ret.distances = None
        ret.compute_spls()
        IO.save_interactome(ret)
        return ret

    def _get_subinteractome_graph(self, genes, neighbourhood, neighb_count):
        '''
        Build the graph induced by ``genes``, without its isolated vertices.

        Side effect: a ``'genes'`` vertex property mapping every vertex of
        ``self.G`` to its gene is registered on ``self.G`` and is NOT removed
        here; :meth:`get_subinteractome` reads it back from the returned
        graph and then deletes it.

        Args:
            genes (set): the genes inducing the subgraph
            neighbourhood (str): anything else than ``None``/``'none'`` makes
                the gene set go through :meth:`_get_genes_neighbourhood` first
            neighb_count (int): forwarded to :meth:`_get_genes_neighbourhood`

        Return:
            tuple: ``(genes, graph)`` where ``genes`` is the set of genes
            having at least one edge in the induced subgraph and ``graph`` is
            ``Graph(self.get_subgraph(genes, True), prune=True)``
        '''
        print('Initially: {} genes'.format(len(genes)))
        if neighbourhood is not None and neighbourhood != 'none':
            genes = self._get_genes_neighbourhood(genes, neighbourhood,
                                                  neighb_count)
        # Label every vertex of the full graph with its gene
        vp = self.G.new_vp('int')
        for gene, vertex in self.genes2vertices.items():
            vp[self.G.vertex(vertex)] = gene
        self.G.vertex_properties['genes'] = vp
        # Extract subgraph with ``genes``
        G = self.get_subgraph(genes, True)
        # Ignore genes of degree 0
        # NOTE(review): assumes the indices 0..G.num_vertices()-1 are valid
        # vertex indices of the graph returned by get_subgraph -- confirm.
        genes_idx = np.where(
            G.get_out_degrees(np.arange(G.num_vertices())) > 0)[0]
        genes = {self.vertices2genes[idx] for idx in genes_idx}
        print('After removing isolated vertices: {} genes'.format(len(genes)))
        return genes, Graph(self.get_subgraph(genes, True), prune=True)

    def _get_genes_neighbourhood(self, genes, neighbourhood, neighb_count):
        raise NotImplementedError()
        # First neighbourhood
        vert2genes = dict()
        for k, v in self.genes2vertices.items():
            vert2genes[v] = k
        closure_genes = set()
        for gene in genes:
            gene_idx = self.genes2vertices[gene]
            for neighbour in self.G.get_out_neighbours(gene_idx):
                closure_genes.add(vert2genes[neighbour])
        return closure_genes | genes

    def copy(self):
        '''
        Return an independent copy of the interactome.

        All attributes are deep-copied, except the graph ``G`` which is
        duplicated with its own ``copy()`` method.
        '''
        clone = deepcopy(self)
        # deepcopy cannot be trusted for the graph (deepcopy(self.G) returns
        # None), hence the explicit copy.
        clone.G = self.G.copy()
        return clone

    ##### Private methods

    def _compute_lcc_distribution(self, nb_sims, size):
        N = nb_sims
        if size in self.lcc_cache:
            nb_sims -= len(self.lcc_cache[size])
        if nb_sims < 0:
            print('[Warning]: {} sims required but {} already performed' \
                  .format(N, len(self.lcc_cache[size])))
            return
        lccs = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            lccs[i] = self.get_random_genes_lcc(size)
        if size in self.lcc_cache:
            self.lcc_cache[size] = np.concatenate((self.lcc_cache[size], lccs))
        else:
            self.lcc_cache[size] = lccs

    def _compute_disease_module_density(self, nb_sims, size):
        N = nb_sims
        if size in self.density_cache:
            nb_sims -= len(self.density_cache[size])
        if size <= 0 or nb_sims <= 0:
            return
        densities = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            densities[i] = self.get_random_genes_density(size)
        try:
            densities = np.concatenate((self.density_cache[size], densities))
        except (KeyError, ValueError):
            pass
        self.density_cache[size] = densities

    def _compute_disease_modules_clustering(self, nb_sims, size):
        N = nb_sims
        if size in self.clustering_cache:
            nb_sims -= len(self.clustering_cache[size])
        if size < 3 or nb_sims <= 0:
            return
        clustering_coeffs = np.empty(nb_sims, dtype=np.float)
        for i in range(nb_sims):
            clustering_coeffs[i] = self.get_random_genes_clustering(size)
        try:
            clustering_coeffs = np.concatenate(
                (self.clustering_cache[size], clustering_coeffs))
        except (KeyError, ValueError):
            pass
        self.clustering_cache[size] = clustering_coeffs

    def _write_lcc_cache(self):
        '''
        Hand ``self.lcc_cache`` over to ``IO.save_lcc_cache``.
        '''
        IO.save_lcc_cache(self, self.lcc_cache)

    def _write_density_cache(self):
        '''
        Hand ``self.density_cache`` over to ``IO.save_density_cache``.
        '''
        IO.save_density_cache(self, self.density_cache)

    def _write_clustering_cache(self):
        '''
        Hand ``self.clustering_cache`` over to ``IO.save_clustering_cache``.
        '''
        IO.save_clustering_cache(self, self.clustering_cache)

    def save(self):
        '''
        Save this interactome by passing it to ``IO.save_interactome``.
        '''
        IO.save_interactome(self)