Example #1
def compute_stretch(k, dst, edges, lcc_nodes):
    """Compute the stretch of every edge of `k` except those in the graph
    spanned by `edges`."""
    test_graph = {}
    slcc = set(lcc_nodes)
    k.set_vertex_filter(None)
    k.set_edge_filter(None)
    n = k.num_vertices()
    bfsmap = k.new_edge_property('boolean')
    for e in k.edges():
        u, v = int(e.source()), int(e.target())
        if (u, v) in edges:
            bfsmap[e] = True
        else:
            bfsmap[e] = False
            if u in slcc:
                pot.add_edge_to_tree(test_graph, u, v)
    k.set_edge_filter(bfsmap)

    tree_dst = shortest_distance(k, dense=False)
    tree_mat = np.zeros((n, n), dtype=np.uint8)
    for v in k.vertices():
        tree_mat[int(v), :] = tree_dst[v].a.astype(np.uint8)

    edge_paths, paths = [], []
    for i, v in enumerate(lcc_nodes):
        graph_distance = dst[v, lcc_nodes[i+1:]]
        tree_distance = tree_mat[v, lcc_nodes[i+1:]]
        if v in test_graph:
            edge_paths.extend(tree_mat[v, sorted(test_graph[v])].ravel())
        ratio = (tree_distance/graph_distance)
        paths.extend(ratio.ravel())
    prct = list(np.percentile(edge_paths, [25, 50, 75]))
    return prct + [np.mean(edge_paths)]
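The core pattern in Example #1 (and in Examples #2 and #19 below) is to mask the graph down to a spanner with a boolean edge property, recompute distances on the filtered graph, and compare them with the original distances. A minimal self-contained sketch of that masking step, with illustrative names:

from graph_tool import Graph
from graph_tool.topology import shortest_distance

g = Graph(directed=False)
g.add_edge_list([(0, 1), (1, 2), (2, 3), (0, 3)])  # a 4-cycle
keep = {(0, 1), (1, 2), (2, 3)}                    # a spanning path
emap = g.new_edge_property('boolean')
for e in g.edges():
    emap[e] = (int(e.source()), int(e.target())) in keep
g.set_edge_filter(emap)
d = shortest_distance(g, dense=False)
print(d[g.vertex(0)].a)  # [0 1 2 3]: 0 -> 3 now takes 3 hops instead of 1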
Example #2
def compute_stretch(gt_graph, dst_mat, spanner_edges):
    n = gt_graph.num_vertices()
    train_edges = {(u, v) for u, v in spanner_edges}
    test_graph = {}
    for e in redensify.EDGES_SIGN.keys():
        if e not in train_edges:
            pot.add_edge_to_tree(test_graph, e[0], e[1])
    spannermap = gt_graph.new_edge_property('boolean')
    num_edges = 0
    for e in gt_graph.edges():
        u, v = int(e.source()), int(e.target())
        spannermap[e] = (u, v) in spanner_edges
        num_edges += 1
    print('{}, {:.1f}\\% &'.format(len(spanner_edges),  # LaTeX table fragment
                                   100*len(spanner_edges) / num_edges))
    gt_graph.set_edge_filter(spannermap)
    spanner_dst = shortest_distance(gt_graph, dense=False)
    gt_graph.set_edge_filter(None)
    spanner_mat = np.zeros((n, n), dtype=np.uint8)
    for v in gt_graph.vertices():
        spanner_mat[int(v), :] = spanner_dst[v].a.astype(np.uint8)
    tsum, tsize, esum, esize = 0, 0, 0, 0
    for v in range(n):
        graph_distance = dst_mat[v, v+1:]
        tree_distance = spanner_mat[v, v+1:]
        if v in test_graph:
            esum += spanner_mat[v, sorted(test_graph[v])].sum()
            esize += len(test_graph[v])
        ratio = (tree_distance/graph_distance)
        tsum += ratio.sum()
        tsize += ratio.shape[0]
    path_stretch = tsum/tsize
    edge_stretch = esum/esize
    return path_stretch, edge_stretch
Example #3
def diam_aspl(g):
    """Diameter and average shortest path length; assumes `g` is connected,
    otherwise the 2147483647 'unreachable' sentinel distorts both values."""
    n = g.num_vertices()
    sd = topology.shortest_distance(g)
    sda = np.array([sd[v] for v in g.vertices()])

    diam = np.max(sda)
    aspl = np.sum(sda) / (n * (n - 1))  # n*(n-1) ordered pairs; diagonal is 0

    return diam, aspl
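A hedged usage sketch for diam_aspl on a small connected graph (the imports match the names the function expects):

import numpy as np
import graph_tool.topology as topology
from graph_tool import Graph

g = Graph(directed=False)
g.add_edge_list([(i, (i + 1) % 6) for i in range(6)])  # undirected 6-cycle
print(diam_aspl(g))  # (3, 1.8): eccentricity 3, mean distance 54/30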
Example #4
def find_centres(graph, minR):
    edge_weights = graph.edge_properties['weights']
    num_voxel = graph.num_vertices()
    sample_points = []
    
    # first sample point
    pointID = np.random.randint(num_voxel)
    sample_points.append(pointID)

    # list of voxels to process
    process_list = np.arange(num_voxel)

    # get all distances from sample point (weighted, matching the call below)
    dmap = gtt.shortest_distance(graph, graph.vertex(pointID),
                                 weights=edge_weights).get_array()
    d = np.asarray(dmap)

    # mark as not to process if distance is below minR (-1 = processed)
    process_list[d <= minR] = -1
    # candidate next points lie in the annulus between minR and 2*minR;
    # >= 0 keeps vertex 0 eligible (-1 is the 'processed' marker)
    next_pick = process_list[(d > minR) & (d < 2*minR) & (process_list >= 0)]

    while (next_pick.size>0):

        # pick next one randomly
        randElt = np.random.randint(next_pick.size)
        pointID = next_pick[randElt]

        # add to list
        sample_points.append(pointID)

        # get distances
        dmap = gtt.shortest_distance(graph, graph.vertex(pointID), weights=edge_weights).get_array()
        d = np.asarray(dmap)

        # update the pool of candidate centre points (>= 0: still unprocessed)
        process_list[d <= minR] = -1
        add_to_watchlist = process_list[(d > minR) & (d < 2*minR) & (process_list >= 0)]
        update_watchlist = next_pick[process_list[next_pick] >= 0]
        next_pick = np.concatenate((update_watchlist, add_to_watchlist))

    return sample_points
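A hedged usage sketch for find_centres (the weighted path graph is an illustrative assumption; `gtt` is graph_tool.topology, as in the function body):

import numpy as np
import graph_tool.topology as gtt
from graph_tool import Graph

g = Graph(directed=False)
g.add_vertex(50)
w = g.new_edge_property('double')
for i in range(49):
    w[g.add_edge(i, i + 1)] = 1.0  # a weighted path graph
g.edge_properties['weights'] = w
centres = find_centres(g, minR=5)  # successive centres end up > minR apart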
Example #5
    def calc_node_centricity(self):
        """
        Compute the node centricity of feature nodes (terminal and junction
        nodes); T1 in Oscar's EG 2010 paper.
        """
        self._calc_edge_length()
        node_centricity = []
        for n_idx in self.feature_node_index:
            dist = topology.shortest_distance(self.skel_graph, source=self.skel_graph.vertex(n_idx), weights=self.edge_length_map)
            node_centricity.append(dist.a.mean())

        node_centricity = np.array(node_centricity)
        self.node_centricity = node_centricity / np.max(node_centricity)
Example #6
def make_graph(n):
    start = clock()  # NB: time.clock() was removed in Python 3.8; time.perf_counter() replaces it
    cexp.preferential_attachment(n, m=3, gamma=1.05, c=.4,
                                 bonus_neighbor_prob=.13)
    k = cexp.to_graph_tool()
    lcc = label_largest_component(k)
    k.set_vertex_filter(lcc)
    lcc_nodes = np.where(lcc.a)[0]
    full_dst = shortest_distance(k, dense=False)
    full_mat = np.zeros((n, n), dtype=np.uint8)
    for v in k.vertices():
        full_mat[int(v), :] = full_dst[v].a.astype(np.uint8)
    del full_dst
    print('make_graph {:.3f}'.format(clock() - start))
    return k, lcc_nodes, full_mat
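make_graph fills the dense matrix row by row into uint8, which silently wraps for distances above 255 (and for the 2147483647 'unreachable' sentinel); that is safe here only because the graph is small and filtered to its largest component. A hedged one-call alternative for an unfiltered graph, using the columns-as-sources layout noted in Example #24:

import numpy as np
from graph_tool.topology import shortest_distance

def all_pairs_matrix(g):
    """All-pairs distances as an (n, n) array with mat[u, v] = dist(u, v)."""
    n = g.num_vertices()
    dist = shortest_distance(g, dense=False)
    # graph-tool returns one distance vector per source, columns as sources
    # (see the transpose in Example #24), hence the .T
    return dist.get_2d_array(list(range(n))).T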
Example #7
    def normalize_skeleton(self):
        """
        Compute the pose-normalized skeleton to distinguish symmetric nodes,
        using multidimensional scaling (MDS).
        """
        v_num = len(self.verts)
        geodesic_dist = np.zeros((v_num, v_num))
        geodesic_dist_map = topology.shortest_distance(self.skel_graph, weights=self.edge_length_map)
        for i in range(v_num):
            geodesic_dist[i] = geodesic_dist_map[self.skel_graph.vertex(i)].a

        mds = manifold.MDS(n_components=3, max_iter=500, eps=1e-10, dissimilarity="precomputed", n_jobs=-2, n_init=1)
        verts_mean = self.verts - self.verts.mean(axis=0)
        normalized_verts = mds.fit(geodesic_dist, init=verts_mean).embedding_
        #scale = np.sqrt((verts_mean ** 2).sum()) / np.sqrt((normalized_verts ** 2).sum())
        #normalized_verts *= scale
        self.normalized_verts = normalized_verts
        self.normalized_feature_verts = normalized_verts[self.feature_node_index]
        return self.normalized_verts
Example #8
    def calc_path_length_ratio(self):
        """
        For each feature-node pair segment, compute the path length ratio,
        normalized to make it scale invariant.
        """
        path_length = np.zeros((len(self.feature_node_index), len(self.feature_node_index)), dtype=float)
        for i, n_idx in enumerate(self.feature_node_index):
            for j, m_idx in enumerate(self.feature_node_index[i+1:], start=i+1):
                length = topology.shortest_distance(self.skel_graph, self.skel_graph.vertex(n_idx), self.skel_graph.vertex(m_idx), weights=self.edge_length_map)
                if length is not None:
                    path_length[i,j] = path_length[j,i] = length
                else:
                    print('compute path length ratio error')
                    return None

        ### extract path length from each feature node to junction nodes ###
        ### Careful!! path_length MUST start from junction node
        self.path_to_junction = path_length[:,:len(self.junction_index)]

        self.path_length_ratio = path_length / path_length.max()
        return self.path_length_ratio
Example #9
    def shortest_distances(self, source, targets, times):
        ep_time_map = self.graph.new_edge_property("double", vals=times)
        distances, pred_map = gtt.shortest_distance(g=self.graph,
                                                    source=source,
                                                    target=targets,
                                                    weights=ep_time_map,
                                                    pred_map=True)
        return distances, pred_map.a


#    def nodes_number(self):
#        return self.nodes_number

#    def links_number(self):
#        return self.links_number

#    def capacities(self):
#        return self.capacities

#    def freeflow_times(self):
#        return self.freeflow_times
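A hedged usage sketch for the shortest_distances wrapper above: walking the returned predecessor array back from the target reconstructs the route. The instance, vertex indices, and edge times are illustrative assumptions, and the target must be reachable (the source is its own predecessor, so the walk stops there):

dist, pred = net.shortest_distances(source=net.graph.vertex(0),
                                    targets=net.graph.vertex(7),
                                    times=edge_times)
path = [7]
while path[-1] != 0:  # pred[v] is v's predecessor on the search tree
    path.append(int(pred[path[-1]]))
print(path[::-1])  # vertex indices from source 0 to target 7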
Example #10
def populate_mask(graph, sample_points, imgData, coords, offset=0):
    # find distance of all voxels to region centres
    edge_weights = graph.edge_properties['weights']
    distance_mapping = np.zeros((len(sample_points), graph.num_vertices()))
    for counter, each_centre in enumerate(sample_points):
        pointID = imgData[each_centre[0], each_centre[1], each_centre[2]] - 1

        # get distances
        dmap = gtt.shortest_distance(graph, graph.vertex(pointID),
                                     weights=edge_weights).get_array()
        d = np.asarray(dmap)

        distance_mapping[counter, :] = d

    # assign voxels to closest centre
    centres = distance_mapping.argmin(axis=0)
    for each_voxel in np.arange(len(centres)):
        imgData[coords[each_voxel][0], coords[each_voxel][1],
                coords[each_voxel][2]] = centres[each_voxel] + offset + 1

    return imgData
Example #11
def calculate_distances(g, acc_param=0, g_distances=None):
    if g_distances is None:
        print('start calculate distances')
        g_distances = topology.shortest_distance(g)
        print('calculate distances done')
    else:
        print('use provided distances topology')

    dist = []
    counter = 0

    if acc_param > 0:
        if g.is_directed():
            all_pairs = n_random_permutations(
                g.vertices(), int(acc_param * g.num_vertices()))
            num_pairs = int(acc_param * g.num_vertices())
        else:
            all_pairs = n_random_combinations(
                g.vertices(), int(acc_param * g.num_vertices()))
            num_pairs = int(acc_param * g.num_vertices())
    else:
        if g.is_directed():
            all_pairs = itertools.permutations(g.vertices(), 2)
            num_pairs = g.num_vertices()**2
        else:
            all_pairs = itertools.combinations(g.vertices(), 2)
            num_pairs = nCr(g.num_vertices(), 2)

    print('select pairs of permutations/combinations done')

    step = max(1, num_pairs // 20)  # report progress ~every 5%, avoid div-by-zero
    for (v1, v2) in all_pairs:
        dist.append(g_distances[v1][v2])

        if counter % step == 0:
            print(round(counter / num_pairs * 100, 1), '%')
        counter += 1

    return dist
Example #12
def fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False):
    # get ancestor and descendent
    td_vis = TopDownVisitor(np.ones(g.num_vertices(), dtype=int) * -1, root, obs_nodes)
    bfs_search(t, source=root, visitor=td_vis)

    bu_vis = BottomUpVisitor(np.ones(g.num_vertices(), dtype=int) * -1, root, obs_nodes)
    bottom_up_traversal(t, vis=bu_vis)

    # infer the time
    hidden_nodes = set(map(int, t.vertices())) - set(obs_nodes)
    assert (root not in hidden_nodes), 'root is hidden'

    pred_infection_times = np.array(infection_times)
    dist = shortest_distance(t, source=root)
    for v in hidden_nodes:
        ans, des = td_vis.pred[v], bu_vis.pred[v]
        assert ans != -1
        assert des != -1, \
                      '{}, {}'.format(v, (t.vertex(v).in_degree(), t.vertex(v).out_degree()))  # (1, 0) => v is a leaf

        if debug:
            print(v, ans, des)
            
        denum = dist[des] - dist[ans]
        numer = dist[v] - dist[ans]
        pred_infection_times[v] = (infection_times[ans] +
                                   abs(numer / denum * (infection_times[des] - infection_times[ans])))
        
        if debug:
            assert pred_infection_times[v] > infection_times[ans]
            assert pred_infection_times[v] < infection_times[des]

            print('t(ans), t(des): {}, {}'.format(infection_times[ans], infection_times[des]))
            print('numer {}'.format(numer))
            print('denum {}'.format(denum))
            print('pred time {}'.format(pred_infection_times[v]))

    return pred_infection_times
Example #13
def main():
    results_dict = results_to_dict(RESULTS_PATH)
    graph = load_graph(SYNSETS_GRAPH)

    for source_lemma, targets_synset_ids in results_dict.items():
        for source_node in graph._lemma_to_nodes_dict[source_lemma]:

            distances = []
            for target_synset_id in targets_synset_ids:
                target_vertex = graph.get_node_for_synset_id(
                    int(target_synset_id))
                distance = shortest_distance(graph.use_graph_tool(),
                                             source_node,
                                             target_vertex,
                                             max_dist=6,
                                             directed=False)
                distances.append(distance)

            min_dist = min(distances)
            if min_dist > 6:
                min_dist = -1

            print("{},{}".format(source_lemma, min_dist))
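A hedged note on the `max_dist=6` logic above: graph-tool does not raise when a target lies beyond `max_dist`; it reports it with the same 'unreachable' sentinel used elsewhere, which is exactly what the `min_dist > 6` test catches. A minimal sketch:

from graph_tool import Graph
from graph_tool.topology import shortest_distance

g = Graph(directed=False)
g.add_edge_list([(i, i + 1) for i in range(9)])  # a path with 10 vertices
d = shortest_distance(g, g.vertex(0), g.vertex(9), max_dist=6)
print(d)  # a huge sentinel (int32 max on unweighted graphs), hence `min_dist > 6`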
Example #14
def get_graph(balanced=False):
    """Load the graph from BASENAME and optionally remove positive edges to
    balance the graph. NOTE: this only modifies the redensify structure, not
    the graph_tool graph & its distance matrix."""
    if balanced:
        import persistent
    if os.path.isfile(BASENAME+'.gt'):
        g = graph_tool.load_graph(BASENAME+'.gt')
        dst_mat = np.load(BASENAME+'_dst.npy')
        cexp.to_python_graph(g)
        if balanced:
            to_delete = persistent.load_var(BASENAME+'_balance.my')
            for edge in to_delete:
                pot.delete_edge(redensify.G, edge, redensify.EDGES_SIGN)
        return g, dst_mat
    if not PA:
        cexp.random_signed_communities(2, 500, 13, 11.5/500, .0, .0)
        g = cexp.to_graph_tool()
    else:
        cexp.preferential_attachment(1000, gamma=1.4, m=12)
        cexp.turn_into_signed_graph_by_propagation(2)
        DEGREES = sorted(((node, len(adj))
                          for node, adj in cexp.redensify.G.items()),
                         key=lambda x: x[1])
        u, v = DEGREES[-1][0], DEGREES[-2][0]
        u, v = (v, u) if u > v else (u, v)  # ensure u < v
        del cexp.redensify.EDGES_SIGN[(u, v)]
        cexp.redensify.G[u].remove(v)
        cexp.redensify.G[v].remove(u)
        g = cexp.to_graph_tool()  # this branch also needs the graph_tool view
    n = g.num_vertices()
    dst = shortest_distance(g, dense=False)
    dst_mat = np.zeros((n, n), dtype=np.uint8)
    for v in g.vertices():
        dst_mat[int(v), :] = dst[v].a.astype(np.uint8)
    g.save(BASENAME+'.gt')
    np.save(BASENAME+'_dst', dst_mat)
    return g, dst_mat
Example #15
def populate_mask(graph, sample_points, imgData, coords, offset=0):
    # find distance of all voxels to region centres
    edge_weights = graph.edge_properties['weights']
    distance_mapping = np.zeros((len(sample_points), graph.num_vertices()))
    counter = 0
    for each_centre in sample_points:
        pointID = imgData[each_centre[0], each_centre[1], each_centre[2]] - 1

        # get distances
        dmap = gtt.shortest_distance(graph,
                                     graph.vertex(pointID),
                                     weights=edge_weights).get_array()
        d = np.asarray(dmap)

        distance_mapping[counter, :] = d
        counter = counter + 1

    # assign voxels to closest centre
    centres = distance_mapping.argmin(axis=0)
    for each_voxel in np.arange(len(centres)):
        imgData[coords[each_voxel][0], coords[each_voxel][1],
                coords[each_voxel][2]] = centres[each_voxel] + offset + 1

    return imgData
Example #16
    def calculate_centrality(self):

        ix = 0
        glen = self.g.num_vertices()

        start = time.time()

        for vertex in self.g.vertices():
            sd = shortest_distance(self.g, source=vertex, directed=False).a
            # 2147483647 (int32 max) marks unreachable vertices; note that,
            # despite the property name, 1/sum(d) is closeness rather than
            # harmonic centrality (which would sum 1/d)
            reachable = [d for d in sd if d != 2147483647]
            sump = sum(reachable)
            self.g.vp.harmonic_centrality[vertex] = 1 / float(sump) if sump != 0 else 0
            ix += 1
            stdout.write("Calculating centrality: " + "\r%.2f " % ((float(ix) / glen) * 100) + "%")

        stdout.write("\n")
        maxval = max(self.g.vp.harmonic_centrality[x] for x in self.g.vertices())
        print(maxval)

        for vertex in self.g.vertices():
            self.g.vp.harmonic_centrality[vertex] = self.g.vp.harmonic_centrality[vertex] / maxval

        end = time.time()
        print("centrality calculated in " + str(end - start))
Example #17
    def max_depth(self):
        return max(shortest_distance(self._graph,
                                     source=self._graph.vertex(0)).a)
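Hedged caveat for max_depth: if any vertex is unreachable from vertex 0, the max() picks up the 2147483647 'unreachable' sentinel instead of the true depth. A hypothetical variant that filters it out first:

    def max_depth_reachable(self):
        # hypothetical helper: ignore the int32-max 'unreachable' sentinel
        d = shortest_distance(self._graph, source=self._graph.vertex(0)).a
        return int(d[d < 2147483647].max())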
Example #18
def load_road_skim_graph(path="data/taxi_graphs/final_graph_hour_0.graphml"):
    g = load_graph(path)
    vm = g.vertex_properties["_graphml_vertex_id"]
    vmr = {vm[v]: v for v in g.vertices()}
    skim_table = shortest_distance(g, weights=g.edge_properties["weight"])
    return (g, vmr, skim_table)
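A hedged usage sketch for the skim table (the vertex ids "42" and "17" are illustrative guesses at `_graphml_vertex_id` values in the file):

g, vmr, skim = load_road_skim_graph()
src, dst = vmr["42"], vmr["17"]
print(skim[src][int(dst)])  # skim[v] is the weighted-distance vector from v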
Example #19
bfs_tree = set(pot.get_bfs_tree(rw.G, root))
test_edges = all_lcc_edges - bfs_tree
test_graph = {}
for u, v in test_edges:
    pot.add_edge_to_tree(test_graph, u, v)
bfsmap = k.new_edge_property('boolean')
for e in k.edges():
    u, v = int(e.source()), int(e.target())
    bfsmap[e] = (u, v) in bfs_tree
k.set_vertex_filter(None)
k.set_edge_filter(bfsmap)
print_diag('build tree {}, {} test edges'.format(root, len(test_edges)))
bfs_dst = shortest_distance(k, dense=False)
bfs_mat = np.zeros((n, n), dtype=np.uint8)
for v in k.vertices():
    bfs_mat[int(v), :] = bfs_dst[v].a.astype(np.uint8)
print_diag('computed pairwise distance')
bsum = 0
bsize = 0
esum = 0
for i, v in enumerate(lcc_nodes):
    graph_distance = dst_mat[v, lcc_nodes[i+1:]]
    tree_distance = bfs_mat[v, lcc_nodes[i+1:]]
    if v in test_graph:
        esum += bfs_mat[v, sorted(test_graph[v])].sum()
    ratio = (tree_distance/graph_distance)
    bsum += ratio.sum()
    bsize += ratio.shape[0]
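The snippet ends before aggregating; following the same bookkeeping as compute_stretch in Example #2 above, the average path stretch would presumably be:

path_stretch = bsum / bsize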
Example #20
def prepare_hierarchies_neighborhood(
    experiments_path: ExperimentPaths,
    conceptnet_graph_path: Union[str, Path],
    filter_graphs_to_intersected_vertices: bool = True,
) -> pd.DataFrame:
    conceptnet_graph_path = Path(conceptnet_graph_path)  # `.stem` below requires a Path
    conceptnet_hierarchy_neighborhood_df_path = (
        experiments_path.experiment_path /
        f"shortest-paths-pairs-{conceptnet_graph_path.stem}-df.pkl")

    logger.info("Prepare graphs")

    conceptnet_graph, vertices_conceptnet = prepare_conceptnet(
        conceptnet_graph_path)
    mlflow.log_metric("conceptnet_graph_nodes",
                      conceptnet_graph.num_vertices())
    mlflow.log_metric("conceptnet_graph_edges", conceptnet_graph.num_edges())

    aspect_graph, experiment_paths = prepare_aspect_graph(experiments_path)
    mlflow.log_metric("aspect_graph_nodes", aspect_graph.num_vertices())
    mlflow.log_metric("aspect_graph_edges", aspect_graph.num_edges())

    aspect_graph_intersected = Graph(aspect_graph)
    conceptnet_graph_intersected = Graph(conceptnet_graph)

    aspect_graph_intersected, conceptnet_graph_intersected = intersected_nodes(
        aspect_graph=aspect_graph_intersected,
        conceptnet_graph=conceptnet_graph_intersected,
        filter_graphs_to_intersected_vertices=
        filter_graphs_to_intersected_vertices,
        property_name="aspect_name",
    )

    mlflow.log_param("filter_graphs_to_intersected_vertices",
                     filter_graphs_to_intersected_vertices)

    mlflow.log_metric(
        "conceptnet_graph_intersected_nodes",
        conceptnet_graph_intersected.num_vertices(),
    )
    mlflow.log_metric("aspect_graph_intersected_nodes",
                      aspect_graph_intersected.num_vertices())

    mlflow.log_metric("conceptnet_graph_intersected_edges",
                      conceptnet_graph_intersected.num_edges())
    mlflow.log_metric("aspect_graph_intersected_edges",
                      aspect_graph_intersected.num_edges())

    aspect_names_intersected = list(
        aspect_graph_intersected.vertex_properties["aspect_name"])

    vertices_name_to_aspect_vertex = dict(
        zip(aspect_graph.vertex_properties["aspect_name"],
            aspect_graph.vertices()))
    aspect_graph_vertices_intersected = [
        vertices_name_to_aspect_vertex[a] for a in aspect_names_intersected
    ]
    shortest_distances_aspect_graph = np.array([
        shortest_distance(
            g=aspect_graph,
            source=v,
            target=aspect_graph_vertices_intersected,
            directed=True,
        ) for v in tqdm(aspect_graph_vertices_intersected,
                        desc="Aspect graph shortest paths...")
    ])

    conceptnet_vertices_intersected = [
        vertices_conceptnet[a] for a in aspect_names_intersected
    ]

    mlflow.log_metric("conceptnet_vertices_intersected len",
                      len(conceptnet_vertices_intersected))
    mlflow.log_metric("aspect_graph_vertices_intersected len",
                      len(aspect_graph_vertices_intersected))
    assert len(conceptnet_vertices_intersected) == len(
        aspect_graph_vertices_intersected
    ), "Wrong sequence of vertices in both graphs"

    shortest_distances_conceptnet = np.array([
        shortest_distance(
            g=conceptnet_graph,
            source=v,
            target=conceptnet_vertices_intersected,
            directed=True,
        ) for v in tqdm(conceptnet_vertices_intersected,
                        desc="Conceptnet shortest paths...")
    ])

    pairs = []
    for aspect_1_idx, aspect_1 in tqdm(enumerate(aspect_names_intersected),
                                       desc="Pairs distances..."):
        for aspect_2_idx, aspect_2 in enumerate(aspect_names_intersected):
            pairs.append((
                aspect_1,
                aspect_2,
                shortest_distances_aspect_graph[aspect_1_idx][aspect_2_idx],
                shortest_distances_conceptnet[aspect_1_idx][aspect_2_idx],
            ))

    pairs_df = pd.DataFrame(
        pairs,
        columns=[
            "aspect_1",
            "aspect_2",
            "shortest_distance_aspect_graph",
            "shortest_distance_conceptnet",
        ],
    )

    logger.info("Dump DataFrame with pairs")
    pairs_df.to_pickle(conceptnet_hierarchy_neighborhood_df_path.as_posix())
    mlflow.log_artifact(conceptnet_hierarchy_neighborhood_df_path.as_posix())
    mlflow.log_metric("conceptnet_hierarchy_neighborhood_df_len",
                      len(pairs_df))
    logger.info(
        f"DataFrame with pairs dumped in: {experiment_paths.conceptnet_hierarchy_neighborhood.as_posix()}"
    )

    return pairs_df
Example #21
    def _compute_isochrone(self, params) -> None:

        iso_time, dist = params
        self.logger.info(
            f"Compute isochrone: {iso_time} minutes => {dist} meters")

        points_found = []
        for source in self._source_vertices:
            _, pred = shortest_distance(
                self._graph,
                source=source,
                weights=self._graph.edge_weights,
                max_dist=dist,
                return_reached=True,
            )
            points_found.extend(
                [self._graph.vertex_names[vertex] for vertex in pred])

        all_edges_found_topo_uuids_count = Counter(
            list(
                itertools.chain(*[
                    self._graph.find_edges_from_vertex(pt)
                    for pt in points_found
                ])))
        all_edges_found_topo_uuids = set(
            list(
                dict(
                    filter(
                        lambda x: x[1] > 1,
                        all_edges_found_topo_uuids_count.items(),
                    )).keys()))

        network_mask = self._network_gdf["topo_uuid"].isin(
            all_edges_found_topo_uuids)

        if self._build_polygon:
            iso_polygon_computed = (self._network_gdf.loc[network_mask].buffer(
                self._display_mode_params["path_buffered"],
                cap_style=self.__DEFAULT_CAPSTYLE,
                join_style=self.__DEFAULT_JOINSTYLE,
                resolution=self._display_mode_params["resolution"],
            ).unary_union)
            # normalize to MultiPolygon: each member polygon corresponds to one
            # connected subnetwork reached within the isochrone
            if iso_polygon_computed.geom_type == "Polygon":
                iso_polygon_computed = MultiPolygon([iso_polygon_computed])

            output_iso_polygon = []
            for polyg in iso_polygon_computed.geoms:
                iso_polygon = polyg.buffer(  # merge them now
                    self._display_mode_params["dilatation"],
                    cap_style=self._display_mode_params["cap_style"],
                    join_style=self._display_mode_params["join_style"],
                    resolution=self._display_mode_params["resolution"],
                ).buffer(
                    self._display_mode_params["erosion"],
                    cap_style=self._display_mode_params["cap_style"],
                    join_style=self._display_mode_params["join_style"],
                    resolution=self._display_mode_params["resolution"],
                )
                iso_polygon = convert_to_polygon(iso_polygon)
                output_iso_polygon.extend(iso_polygon)

            iso_polygon = MultiPolygon(output_iso_polygon)

            # compute exterior
            iso_polygon = MultiPolygon([
                Polygon(iso_polygon_part.exterior)
                for iso_polygon_part in convert_to_polygon(iso_polygon)
            ])
        else:
            iso_polygon = None

        self._isochrones_data.append({
            self.__ISOCHRONE_NAME_FIELD: iso_time,
            self.__TIME_UNIT_FIELD: time_unit,  # assumed defined in the enclosing scope
            self.__ISODISTANCE_NAME_FIELD: dist,
            self.__DISTANCE_UNIT_FIELD: distance_unit,  # idem
            self.__NETWORK_MASK: network_mask,
            "geometry": iso_polygon,
        })
Example #22
def curvature_estimation(radius_hit,
                         graph_file='temp.gt',
                         method='VV',
                         page_curvature_formula=False,
                         area2=True,
                         poly_surf=None,
                         full_dist_map=False,
                         cores=10,
                         runtimes='',
                         vertex_based=False,
                         sg=None):
    """
    Runs the second pass of the modified Normal Vector Voting algorithm with
    the given method to estimate principal curvatures and directions for a
    surface using its triangle graph (third and last part used by
    normals_directions_and_curvature_estimation).

    Args:
        radius_hit (float): radius in length unit of the graph;
            it should be chosen to correspond to radius of smallest features of
            interest on the surface
        graph_file (string, optional): name for a graph file after the first run
            of the algorithm (default 'temp.gt' will be removed after loading)
        method (str, optional): a method to run in the second pass ('VV' and
            'SSVV' are possible, default is 'VV')
        page_curvature_formula (boolean, optional): if True (default False),
            normal curvature formula from Page et al. is used in VV (see
            collect_curvature_votes)
        area2 (boolean, optional): if True (default), votes are
            weighted by triangle area also in the second pass (not possible for
            vertex-based approach)
        poly_surf (vtkPolyData): scaled surface from which the graph was
            generated, (required only if method="SSVV", default None)
        full_dist_map (boolean, optional): if True, a full distance map is
            calculated for the whole graph (not possible for vertex-based
            approach), otherwise a local distance map is calculated later for
            each vertex (default)
        cores (int): number of cores to run VV (collect_curvature_votes and
            estimate_curvature) in parallel (default 10)
        runtimes (str): if given, runtimes and some parameters are added to
            this file (default '')
        vertex_based (boolean, optional): if True (default False), curvature is
            calculated per triangle vertex instead of triangle center.
        sg (TriangleGraph or PointGraph): if given (default None), this graph
            object will be used instead of loading from the 'graph_file' file

    Returns:
        a tuple of TriangleGraph or PointGraph (the latter if a PointGraph was
        given) and a vtkPolyData surface of triangles with classified
        orientation and estimated normals or tangents, principal curvatures
        and directions
    """
    # Preparation (calculations that are the same for the whole graph)
    t_begin0 = time.time()
    if sg is None:
        if vertex_based:
            sg = PointGraph()
        else:
            sg = TriangleGraph()
        sg.graph = load_graph(graph_file)

    if vertex_based:
        # cannot weight by triangle area in vertex-based approach
        area2 = False

    if full_dist_map is True and sg.__class__.__name__ == "TriangleGraph":
        # * Distance map between all pairs of vertices *
        t_begin0_2 = time.time()
        print("\nCalculating the full distance map...")
        full_dist_map = shortest_distance(  # <class 'graph_tool.PropertyMap'>
            sg.graph,
            weights=sg.graph.ep.distance)
        t_end0_2 = time.time()
        duration0_2 = t_end0_2 - t_begin0_2
        minutes, seconds = divmod(duration0_2, 60)
        print('Calculation of the full distance map took: {} min {} s'.format(
            minutes, seconds))
    else:
        full_dist_map = None

    g_max = math.pi * radius_hit / 2.0
    sigma = g_max / 3.0
    if method == "VV" and area2:
        a_max = sg.graph.gp.max_triangle_area
        print("Maximal triangle area = {}".format(a_max))
    else:
        a_max = 0.0

    # * Adding vertex properties to be filled by all curvature methods *
    # vertex properties for storing the estimated principal directions of the
    # maximal and minimal curvatures of the corresponding triangle:
    sg.graph.vp.t_1 = sg.graph.new_vertex_property("vector<float>")
    sg.graph.vp.t_2 = sg.graph.new_vertex_property("vector<float>")
    # vertex properties for storing the estimated maximal and minimal curvatures
    # of the corresponding triangle:
    sg.graph.vp.kappa_1 = sg.graph.new_vertex_property("float")
    sg.graph.vp.kappa_2 = sg.graph.new_vertex_property("float")
    # vertex property for storing the Gaussian curvature calculated from kappa_1
    # and kappa_2 at the corresponding triangle:
    sg.graph.vp.gauss_curvature_VV = sg.graph.new_vertex_property("float")
    # vertex property for storing the mean curvature calculated from kappa_1 and
    # kappa_2 at the corresponding triangle:
    sg.graph.vp.mean_curvature_VV = sg.graph.new_vertex_property("float")
    # vertex property for storing the shape index calculated from kappa_1 and
    # kappa_2 at the corresponding triangle:
    sg.graph.vp.shape_index_VV = sg.graph.new_vertex_property("float")
    # vertex property for storing the curvedness calculated from kappa_1 and
    # kappa_2 at the corresponding triangle:
    sg.graph.vp.curvedness_VV = sg.graph.new_vertex_property("float")

    t_end0 = time.time()
    duration0 = t_end0 - t_begin0
    minutes, seconds = divmod(duration0, 60)
    print('Preparation took: {} min {} s'.format(minutes, seconds))

    t_begin2 = time.time()
    if method == 'VV':
        if page_curvature_formula:
            method_print = 'NVV'
        elif area2:
            method_print = 'AVV'
        else:
            method_print = 'RVV'
    else:  # method == 'SSVV'
        method_print = 'SSVV'
    print("\nSecond pass: estimating principal curvatures and directions for "
          "surface patches using {}...".format(method_print))

    # shortcuts
    vertices = sg.graph.vertices
    vertex = sg.graph.vertex
    collect_curvature_votes = sg.collect_curvature_votes
    gen_curv_vote = sg.gen_curv_vote
    estimate_curvature = sg.estimate_curvature
    second_pass = sg.second_pass
    orientation_class = sg.graph.vp.orientation_class
    add_curvature_descriptors_to_vertex = sg.add_curvature_descriptors_to_vertex
    graph_to_triangle_poly = sg.graph_to_triangle_poly

    # Estimate principal directions and curvatures (and calculate the
    # Gaussian and mean curvatures, shape index and curvedness) for vertices
    # belonging to a surface patch
    good_vertices_ind = []
    for v in vertices():
        if orientation_class[v] == 1:
            good_vertices_ind.append(int(v))
            # Voting and curvature estimation for SSVV:
            if method == "SSVV":  # sequential processing, edits the graph
                # curvatures are saved in the graph; placeholders on error
                gen_curv_vote(poly_surf, v, radius_hit)
        else:  # add placeholders for vertices classified as crease or noise
            add_curvature_descriptors_to_vertex(v, None, None, None, None,
                                                None, None, None, None)
    print("{} vertices to estimate curvature".format(len(good_vertices_ind)))

    if method == "VV":
        if cores > 1:  # parallel processing
            p = pp.ProcessPool(cores)
            print('Opened a pool with {} processes'.format(cores))
            # results_list has same length as good_vertices_ind
            # columns: t_1, t_2, kappa_1, kappa_2, gauss_curvature,
            # mean_curvature, shape_index, curvedness
            results_list = p.map(
                partial(second_pass,
                        g_max=g_max,
                        sigma=sigma,
                        page_curvature_formula=page_curvature_formula,
                        a_max=a_max,
                        full_dist_map=full_dist_map), good_vertices_ind)
            p.close()
            p.clear()

            results_array = np.array(results_list, dtype=object)
            t_1_array = results_array[:, 0]
            t_2_array = results_array[:, 1]
            kappa_1_array = results_array[:, 2]
            kappa_2_array = results_array[:, 3]
            gauss_curvature_array = results_array[:, 4]
            mean_curvature_array = results_array[:, 5]
            shape_index_array = results_array[:, 6]
            curvedness_array = results_array[:, 7]

            # Add the curvature descriptors as properties to the graph:
            # (v_ind is vertex v index, i is v_ind index in results arrays)
            for i, v_ind in enumerate(good_vertices_ind):
                v = vertex(v_ind)
                add_curvature_descriptors_to_vertex(
                    v, t_1_array[i], t_2_array[i], kappa_1_array[i],
                    kappa_2_array[i], gauss_curvature_array[i],
                    mean_curvature_array[i], shape_index_array[i],
                    curvedness_array[i])

        else:  # cores == 1, sequential processing
            # Curvature votes collection and estimation for VV:
            for i, v_ind in enumerate(good_vertices_ind):
                B_v = collect_curvature_votes(
                    v_ind,
                    g_max,
                    sigma,
                    page_curvature_formula=page_curvature_formula,
                    a_max=a_max,
                    full_dist_map=full_dist_map)
                results = estimate_curvature(v_ind, B_v)
                # Add the properties to the graph:
                v = vertex(v_ind)
                add_curvature_descriptors_to_vertex(v, *results)

    # Transforming the resulting graph to a surface with triangles:
    surface_curv = graph_to_triangle_poly(verbose=False)

    t_end2 = time.time()
    duration2 = t_end2 - t_begin2
    minutes, seconds = divmod(duration2, 60)
    print('Second run of {} took: {} min {} s'.format(method, minutes,
                                                      seconds))

    # adding to the runtimes CSV file:
    # - method
    # - duration2
    if runtimes != '':
        with open(runtimes, 'a') as f:
            f.write("{};{}\n".format(method_print, duration2))

    return sg, surface_curv
Example #23
def shortest_distances(source):
    # `conceptnet` and `INFINITY` are module-level globals in the original file
    with gzip.open(f"data/distances/{source}.csv.gz", "wt") as file:
        distances = topology.shortest_distance(conceptnet, source)
        for target, distance in enumerate(distances.a):
            if distance < INFINITY:
                print(f"{target},{distance}", file=file)
Example #24
def shortest_distance(g,
                      sources=None,
                      targets=None,
                      directed=None,
                      weights=None,
                      combine_weights="mean"):
    '''
    Returns the length of the shortest paths between `sources` and `targets`.
    The algorithm returns infinity if there is no path between two nodes.

    Parameters
    ----------
    g : :class:`~nngt.Graph`
        Graph to analyze.
    sources : list of nodes, optional (default: all)
        Nodes from which the paths must be computed.
    targets : list of nodes, optional (default: all)
        Nodes to which the paths must be computed.
    directed : bool, optional (default: ``g.is_directed()``)
        Whether the edges should be considered as directed or not
        (automatically set to False if `g` is undirected).
    weights : str or array, optional (default: binary)
        Whether to use weighted edges to compute the distances. By default,
        all edges are considered to have distance 1.
    combine_weights : str, optional (default: 'mean')
        How to combine the weights of reciprocal edges if the graph is directed
        but `directed` is set to False. It can be:

        * "sum": the sum of the edge attribute values will be used for the new
          edge.
        * "mean": the mean of the edge attribute values will be used for the
          new edge.
        * "min": the minimum of the edge attribute values will be used for the
          new edge.
        * "max": the maximum of the edge attribute values will be used for the
          new edge.

    Returns
    -------
    distance : float, or 1d/2d numpy array of floats
        Distance (if single source and single target) or distance array.
        For multiple sources and targets, the shape of the matrix is (S, T),
        with S the number of sources and T the number of targets; for a single
        source or target, return a 1d-array of length T or S.

    References
    ----------
    .. [gt-sd] :gtdoc:`topology.shortest_distance`
    '''
    num_nodes = g.node_nb()

    g, graph, w = _get_gt_graph(g,
                                directed,
                                weights,
                                combine_weights,
                                return_all=True)

    w = _get_gt_weights(g, w)

    dist_emap = None
    tgt_vtx = None

    maxint = np.iinfo(np.int32).max
    maxflt = np.finfo(np.float64).max

    # convert sources and targets
    if sources is not None:
        if nonstring_container(sources):
            sources = [graph.vertex(s) for s in sources]
        else:
            sources = [graph.vertex(sources)]

    if nonstring_container(targets):
        tgt_vtx = [graph.vertex(t) for t in targets]
    elif targets is not None:
        tgt_vtx = [graph.vertex(targets)]
        targets = [targets]

    # compute only specific paths
    if sources is not None:
        # single source/target case
        if targets is not None and len(sources) == 1 and len(targets) == 1:
            distance = gtt.shortest_distance(graph,
                                             source=sources[0],
                                             target=tgt_vtx[0],
                                             weights=w)

            if w is None:
                if distance == maxint:
                    return np.inf
            elif distance == maxflt:
                return np.inf

            return float(distance)

        # multiple sources
        num_sources = len(sources)
        num_targets = num_nodes if targets is None else len(targets)

        mat_dist = np.full((num_sources, num_targets), np.nan)

        for i, s in enumerate(sources):
            dist = gtt.shortest_distance(graph,
                                         source=s,
                                         target=tgt_vtx,
                                         weights=w)

            mat_dist[i] = dist.a  # row i follows the order of `sources`

        # convert max int and float to inf
        if w is None:
            mat_dist[mat_dist == maxint] = np.inf
        else:
            mat_dist[mat_dist == maxflt] = np.inf

        if num_sources == 1:
            return mat_dist[0]

        if num_targets == 1:
            return mat_dist[:, 0]  # 1d array of length S, as documented above

        return mat_dist

    # if source is None, then we compute all paths
    dist = gtt.shortest_distance(graph, weights=w)

    # transpose (graph-tool uses columns as sources)
    mat_dist = dist.get_2d_array([i for i in range(num_nodes)]).astype(float).T

    if w is None:
        # check unconnected with int32
        mat_dist[mat_dist == maxint] = np.inf
    else:
        # check float max
        mat_dist[mat_dist == maxflt] = np.inf

    if targets is not None:
        if len(targets) == 1:
            return mat_dist.T[0]

        return mat_dist[:, targets]

    return mat_dist
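A hedged usage sketch of this NNGT-style wrapper (assumes `g` is an nngt.Graph; the return shapes follow the docstring above):

d_mat = shortest_distance(g)                        # (N, N) array, np.inf if unreachable
d_row = shortest_distance(g, sources=[3])           # 1d array of length N
d_one = shortest_distance(g, sources=0, targets=7)  # single float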
Example #25
def normals_estimation(sg,
                       radius_hit,
                       epsilon=0,
                       eta=0,
                       full_dist_map=False,
                       cores=10,
                       runtimes='',
                       graph_file='temp.gt'):
    """
    Runs the modified Normal Vector Voting algorithm to estimate surface
    orientation (classification in surface patch with normal, crease junction
    with tangent or no preferred orientation) for a surface using its graph.

    Adds the "orientation_class" (1-3), the estimated normal "n_v" (if class is
    1) and the estimated_tangent "t_v" (if class is 2) as vertex properties
    into the graph.

    Args:
        sg (TriangleGraph or PointGraph): triangle or point graph generated
            from a surface of interest
        radius_hit (float): radius in length unit of the graph;
            it should be chosen to correspond to radius of smallest features of
            interest on the surface
        epsilon (float, optional): parameter of Normal Vector Voting algorithm
            influencing the number of triangles classified as "crease junction"
            (class 2), default 0
        eta (float, optional): parameter of Normal Vector Voting algorithm
            influencing the number of triangles classified as "crease junction"
            (class 2) and "no preferred orientation" (class 3, see Notes),
            default 0
        full_dist_map (boolean, optional): if True, a full distance map is
            calculated for the whole graph (not possible for PointGraph),
            otherwise a local distance map is calculated later for each vertex
            (default)
        cores (int, optional): number of cores to run VV (collect_normal_votes
            and estimate_normal) in parallel (default 10)
        runtimes (str, optional): if given, runtimes and some parameters are
            added to this file (default '')
        graph_file (str, optional): file path to save the graph, default file
            'temp.gt'

    Returns:
        None

    Note:
        * Maximal geodesic neighborhood distance g_max for normal vector voting
          will be derived from radius_hit: g_max = pi * radius_hit / 2
        * If epsilon = 0 and eta = 0 (default), all triangles will be classified
          as "surface patch" (class 1).
    """
    # Preparation (calculations that are the same for the whole graph)
    t_begin0 = time.time()
    print('\nPreparing for running modified Vector Voting...')

    # * Maximal geodesic neighborhood distance g_max for normal vector voting *
    # g_max is 1/4 of circle circumference with radius=radius_hit
    g_max = math.pi * radius_hit / 2
    print("radius_hit = {}".format(radius_hit))
    print("g_max = {}".format(g_max))

    # * sigma *
    sigma = g_max / 3.0

    # * Maximal triangle area *
    a_max = sg.graph.gp.max_triangle_area
    print("Maximal triangle area = {}".format(a_max))

    # * Orientation classification parameters *
    print("epsilon = {}".format(epsilon))
    print("eta = {}".format(eta))

    # * Adding vertex properties to be filled in estimate_normal *
    # vertex property storing the orientation class of the vertex: 1 if it
    # belongs to a surface patch, 2 if it belongs to a crease junction or 3 if
    # it doesn't have a preferred orientation:
    sg.graph.vp.orientation_class = sg.graph.new_vertex_property("int")
    # vertex property for storing the estimated normal of the corresponding
    # vertex (if the vertex belongs to class 1):
    sg.graph.vp.n_v = sg.graph.new_vertex_property("vector<float>")
    # vertex property for storing the estimated tangent of the corresponding
    # vertex (if the vertex belongs to class 2):
    sg.graph.vp.t_v = sg.graph.new_vertex_property("vector<float>")

    if full_dist_map is True and sg.__class__.__name__ == "TriangleGraph":
        # * Distance map between all pairs of vertices *
        t_begin0_2 = time.time()
        print("\nCalculating the full distance map...")
        full_dist_map = shortest_distance(  # <class 'graph_tool.PropertyMap'>
            sg.graph,
            weights=sg.graph.ep.distance)
        t_end0_2 = time.time()
        duration0_2 = t_end0_2 - t_begin0_2
        minutes, seconds = divmod(duration0_2, 60)
        print('Calculation of the full distance map took: {} min {} s'.format(
            minutes, seconds))
    else:
        full_dist_map = None

    t_end0 = time.time()
    duration0 = t_end0 - t_begin0
    minutes, seconds = divmod(duration0, 60)
    print('Preparation took: {} min {} s'.format(minutes, seconds))

    # * For all vertices, collecting normal vector votes, calculating
    # average number of the geodesic neighbors and classifying the orientation
    # of each vertex *
    t_begin1 = time.time()
    print("\nRunning modified Vector Voting for all vertices...")

    if eta == 0 and epsilon == 0:
        print("\nFirst pass: estimating normals...")
    else:
        print(
            "\nFirst pass: classifying orientation and estimating normals for"
            " surface patches and tangents for creases...")

    collect_normal_votes = sg.collect_normal_votes
    estimate_normal = sg.estimate_normal
    first_pass = sg.first_pass
    num_v = sg.graph.num_vertices()
    print("number of vertices: {}".format(num_v))
    classes_counts = {}
    vertex = sg.graph.vertex
    vp_orientation_class = sg.graph.vp.orientation_class
    vp_n_v = sg.graph.vp.n_v
    vp_t_v = sg.graph.vp.t_v

    if cores > 1:  # parallel processing
        p = pp.ProcessPool(cores)
        print('Opened a pool with {} processes'.format(cores))
        results_list = p.map(
            partial(first_pass,
                    g_max=g_max,
                    a_max=a_max,
                    sigma=sigma,
                    full_dist_map=full_dist_map),
            list(range(num_v)))  # a list of vertex v indices
        p.close()
        p.clear()

        results_array = np.array(results_list, dtype=object)
        # Calculating average neighbors number:
        num_neighbors_array = results_array[:, 0]
        avg_num_neighbors = np.mean(num_neighbors_array)
        class_v_array = results_array[:, 1]
        n_v_array = results_array[:, 2]
        t_v_array = results_array[:, 3]

        # Adding the estimated properties to the graph and counting classes:
        for i in range(num_v):
            v = vertex(i)
            vp_orientation_class[v] = class_v_array[i]
            vp_n_v[v] = n_v_array[i]
            vp_t_v[v] = t_v_array[i]
            try:
                classes_counts[class_v_array[i]] += 1
            except KeyError:
                classes_counts[class_v_array[i]] = 1

    else:  # cores == 1, sequential processing
        sum_num_neighbors = 0
        for i in range(num_v):
            if sg.__class__.__name__ == "TriangleGraph":
                num_neighbors, V_v = collect_normal_votes(
                    i, g_max, a_max, sigma, full_dist_map=full_dist_map)
            else:  # PointGraph
                num_neighbors, V_v = collect_normal_votes(
                    i, g_max, a_max, sigma)
            sum_num_neighbors += num_neighbors
            class_v, n_v, t_v = estimate_normal(i, V_v, epsilon, eta)
            # Adding the estimated properties to the graph and counting classes:
            v = vertex(i)
            vp_orientation_class[v] = class_v
            vp_n_v[v] = n_v
            vp_t_v[v] = t_v
            try:
                classes_counts[class_v] += 1
            except KeyError:
                classes_counts[class_v] = 1
        avg_num_neighbors = float(sum_num_neighbors) / float(num_v)

    # Printing out some numbers concerning the first pass:
    print("Average number of geodesic neighbors for all vertices: {}".format(
        avg_num_neighbors))
    print("{} surface patches".format(classes_counts.get(1, 0)))
    if 2 in classes_counts:
        print("{} crease junctions".format(classes_counts[2]))
    if 3 in classes_counts:
        print("{} no preferred orientation".format(classes_counts[3]))

    # Save the graph to a file for use by different methods in the second run:
    sg.graph.save(graph_file)

    t_end1 = time.time()
    duration1 = t_end1 - t_begin1
    minutes, seconds = divmod(duration1, 60)
    print('First pass took: {} min {} s'.format(minutes, seconds))

    if runtimes != '':
        with open(runtimes, 'a') as f:
            f.write("{};{};{};{};{};{};".format(num_v, radius_hit, g_max,
                                                avg_num_neighbors, cores,
                                                duration1))
Example #26
def main(args):
    outdir = os.path.abspath(args.outdir)
    if not os.path.exists(outdir):
        logger.debug("creating output directory: {}".format(outdir))
        os.mkdir(outdir)
    else:
        logger.debug("using output directory: {}".format(outdir))

    start = timer()
    logger.debug("loading graph from {}. This will take a while...".format(
        args.edges))
    g = graph_tool.load_graph_from_csv(args.edges,
                                       directed=True,
                                       skip_first=True,
                                       csv_options={'delimiter': '\t'})
    logger.debug("done loading graph. Took {}".format(
        format_timespan(timer() - start)))

    start = timer()
    logger.debug("creating dictionary of name to vertices...")
    name_to_v = {g.vp.name[v]: v for v in g.vertices()}
    logger.debug("done loading dictionary. Took {}".format(
        format_timespan(timer() - start)))

    start = timer()
    n_samples = 1000
    random_seed = 999
    logger.debug("getting a sample of {} vertices (random seed: {})".format(
        n_samples, random_seed))
    random_state = np.random.RandomState(random_seed)
    vertices_sample_indexes = random_state.randint(low=0,
                                                   high=len(name_to_v),
                                                   size=n_samples)
    vertices_sample = [g.vertex(x) for x in vertices_sample_indexes]
    # vertices_sample is a list of graph-tools Vertex objects
    logger.debug("done getting random sample. took {}".format(
        format_timespan(timer() - start)))

    # get a unique filename
    i = 0
    while True:
        fname_calc_times = os.path.join(outdir,
                                        'calc_times_{:03}.csv'.format(i))
        if not os.path.exists(fname_calc_times):
            break
        i += 1
    f_calc_times = open(fname_calc_times, 'w', buffering=1)

    sep = ','
    logger.debug("writing header to {}".format(fname_calc_times))
    f_calc_times.write(
        "source_index{sep}source_name{sep}calc_time{sep}distance_fname\n".
        format(sep=sep))

    start = timer()
    logger.debug("starting shortest path calculations...")
    if args.undirected is True:
        logger.debug(
            "treating graph as undirected for shortest distance calculations")
        directed = False
    else:
        directed = None

    for i, source in enumerate(vertices_sample):
        this_start = timer()
        source_name = g.vp.name[source]
        source_index = vertices_sample_indexes[i]
        outfname = "{:012d}.csv".format(i)  # filename corresponds to a row of the calc_times CSV
        outfname = os.path.join(outdir, outfname)
        if os.path.exists(outfname):
            logger.debug(
                "filename {} already exists. skipping.".format(outfname))
            continue
        logger.debug(
            "calculating shortest distance for vertex: index: {} | name: {}".
            format(source_index, source_name))
        dist = shortest_distance(g,
                                 source=source,
                                 target=vertices_sample,
                                 directed=directed)
        this_time = timer() - this_start
        with open(outfname, 'w') as outf:
            for x in dist:
                outf.write("{}\n".format(x))
        f_calc_times.write(
            "{source_index}{sep}{source_name}{sep}{calc_time}{sep}{distance_fname}\n"
            .format(sep=sep,
                    source_index=source_index,
                    source_name=source_name,
                    calc_time=this_time,
                    distance_fname=outfname))
    logger.debug("finished shortest path calculations. Took {}".format(
        format_timespan(timer() - start)))
    f_calc_times.close()
Example #27
def main(args):
    outdir = os.path.abspath(args.outdir)
    if not os.path.exists(outdir):
        logger.debug("creating output directory: {}".format(outdir))
        os.mkdir(outdir)
    else:
        logger.debug("using output directory: {}".format(outdir))

    logger.debug("loading file with sample IDs: {}".format(args.sample_ids))
    with open(args.sample_ids, 'r') as f:
        samples = json.load(f, object_pairs_hook=OrderedDict)
    sample_ids, source_ids, discipline_name = build_sample_set(samples, args.discipline_index)
    logger.debug("using discipline {} (index {})".format(discipline_name, args.discipline_index))

    fname_calc_times = get_unique_filename(basename='calc_times', ext='.csv', basedir=outdir)
    f_calc_times = open(fname_calc_times, 'w', buffering=1)
    sep = ','
    logger.debug("writing header to {}".format(fname_calc_times))
    f_calc_times.write("source_name{sep}calc_time{sep}distance_fname\n".format(sep=sep))

    
    start = timer()
    logger.debug("loading graph from {}. This will take a while...".format(args.edges))
    g = load_graph(args.edges)
    logger.debug("done loading graph. Took {}".format(format_timespan(timer()-start)))

    start = timer()
    logger.debug("creating dictionary of name to vertices...")
    name_to_v = get_name_to_vertices_dict(g)
    logger.debug("done loading dictionary. Took {}".format(format_timespan(timer()-start)))

    start = timer()
    logger.debug("starting shortest path calculations...")
    if args.undirected is True:
        logger.debug("treating graph as undirected for shortest distance calculations")
        directed = False
    else:
        directed = None

    vertices_sample = [name_to_v[wos_id] for wos_id in sample_ids]
    vertices_source = [name_to_v[wos_id] for wos_id in source_ids]
    logger.debug("number of sample vertices: {}".format(len(vertices_sample)))

    for i, source in enumerate(vertices_source):
        this_start = timer()
        source_name = g.vp.name[source]
        outfname = "{:012d}.csv".format(i)
        outfname = os.path.join(outdir, outfname)
        if os.path.exists(outfname):
            logger.debug("filename {} already exists. skipping.".format(outfname))
        else:
            logger.debug("calculating shortest distance for vertex: name: {}".format(source_name))
            dist = shortest_distance(g, source=source, target=vertices_sample, directed=directed)
            this_time = timer() - this_start
            with open(outfname, 'w') as outf:
                for i_dist, x in enumerate(dist):
                    outf.write("{source_name}{sep}{target_name}{sep}{distance}\n"
                               .format(sep=sep,
                                       source_name=source_name,
                                       target_name=sample_ids[i_dist],
                                       distance=x))
            f_calc_times.write(
                "{source_name}{sep}{calc_time}{sep}{distance_fname}\n".format(
                    sep=sep,
                    source_name=source_name,
                    calc_time=this_time,
                    distance_fname=os.path.basename(outfname)))
        vertices_sample = vertices_sample[1:]  # drop the first sample each pass:
        sample_ids = sample_ids[1:]            # source i only needs distances to samples i and later

    logger.debug("finished shortest path calculations. Took {}".format(format_timespan(timer()-start)))
    f_calc_times.close()
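A minimal entry-point sketch for main() above. Only the attribute names (args.edges, args.sample_ids, args.outdir, args.discipline_index, args.undirected) are taken from the code; the flag spellings, help texts, and default are assumptions.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='per-source shortest-distance calculations')
    parser.add_argument('edges', help='graph file readable by load_graph()')
    parser.add_argument('sample_ids', help='JSON file with sample IDs')
    parser.add_argument('outdir', help='directory for per-source CSV output')
    # --discipline-index populates args.discipline_index; the default is assumed.
    parser.add_argument('--discipline-index', type=int, default=0)
    parser.add_argument('--undirected', action='store_true',
                        help='treat the graph as undirected')
    main(parser.parse_args())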
Example No. 28
0
def label_item_inheritance(g, item_emergence_item_prop):
    """label_item_inheritance
    Creates property maps that labels vertices as containing recombining or
    reconducting items. An item is recombining for a community if it exists
    previously in the phylomemetic network, but not in a direct ancestor of 
    the community. An item is reconducting for a community if it exists in a
    direct parent of the community.
    
    Parameters
    ----------
        g : :obj:`graph_tool.Graph` A graph.
        item_emergence_item_prop : :obj:`graph_tool.VertexPropertyMap` 
            Items that are emerging as calculated by label_item_emergence.
    Returns
    -------
        item_recombination_count : :obj:`graph_tool.VertexPropertyMap` 
            Property map with values representing the number of recombining 
            items in a vertex.
        item_recombination_item : :obj:`graph_tool.VertexPropertyMap` 
            Property map with a list of recombining items for each vertex.
        item_reconduction_count : :obj:`graph_tool.VertexPropertyMap` 
            Property map with values representing the number of reconducting 
            items in a vertex.
            
        item_reconduction_item : :obj:`graph_tool.PropertyMap` Property map
            with a list of reconducting items for each vertex.
    """
    item_reconduction_count = g.new_vertex_property('int')
    item_reconduction_item = g.new_vertex_property('vector<int>')

    item_recombination_count = g.new_vertex_property('int')
    item_recombination_item = g.new_vertex_property('vector<int>')

    vertex_parents = {}
    vertex_predecessors = {}

    for term, vertices in reverse_mapping(g).items():
        for v in vertices:
            if term not in item_emergence_item_prop[v]:
                if v not in vertex_parents:
                    dist_map, pred_map, pred_visited = shortest_distance(
                        g,
                        source=v,
                        pred_map=True,
                        return_reached=True,
                    )
                    parents = set([g.vertex(p) for p in pred_visited])
                    vertex_parents[v] = parents
                else:
                    parents = vertex_parents[v]

                if v not in vertex_predecessors:
                    label = g.vp['label'][v]
                    predecessors = set(
                        [p for p in vertices if g.vp['label'][p] < label])
                    vertex_predecessors[v] = predecessors
                else:
                    predecessors = vertex_predecessors[v]

                overlap = predecessors.intersection(parents)
                n_overlap = len(overlap)

                if n_overlap > 0:
                    item_reconduction_item[v].append(term)
                    item_reconduction_count[v] += 1
                else:
                    item_recombination_item[v].append(term)
                    item_recombination_count[v] += 1
    return (item_recombination_count, item_recombination_item,
            item_reconduction_count, item_reconduction_item)
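A hedged usage sketch: the four returned maps can be internalised so that g.save() persists them. emergence_prop is assumed to come from the label_item_emergence function the docstring references, which is not shown here.

(recomb_count, recomb_items,
 recond_count, recond_items) = label_item_inheritance(g, emergence_prop)

# Internalise the maps so they are saved together with the graph.
g.vp['item_recombination_count'] = recomb_count
g.vp['item_recombination_item'] = recomb_items
g.vp['item_reconduction_count'] = recond_count
g.vp['item_reconduction_item'] = recond_items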
Example No. 29
0
def distances(g: Graph):
    """Return the all-pairs shortest-distance matrix as a dense int ndarray."""
    dist = shortest_distance(g, directed=g.is_directed())
    return np.array([list(x) for x in dist], dtype=int)
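A small usage sketch on a toy graph, assuming the imports of this snippet; graph-tool marks unreachable pairs with the distance dtype's maximum value rather than raising:

from graph_tool import Graph

g = Graph(directed=False)
g.add_vertex(3)
g.add_edge(g.vertex(0), g.vertex(1))  # vertex 2 stays isolated

mat = distances(g)
print(mat[0, 1])  # 1
print(mat[0, 2])  # 2147483647, the int32 sentinel for "unreachable"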
Example No. 30
0
    def calculate_density(self,
                          size,
                          scale,
                          mask=None,
                          target_coordinates=None,
                          verbose=False):
        """
        Calculates ribosome density for each membrane graph vertex.

        Calculates shortest geodesic distances (d) for each vertex in the graph
        to each reachable ribosome center mapped on the membrane given by a
        binary mask with coordinates in pixels or an array of coordinates in
        given units.
        Then, calculates a density measure of ribosomes at each vertex or
        membrane voxel: D = sum {over all reachable ribosomes} (1 / (d + 1)).
        Adds the density as vertex PropertyMap to the graph. Returns an array
        with the same shape as the underlying segmentation with the densities
        plus 1, in order to distinguish membrane voxels with 0 density from the
        background.

        Args:
            size (tuple): size in voxels (X, Y, Z) of the original segmentation
            scale (tuple): pixel size (X, Y, Z) in given units of the original
                segmentation
            mask (numpy.ndarray, optional): a binary mask of the ribosome
                centers as 3D array where indices are coordinates in pixels
                (default None)
            target_coordinates (numpy.ndarray, optional): the ribosome centers
                coordinates in given units as 2D array in format
                [[x1, y1, z1], [x2, y2, z2], ...] (default None)
            verbose (boolean, optional): if True (default False), some extra
                information will be printed out

        Returns:
            a 3D numpy ndarray with the densities + 1

        Note:
            One of the two parameters, mask or target_coordinates, has to be
            given.
        """
        from . import ribosome_density as rd
        # If a mask is given, find the set of voxels of ribosome centers mapped
        # on the membrane, 'target_voxels', and rescale them to units,
        # 'target_coordinates':
        if mask is not None:
            if mask.shape != size:
                raise pexceptions.PySegInputError(
                    expr='calculate_density (SegmentationGraph)',
                    msg=("Size of the input 'mask' has to be equal to the "
                         "size set during the generation of the graph."))
            # output as a list of tuples [(x1,y1,z1), (x2,y2,z2), ...] in pixels
            target_voxels = rd.get_foreground_voxels_from_mask(mask)
            # for rescaling have to convert to an ndarray
            target_ndarray_voxels = rd.tupel_list_to_ndarray_voxels(
                target_voxels)
            # rescale to units, output an ndarray [[x1,y1,z1], [x2,y2,z2], ...]
            target_ndarray_coordinates = (target_ndarray_voxels *
                                          np.asarray(scale))
            # convert to a list of tuples, which are in units now
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_ndarray_coordinates)
        # If target_coordinates are given (in units), convert them from a numpy
        # ndarray to a list of tuples:
        elif target_coordinates is not None:
            target_coordinates = rd.ndarray_voxels_to_tupel_list(
                target_coordinates)
        # Exit if the target_voxels list is empty:
        if len(target_coordinates) == 0:
            raise pexceptions.PySegInputError(
                expr='calculate_density (SegmentationGraph)',
                msg="No target voxels were found! Check your input ('mask' or "
                "'target_coordinates').")
        print('{} target voxels'.format(len(target_coordinates)))
        if verbose:
            print(target_coordinates)

        # Pre-filter the target coordinates to those existing in the graph
        # (should already all be in the graph, but just in case):
        target_coordinates_in_graph = []
        for target_xyz in target_coordinates:
            if target_xyz in self.coordinates_to_vertex_index:
                target_coordinates_in_graph.append(target_xyz)
            else:
                raise pexceptions.PySegInputWarning(
                    expr='calculate_density (SegmentationGraph)',
                    msg=('Target ({}, {}, {}) not inside the membrane!'.format(
                        target_xyz[0], target_xyz[1], target_xyz[2])))

        print('{} target coordinates in graph'.format(
            len(target_coordinates_in_graph)))
        if verbose:
            print(target_coordinates_in_graph)

        # Get all indices of the target coordinates:
        target_vertices_indices = []
        for target_xyz in target_coordinates_in_graph:
            v_target_index = self.coordinates_to_vertex_index[target_xyz]
            target_vertices_indices.append(v_target_index)

        # Density calculation
        # Add a new vertex property to the graph, density:
        self.graph.vp.density = self.graph.new_vertex_property("float")
        # Dictionary mapping voxel coordinates (for the volume returned later)
        # to a list of density values falling within that voxel:
        voxel_to_densities = {}

        # For each vertex in the graph:
        for v_membrane in self.graph.vertices():
            # Get its coordinates:
            membrane_xyz = self.graph.vp.xyz[v_membrane]
            if verbose:
                print('Membrane vertex ({}, {}, {})'.format(
                    membrane_xyz[0], membrane_xyz[1], membrane_xyz[2]))
            # Get a distance map with all pairs of distances between current
            # graph vertex (membrane_xyz) and target vertices (ribosome
            # coordinates):
            dist_map = shortest_distance(self.graph,
                                         source=v_membrane,
                                         target=target_vertices_indices,
                                         weights=self.graph.ep.distance)

            # Iterate over all shortest distances from the membrane vertex to
            # the target vertices, while calculating the density:
            # Initializing: membrane coordinates with no reachable ribosomes
            # will have a value of 0, those with reachable ribosomes > 0.
            density = 0
            # If there is only one target voxel, dist_map is a single value -
            # wrap it into a list.
            if len(target_coordinates_in_graph) == 1:
                dist_map = [dist_map]
            for d in dist_map:
                if verbose:
                    print('\tTarget vertex ...')
                # if unreachable, the maximum float64 is stored
                if d == np.finfo(np.float64).max:
                    if verbose:
                        print('\t\tunreachable')
                else:
                    if verbose:
                        print('\t\td = {}'.format(d))
                    density += 1 / (d + 1)

            # Add the density of the membrane vertex as a property of the
            # current vertex in the graph:
            self.graph.vp.density[v_membrane] = density

            # Calculate the corresponding voxel of the vertex and add the
            # density to the list keyed by the voxel in the dictionary:
            # Scale the coordinates back from units to voxels. (Without
            # round(), int() would truncate float coordinates down to the
            # next lowest integer.)
            voxel_x = int(round(membrane_xyz[0] / scale[0]))
            voxel_y = int(round(membrane_xyz[1] / scale[1]))
            voxel_z = int(round(membrane_xyz[2] / scale[2]))
            voxel = (voxel_x, voxel_y, voxel_z)
            if voxel in voxel_to_densities:
                voxel_to_densities[voxel].append(density)
            else:
                voxel_to_densities[voxel] = [density]

            if verbose:
                print('\tdensity = {}'.format(density))
            if (self.graph.vertex_index[v_membrane] + 1) % 1000 == 0:
                now = datetime.now()
                print('{} membrane vertices processed on: {}-{}-{} {}:{}:{}'.
                      format(self.graph.vertex_index[v_membrane] + 1, now.year,
                             now.month, now.day, now.hour, now.minute,
                             now.second))

        # Initialize an array scaled like the original segmentation, which will
        # hold in each membrane voxel the maximal density among the
        # corresponding vertex coordinates in the graph plus 1 and 0 in each
        # background (non-membrane) voxel:
        densities = np.zeros(size, dtype=np.float16)
        # The densities array membrane voxels are initialized with 1 in order to
        # distinguish membrane voxels from the background.
        for voxel in voxel_to_densities:
            densities[voxel[0], voxel[1],
                      voxel[2]] = 1 + max(voxel_to_densities[voxel])
        if verbose:
            print('densities:\n{}'.format(densities))
        return densities
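A hedged usage sketch, assuming sg is a SegmentationGraph built beforehand with the xyz vertex property and distance edge property used above; sizes, scale, and coordinates are illustrative values only.

import numpy as np

size = (100, 100, 50)        # segmentation size in voxels (X, Y, Z)
scale = (1.0, 1.0, 1.0)      # pixel size in units (X, Y, Z), assumed
centers = np.array([[10.0, 20.0, 5.0],
                    [31.0, 15.0, 8.0]])  # ribosome centers in units

densities = sg.calculate_density(size, scale, target_coordinates=centers)
# Background voxels are 0; membrane voxels hold 1 + their maximal density.
print(densities.max())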
Example No. 31
0
    def reset(self, v_start, v_goal):
        self.shortest_dist = shortest_distance(
            self.graph, v_start, v_goal, directed=True
        )
Example No. 32
0
    def calc_distance(self, src, dest):
        return shortest_distance(self.graph, self.planets[src],
                                 self.planets[dest])
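Hedged usage, assuming the owning class keeps self.planets as a dict from planet names to vertex descriptors of self.graph (neither is defined in this excerpt; nav and the planet names are hypothetical):

hops = nav.calc_distance('Tatooine', 'Dagobah')
print(hops)  # number of edges on the shortest route between the two planets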