def test_graph_computation_uses_only_latest_computed_visits(self):
    # This visit belongs to an older computation batch, so it must not
    # contribute any edges to the graph.
    create_location_visit(
        compute_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    # Two visits from the latest batch: only these should be graphed.
    create_location_visit(
        compute_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        compute_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # Expected path: Start -> page_type_2 -> page_type_2 -> End (3 edges).
    self.assertEqual(NavigationEdge.select().count(), 3)
    observed_transitions = [
        (edge.source_vertex.page_type, edge.target_vertex.page_type)
        for edge in NavigationEdge.select()
    ]
    self.assertIn(("page_type_2", "page_type_2"), observed_transitions)
def test_filter_to_only_one_concern_if_concern_index_provided(self):
    # Visits for concern 0 should be ignored entirely...
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    # ...while visits for concern 1 should be captured.
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )

    compute_navigation_graph(concern_index=1, page_type_lookup=PAGE_TYPE_LOOKUP)

    # Only concern 1's path remains: Start -> page_type_2 -> page_type_2 -> End.
    self.assertEqual(NavigationEdge.select().count(), 3)
    observed_transitions = [
        (edge.source_vertex.page_type, edge.target_vertex.page_type)
        for edge in NavigationEdge.select()
    ]
    self.assertIn(("page_type_2", "page_type_2"), observed_transitions)
def test_edge_added_between_all_consecutive_visits(self):
    # Three visits in chronological order: page1, page1, page2.
    for url, start_second, end_second in [
            ("page1", 1, 2), ("page1", 3, 4), ("page2", 5, 6)]:
        create_location_visit(
            url=url,
            start=datetime.datetime(2000, 1, 1, 12, 0, start_second, 0),
            end=datetime.datetime(2000, 1, 1, 12, 0, end_second, 0),
        )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    edges = NavigationEdge.select()
    # There will be 4 edges:
    # * 2 for the transitions between the 3 URLs above
    # * 1 for the transition from "Start" to the first URL
    # * 1 for the transition from the last URL to "End"
    self.assertEqual(edges.count(), 4)
    edge_page_type_pairs = [
        (edge.source_vertex.page_type, edge.target_vertex.page_type)
        for edge in edges
    ]
    for expected_pair in [
            ("Start", "page_type_1"),
            ("page_type_1", "page_type_1"),
            ("page_type_1", "page_type_2"),
            ("page_type_2", "End")]:
        self.assertIn(expected_pair, edge_page_type_pairs)
def test_include_all_concerns_if_no_concern_index_provided(self):
    # With no concern_index filter, visits from both concerns are captured.
    visit_specs = [
        (0, "page1", 1, 2),
        (0, "page1", 3, 4),
        (1, "page2", 5, 6),
        (1, "page2", 7, 8),
    ]
    for concern, url, start_second, end_second in visit_specs:
        create_location_visit(
            concern_index=concern,
            url=url,
            start=datetime.datetime(2000, 1, 1, 12, 0, start_second, 0),
            end=datetime.datetime(2000, 1, 1, 12, 0, end_second, 0),
        )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # Each concern contributes its own Start -> page -> page -> End chain
    # (3 edges per concern), for 6 edges total.
    self.assertEqual(NavigationEdge.select().count(), 6)
def test_edge_occurrences_counts_number_of_transitions_between_page_types(
        self):
    # Three visits to page1 followed by one visit to page2.
    for url, start_second, end_second in [
            ("page1", 1, 2), ("page1", 3, 4),
            ("page1", 5, 6), ("page2", 7, 8)]:
        create_location_visit(
            url=url,
            start=datetime.datetime(2000, 1, 1, 12, 0, start_second, 0),
            end=datetime.datetime(2000, 1, 1, 12, 0, end_second, 0),
        )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # Index the edges by their (source, target) page types.
    edge_dict = {}
    for edge in NavigationEdge.select():
        key = (edge.source_vertex.page_type, edge.target_vertex.page_type)
        edge_dict[key] = edge

    # page_type_1 -> page_type_1 happened twice; every other edge once.
    self.assertEqual(edge_dict[('Start', 'page_type_1')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_1')].occurrences, 2)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_2')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_2', 'End')].occurrences, 1)
# NOTE(review): this re-defines test_edge_added_between_all_consecutive_visits,
# which already appears earlier in this file; the earlier definition is
# shadowed and never runs.  Consider deleting one of the two copies.
def test_edge_added_between_all_consecutive_visits(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    edges = NavigationEdge.select()
    # 4 edges expected: 2 between the 3 URLs above, 1 from "Start" to the
    # first URL, and 1 from the last URL to "End".
    self.assertEqual(edges.count(), 4)
    observed_pairs = [
        (e.source_vertex.page_type, e.target_vertex.page_type) for e in edges
    ]
    self.assertIn(("Start", "page_type_1"), observed_pairs)
    self.assertIn(("page_type_1", "page_type_1"), observed_pairs)
    self.assertIn(("page_type_1", "page_type_2"), observed_pairs)
    self.assertIn(("page_type_2", "End"), observed_pairs)
# NOTE(review): this re-defines
# test_edge_occurrences_counts_number_of_transitions_between_page_types,
# which already appears earlier in this file; the earlier definition is
# shadowed and never runs.  Consider deleting one of the two copies.
def test_edge_occurrences_counts_number_of_transitions_between_page_types(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    edge_dict = {
        (e.source_vertex.page_type, e.target_vertex.page_type): e
        for e in NavigationEdge.select()
    }
    self.assertEqual(edge_dict[('Start', 'page_type_1')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_1')].occurrences, 2)
    self.assertEqual(edge_dict[('page_type_1', 'page_type_2')].occurrences, 1)
    self.assertEqual(edge_dict[('page_type_2', 'End')].occurrences, 1)
def test_graph_skips_redirects(self):
    # Redirects typically show no content; they are just a gateway to another
    # page.  They are left out of the navigation graph---it is more
    # meaningful to connect the link before them to the link they point to.
    create_location_visit(
        url="redirect",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # With the redirect skipped, only the "Start" -> "End" edge remains.
    self.assertEqual(NavigationEdge.select().count(), 1)
def test_edge_transition_probabilities_normalize_occurrences(self):
    # Visit sequence: page1, page1, page1, page2, page1.
    for url, start_second, end_second in [
            ("page1", 1, 2), ("page1", 3, 4), ("page1", 5, 6),
            ("page2", 7, 8), ("page1", 9, 10)]:
        create_location_visit(
            url=url,
            start=datetime.datetime(2000, 1, 1, 12, 0, start_second, 0),
            end=datetime.datetime(2000, 1, 1, 12, 0, end_second, 0),
        )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    edge_dict = {
        (e.source_vertex.page_type, e.target_vertex.page_type): e
        for e in NavigationEdge.select()
    }
    # page_type_1 is left 4 times: twice to itself, once to page_type_2,
    # and once to End; page_type_2 is always followed by page_type_1.
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'page_type_1')].probability, 0.5)
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'page_type_2')].probability, 0.25)
    self.assertAlmostEqual(
        edge_dict[('page_type_1', 'End')].probability, 0.25)
    self.assertAlmostEqual(
        edge_dict[('page_type_2', 'page_type_1')].probability, 1)
def test_edge_not_added_between_concerns_for_the_same_participant(self):
    # One visit each for two different concerns of the same participant.
    create_location_visit(
        concern_index=0,
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        concern_index=1,
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    # 4 edges should have been created---between the Start vertex, the one
    # URL, and the End vertex for each of the concerns.  Crucially, no edge
    # links the last page of concern 0 to the first page of concern 1.
    self.assertEqual(NavigationEdge.select().count(), 4)
# NOTE(review): this re-defines
# test_edge_transition_probabilities_normalize_occurrences, which already
# appears earlier in this file; the earlier definition is shadowed and never
# runs.  Consider deleting one of the two copies.
def test_edge_transition_probabilities_normalize_occurrences(self):
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 1, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 2, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 3, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 4, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 5, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 6, 0),
    )
    create_location_visit(
        url="page2",
        start=datetime.datetime(2000, 1, 1, 12, 0, 7, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 8, 0),
    )
    create_location_visit(
        url="page1",
        start=datetime.datetime(2000, 1, 1, 12, 0, 9, 0),
        end=datetime.datetime(2000, 1, 1, 12, 0, 10, 0),
    )

    compute_navigation_graph(page_type_lookup=PAGE_TYPE_LOOKUP)

    probabilities = {}
    for edge in NavigationEdge.select():
        source = edge.source_vertex.page_type
        target = edge.target_vertex.page_type
        probabilities[(source, target)] = edge.probability

    self.assertAlmostEqual(probabilities[('page_type_1', 'page_type_1')], 0.5)
    self.assertAlmostEqual(probabilities[('page_type_1', 'page_type_2')], 0.25)
    self.assertAlmostEqual(probabilities[('page_type_1', 'End')], 0.25)
    self.assertAlmostEqual(probabilities[('page_type_2', 'page_type_1')], 1)
def compute_navigation_graph(page_type_lookup, exclude_users=None,
                             show_progress=False, concern_index=None):
    """
    Build and persist a navigation graph from the latest computed visits.

    Vertices are page types (plus synthetic "Start" and "End" vertices); an
    edge counts the transitions between two consecutively visited page types
    for each participant-concern session.  Results are saved as
    NavigationVertex and NavigationEdge rows tagged with a fresh
    compute_index.

    Args:
        page_type_lookup: dict mapping a standardized URL to a dict with at
            least a 'main_type' (page type name) and a 'redirect' flag.
        exclude_users: optional list of participant IDs to leave out.
        show_progress: if True, render console progress bars.
        concern_index: if provided, restrict the graph to visits for only
            this concern.

    Fixes relative to the previous version:
    * the sessions progress bar is now finished after its loop, and the
      duplicated trailing `progress_bar.finish()` call was removed;
    * Python-2-only tuple-unpacking lambdas were replaced with portable
      equivalents;
    * deprecated `logger.warn` replaced with `logger.warning`.
    """
    exclude_users = [] if exclude_users is None else exclude_users

    # Create a new index for this computation
    last_compute_index = NavigationVertex.select(
        fn.Max(NavigationVertex.compute_index)).scalar() or 0
    compute_index = last_compute_index + 1

    # Fetch the set of visits for the most recently computed visits
    visit_compute_index = LocationVisit.select(
        fn.Max(LocationVisit.compute_index)).scalar()
    visits = LocationVisit.select().where(
        LocationVisit.compute_index == visit_compute_index)

    # If the user has provided a concern index that they want to compute the
    # graph for, then restrict navigation data to only that concern
    if concern_index is not None:
        visits = visits.where(LocationVisit.concern_index == concern_index)

    # Get the distinct participant IDs and concern indexes.
    # Exclude any users that were not requested as part of the analysis.
    participant_ids = set([
        visit.user_id for visit in visits
        if visit.user_id not in exclude_users
    ])
    concern_indexes = set([visit.concern_index for visit in visits])

    # Set up progress bar.
    total_iterations_count = len(participant_ids) * len(concern_indexes)
    if show_progress:
        progress_bar = ProgressBar(maxval=total_iterations_count, widgets=[
            'Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
            ' ', ETA(), ' Read ', Counter(),
            ' / ' + str(total_iterations_count) + ' sessions.'
        ])
        progress_bar.start()

    # The list of vertices needs to be populated with a start and end node.
    # All navigation behavior starts at the "Start" node, and ends at the
    # "End" node.
    vertices = {
        "Start": Vertex("Start", occurrences=1),
        "End": Vertex("End", occurrences=1),
    }
    edges = {}
    last_vertex = vertices["Start"]
    iterations_count = 0

    # Go through every concern for every participant.  For each page they
    # visit, increment the visits to the corresponding vertex.  For each
    # transition from one page to the next, increment the occurrence of a
    # transition between two page types.
    for participant_id in participant_ids:
        for concern_index in concern_indexes:

            participant_concern_visits = visits.where(
                LocationVisit.user_id == participant_id,
                LocationVisit.concern_index == concern_index,
            ).order_by(LocationVisit.start.asc())

            for visit in participant_concern_visits:

                # Get the type of the page visited
                standardized_url = standardize_url(visit.url)
                if standardized_url in page_type_lookup:
                    url_info = page_type_lookup[standardized_url]
                    page_type = url_info['main_type']
                    # If this is a redirect, then just skip it.  It's more
                    # important to link the URL before it to the link the
                    # redirect points to.
                    if url_info['redirect']:
                        continue
                else:
                    logger.warning(
                        "URL %s not in page type lookup. Giving it 'Unknown' type",
                        standardized_url)
                    page_type = "Unknown"

                # Add a new vertex for this page type if it doesn't exist
                if page_type not in vertices:
                    vertices[page_type] = Vertex(page_type)

                # Save that we have seen this page type one more time
                vertex = vertices[page_type]
                vertex.occurrences += 1

                # Add the time spent to the total time spent for this page type
                time_passed = visit.end - visit.start
                seconds = time_passed.seconds + (
                    time_passed.microseconds / float(1000000))
                vertex.total_time += seconds

                # Connect an edge between the last page visited and this one
                if (last_vertex.page_type, vertex.page_type) not in edges:
                    edges[(last_vertex.page_type, vertex.page_type)] =\
                        Edge(last_vertex, vertex)
                edge = edges[(last_vertex.page_type, vertex.page_type)]
                edge.occurrences += 1

                # Redefine the last page so we know in the next iteration
                # what was just visited.
                last_vertex = vertex

            # After each participant or each concern, connect from the last
            # URL to the end vertex
            end_vertex = vertices['End']
            if (last_vertex.page_type, end_vertex.page_type) not in edges:
                edges[(last_vertex.page_type, end_vertex.page_type)] =\
                    Edge(last_vertex, end_vertex)
            edge = edges[(last_vertex.page_type, end_vertex.page_type)]
            edge.occurrences += 1

            # After each participant or each concern, we reset the
            # last_page_type to "Start"
            last_vertex = vertices['Start']

            if show_progress:
                iterations_count += 1
                progress_bar.update(iterations_count)

    # Finish the sessions progress bar.  (Previously this bar was never
    # finished and the edges bar below was finished twice.)
    if show_progress:
        progress_bar.finish()

    # Compute the mean time spent on each vertex
    for vertex in vertices.values():
        vertex.mean_time = vertex.total_time / float(vertex.occurrences)

    # Compute the transition probability for each edge leaving a vertex.
    # First, group all edges by their source vertex.  (A plain subscripting
    # lambda is used instead of the old `lambda (source, target): source`,
    # which is a syntax error on Python 3.)
    get_source_page_type = lambda edge_key: edge_key[0]
    sorted_edge_keys = sorted(edges.keys(), key=get_source_page_type)
    edge_groups = itertools.groupby(sorted_edge_keys, get_source_page_type)
    for _, edge_group in edge_groups:

        # Fetch those edges in the current group.
        # (Those in the current group share the same source.)
        group_edges = [edges[edge_key] for edge_key in edge_group]

        # Compute the probability of each edge being taken
        total_occurrences = sum([e.occurrences for e in group_edges])
        for edge in group_edges:
            edge.probability = float(edge.occurrences) / total_occurrences

    # Save all vertices to the database
    vertex_models = {}
    for vertex in vertices.values():
        vertex_model = NavigationVertex.create(
            compute_index=compute_index,
            page_type=vertex.page_type,
            occurrences=vertex.occurrences,
            total_time=vertex.total_time,
            mean_time=vertex.mean_time,
        )
        # We store a dictionary from page type to vertex model so we can
        # look up these models when saving the edges.
        vertex_models[vertex.page_type] = vertex_model

    # Save all edges to the database.
    # We use a progress bar for this as there might be a lot of edges and
    # we upload each of them separately to the database.
    if show_progress:
        progress_bar = ProgressBar(maxval=len(edges), widgets=[
            'Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
            ' ', ETA(), ' Updated graph with ', Counter(),
            ' / ' + str(len(edges)) + ' edges.'
        ])
        progress_bar.start()

    for edge_index, edge in enumerate(edges.values(), start=1):
        NavigationEdge.create(
            compute_index=compute_index,
            source_vertex=vertex_models[edge.source_vertex.page_type],
            target_vertex=vertex_models[edge.target_vertex.page_type],
            occurrences=edge.occurrences,
            probability=edge.probability,
        )
        if show_progress:
            progress_bar.update(edge_index)

    if show_progress:
        progress_bar.finish()
def main(compute_index, output_format, *args, **kwargs):
    """
    Draw the navigation graph for one round of graph computation.

    Args:
        compute_index: which round of graph computation to draw.  If None,
            the most recently computed graph is used.
        output_format: image format for the drawing (passed through to
            graphviz and used as the output file extension).

    Fix relative to the previous version: a missing comma in the
    import-error message list silently concatenated the blank-line entry
    with the "Note:" line, losing the intended blank line in the output.
    """
    # Attempt to import graph_tool, and share a helpful debugging message if
    # it's not found.
    try:
        import graph_tool.all as gt
    except ImportError as e:
        print(str(e))
        print('\n'.join([
            "",
            "ERROR: The \"graph_tool\" module could not be imported.",
            "Install the package and then point to it with PYTHONPATH.",
            "",
            "Details: graph-tool isn't required for most scripts in this repository.",
            "But it's needed to draw graphs in *this* script. To download this",
            "package, see the download instructions on the graph-tool website:",
            "",
            "https://graph-tool.skewed.de/download",
            "",  # BUGFIX: this comma was missing, fusing the blank line into the next entry
            "Note: it's not enough to install \"graph_tool\" through pip.",
            "It relies on C++ libraries for accelerated graph routines.",
            "You'll have to use your system package manager or compile from scratch.",
            "",
        ]))
        raise SystemExit

    # This is the graph that we'll construct
    graph = gt.Graph()

    # These data structures hold links to the vertices, edges, and their
    # properties
    vertices = {}
    vertex_page_types = []
    vertex_total_times = []
    vertex_mean_times = []
    vertex_occurrences = []
    edge_occurrences = []
    edge_probabilities = []

    # Fetch the set of graph data from the round of computation that the
    # caller wants, or from the most recent graph if a version hasn't been
    # provided.  Note that the compute_index should be the same for the
    # vertex and edge data, so we look it up using the same index.
    if compute_index is None:
        compute_index = NavigationVertex.select(
            fn.Max(NavigationVertex.compute_index)).scalar()
    vertex_models = NavigationVertex.select().where(
        NavigationVertex.compute_index == compute_index)
    edge_models = NavigationEdge.select().where(
        NavigationEdge.compute_index == compute_index)

    # Add vertices to graph and save vertex properties
    for vertex_model in vertex_models:
        # Add a vertex to the graph and save its properties
        vertex = graph.add_vertex()
        vertices[vertex_model.id] = vertex
        vertex_page_types.append(vertex_model.page_type)
        vertex_total_times.append(vertex_model.total_time)
        vertex_mean_times.append(vertex_model.mean_time)
        vertex_occurrences.append(vertex_model.occurrences)

    # Add edges to the graph and save their properties
    for edge_model in edge_models:
        graph.add_edge(
            # We look up vertices using the '_vertex_id' properties because
            # this is already retrieved in the fetched rows.  Note that if
            # we want to look it up by page type, this will require two
            # extra queries to the database (one for each vertex) for each
            # edge added, which is very costly.
            vertices[edge_model.source_vertex_id],
            vertices[edge_model.target_vertex_id],
        )
        edge_occurrences.append(edge_model.occurrences)
        edge_probabilities.append(edge_model.probability)

    # Fix the positions and colors of the first and final vertices
    vertex_positions = []
    vertex_pins = []
    vertex_colors = []
    for page_type in vertex_page_types:
        if page_type == 'Start':
            vertex_positions.append([0.5, 3])
            vertex_pins.append(True)
            vertex_colors.append("#b2f3ba")  # light green
        elif page_type == 'End':
            vertex_positions.append([9.5, 3])
            vertex_pins.append(True)
            vertex_colors.append("#f3a4a7")  # light red
        else:
            vertex_positions.append([5, 3])
            vertex_pins.append(False)
            vertex_colors.append("white")

    # Because we're using unicode literals, each of the "value types" need
    # to be coerced to a string explicitly before creating new properties.
    vertex_position_property =\
        graph.new_vertex_property(str("vector<double>"), vals=vertex_positions)
    vertex_pin_property = graph.new_vertex_property(str("boolean"), vals=vertex_pins)
    vertex_color_property = graph.new_vertex_property(str("string"), vals=vertex_colors)

    # When making labels, we take advantage of the fact that most page types
    # only have one space, and usually they should be split into two new
    # lines if they have a space.
    split_page_type_names = [_.replace(' ', '\n') for _ in vertex_page_types]
    vertex_labels = graph.new_vertex_property(str("string"), vals=split_page_type_names)

    # Determine vertex size based on how frequently they have occurred.
    # While larger size means more visits, the relationship isn't linear.
    # The "log" is necessary to make sure that the difference isn't too
    # severe between vertices.  This was hand-tailored to just look good.
    # vertex_occurrences_array = np.array(vertex_occurrences)
    # vertex_size_array = np.log((vertex_occurrences_array * float(10)) / np.max(vertex_occurrences))  # noqa
    # small_vertex_indexes = vertex_size_array < MINIMUM_VERTEX_SIZE
    # vertex_size_array[small_vertex_indexes] = MINIMUM_VERTEX_SIZE
    # vertex_sizes = graph.new_vertex_property(str("float"), vals=vertex_size_array)

    # Compute the font sizes to scale with vertex size.
    # This was hand-tailored to just look good too.
    # font_size_array = vertex_size_array * 10
    # small_font_indexes = font_size_array < MINIMUM_FONT_SIZE
    # font_size_array[small_font_indexes] = MINIMUM_FONT_SIZE
    # vertex_font_sizes = graph.new_vertex_property(str("double"), vals=font_size_array)

    # Edge label is determined by the probability that it is taken
    edge_labels = graph.new_edge_property(
        str("float"), vals=np.round(edge_probabilities, 2))

    # Edge thickness is determined by how likely a participant was to follow
    # that transition
    edge_widths = graph.new_edge_property(
        str("float"),
        vals=[p * EDGE_PEN_WIDTH_PER_PROBABILITY for p in edge_probabilities],
    )

    # Show only the top most frequently visited page types
    vertex_occurrences_array = np.array(vertex_occurrences)
    is_vertex_frequent = vertex_occurrences_array >=\
        np.percentile(vertex_occurrences_array, PAGE_TYPE_PERCENTILE)
    is_vertex_start_or_end = np.logical_or(
        np.array(vertex_page_types) == "Start",
        np.array(vertex_page_types) == "End"
    )
    show_vertex = np.logical_or(is_vertex_frequent, is_vertex_start_or_end)
    vertex_filter = graph.new_vertex_property(str("boolean"), vals=show_vertex)
    graph.set_vertex_filter(vertex_filter)

    # Show only the top most taken transitions.  This uses two conditions:
    # First, the transition has to have been taken a large number of
    # times---the number of occurrences must be within a certain percentile
    # of all occurrences taken.
    # Second, the transition has to have a certain minimum probability of
    # occurring.
    # edge_occurrences_array = np.array(edge_occurrences)
    edge_probabilities_array = np.array(edge_probabilities)
    # does_edge_occur_often = edge_occurrences_array >=\
    #     np.percentile(edge_occurrences_array, TRANSITION_PERCENTILE)
    does_edge_have_high_probability =\
        edge_probabilities_array >= TRANSITION_PERCENTAGE_THRESHOLD
    # is_edge_frequent = np.logical_and(does_edge_occur_often, does_edge_have_high_probability)
    edge_filter = graph.new_edge_property(
        str("boolean"), vals=does_edge_have_high_probability)
    graph.set_edge_filter(edge_filter)

    # Create a new filename for the output that includes the index of the
    # version of data that was used when drawing it.
    output_filename = make_dump_filename(
        __name__ + "_compute_index_" + str(compute_index),
        "." + output_format,
    )

    # Draw the graph
    gt.graphviz_draw(
        graph,
        size=(30, 15),  # resulting image should be about 30cm by 15cm
        overlap=False,  # nodes should not be drawn on top of each other
        elen=.5,  # edges should be ~1/2 in. long
        penwidth=edge_widths,  # edge thickness
        # vsize=vertex_sizes,  # vertex sizes
        vsize=MINIMUM_VERTEX_SIZE,  # vertex sizes
        layout='fdp',  # this layout engine lets us set positions of start and end
        pin=vertex_pin_property,  # pins the positions for some vertices
        pos=vertex_position_property,  # set the position of some vertices
        vcolor=vertex_color_property,
        # For reference about graphviz vertex and edge properties in the
        # next two dictionaries, see this page:
        # http://www.graphviz.org/doc/info/attrs.html
        gprops={
            'rankdir': "LR",  # layout the vertices from left to right
            'splines': 'curved',
        },
        vprops={
            # 'fontsize': vertex_font_sizes,  # size of labels
            'fontsize': MINIMUM_FONT_SIZE,  # size of labels
            'label': vertex_labels,  # text of labels
            'shape': 'circle',
            'fixedsize': 'shape',  # don't scale vertices to fit text (looks weird)
        },
        eprops={
            'xlabel': edge_labels,  # xlabel (instead of label) distances labels from edges
            'fontsize': 6.0,
            # Surprisingly, we have to explicitly set these arrow properties
            # to make sure that edges appear with a direction
            'arrowhead': 'normal',
            'dir': 'forward',
        },
        output=output_filename,
        output_format=output_format,
    )