def _DotNode(self, node):
    """Builds the graphviz statement that declares one request node.

    The fill color is derived from the request's content type. The shape is
    'polygon' when the request's max-age is above 300 (seconds, according to
    the original notes on this method) and 'oval' otherwise. Nodes flagged as
    ads or tracking additionally get the 'bold' and 'diagonals' styles. The
    label holds the short name of the URL followed by start, end and
    duration, where start and end are offsets from self._global_start.

    Args:
      node: (RequestNode) the node to describe.

    Returns:
      A newline-terminated string in graphviz format describing the node.
    """
    request = node.request
    fill_color = self._ContentTypeToColor(request.GetContentType())

    if request.MaxAge() > 300:
        shape = 'polygon'
    else:
        shape = 'oval'

    styles = ['filled']
    if node.is_ad or node.is_tracking:
        styles.extend(['bold', 'diagonals'])

    # Label pieces, named so the format arguments below read in order.
    node_id = request.request_id
    label_name = request_track.ShortName(request.url)
    start_offset = request.start_msec - self._global_start
    end_offset = request.end_msec - self._global_start
    duration = request.end_msec - request.start_msec

    template = ('"%s" [label = "%s\\n%.2f->%.2f (%.2f)"; style = "%s"; '
                'fillcolor = %s; shape = %s];\n')
    return template % (node_id, label_name, start_offset, end_offset,
                       duration, ','.join(styles), fill_color, shape)
def ShortName(self):
    """Returns a short, human-readable name for this resource.

    An explicitly stored short name (self._shortname) wins whenever it is
    truthy. Otherwise the name is derived from the request URL by
    request_track.ShortName, which, per the original notes, yields the
    hostname, the filename, or the end of the path, and tries to keep the
    domain visible.
    """
    return self._shortname or request_track.ShortName(self._request.url)
def DoCost(arg_str):
    """Prints the total cost of the dependency graph built from a trace.

    Parses the arguments, builds a graph view from the `request_json` trace
    file and prints the graph cost. When `--path` is given, it also prints
    the short name of every node that Cost() placed in the path list.

    Args:
      arg_str: the arguments handed to OPTIONS.ParseArgs. They supply the
        `request_json` positional argument and the optional `--path` flag.
    """
    OPTIONS.ParseArgs(arg_str, description='Calculates total cost',
                      extra=['request_json', ('--path', False)])
    graph_view = _ProcessTraceFile(OPTIONS.request_json)

    # Cost() receives this list through path_list= and is expected to fill it
    # in place; NOTE(review): presumably with the nodes of the costliest path
    # -- confirm against the Cost() implementation.
    path_list = []

    # Single-argument parenthesized print produces the same output under
    # Python 2's print statement and Python 3's print function.
    print('Graph cost: %s' % graph_view.deps_graph.Cost(path_list=path_list))
    if OPTIONS.path:
        for node in path_list:
            print(' ' + request_track.ShortName(node.request.url))
def _SplitChildrenByTime(self, parent):
    """Re-parents children of a node according to their request timings.

    The recorded initiator of a request is not always its real dependency.
    A script may seem to load several resources independently while in fact
    one of them is a JSON data file and the others are assets listed in that
    JSON; those assets should depend on the JSON file rather than on the
    script. This method approximates that: each child is re-attached to the
    sibling with the latest end time that still finishes no later than the
    child starts.

    The outgoing edges are sorted twice, by child start time and by child
    end time. A mark walks the end-time ordering while the start-time
    ordering is traversed, advancing as long as the next candidate also ends
    before the current child starts.

    Two content-type filters refine this. Nothing is done unless the
    parent's content type is in _CAN_BE_TIMING_PARENT, and siblings whose
    content type is not in _CAN_MAKE_TIMING_DEPENDENCE are skipped when the
    mark advances (the intent being: split only when the original parent is
    a script and the new parent is a data file).

    TODO(mattcary): More heuristics, like incorporating cachability somehow,
    and not just picking arbitrarily if there are two nodes with the same
    end time (does that ever really happen?)

    Args:
      parent: (_RequestNode) the node whose children are processed.
    """
    if parent.request.GetContentType() not in self._CAN_BE_TIMING_PARENT:
        return

    out_edges = self._deps_graph.OutEdges(parent)
    by_start = sorted(out_edges,
                      key=lambda edge: edge.to_node.request.start_msec)
    by_end = sorted(out_edges,
                    key=lambda edge: edge.to_node.request.end_msec)
    edge_count = len(by_end)

    end_mark = 0
    for current in by_start:
        assert current.from_node is parent
        child = current.to_node
        child_start = child.request.start_msec

        # The small epsilon tolerates floating point noise in timestamps.
        if child_start < parent.request.end_msec - 1e-5:
            logging.warning(
                'Child loaded before parent finished: %s -> %s',
                request_track.ShortName(parent.request.url),
                request_track.ShortName(child.request.url))

        # Advance the end mark to the best candidate for this child.
        reached_own_edge = False
        while end_mark < edge_count:
            candidate = by_end[end_mark]
            if candidate == current:
                reached_own_edge = True
                break
            if (candidate.to_node.request.GetContentType()
                    not in self._CAN_MAKE_TIMING_DEPENDENCE):
                # This sibling can never be a timing parent; skip it.
                end_mark += 1
                continue
            next_ends_before_child = (
                end_mark < edge_count - 1 and
                by_end[end_mark + 1].to_node.request.end_msec < child_start)
            if not next_ends_before_child:
                break
            end_mark += 1

        if end_mark >= edge_count:
            # It's not possible to rearrange any more children.
            break
        if reached_own_edge:
            # This child can't be rearranged, but the next one may be.
            continue

        new_parent = by_end[end_mark].to_node
        if new_parent.request.end_msec <= child_start:
            current.is_timing = True
            self._deps_graph.UpdateEdge(current, new_parent, child)