def _AllCores(prefix, graph_set_names, output, threshold):
  """Dumps every per-set core set plus the overall page core as JSON.

  For each name in graph_set_names, all files matching the glob
  '<prefix>-<name>-*.trace' are loaded and turned into dependency graphs.
  Every graph is consumed both by a sack private to its set and by one sack
  shared across all sets.

  Note that threshold is only assigned to the per-set sacks; the shared sack
  keeps whatever CORE_THRESHOLD GraphSack provides by default.

  Args:
    prefix: first component of the trace file glob pattern.
    graph_set_names: iterable of set names, used as the middle component of
        the glob pattern and as the 'set_name' value in the output.
    output: file-like object that receives the JSON dump.
    threshold: value stored as CORE_THRESHOLD on each per-set sack.
  """
  _Progress('Using threshold %s' % threshold)
  combined_sack = resource_sack.GraphSack()
  per_set_cores = []
  all_graph_sets = []
  for set_name in graph_set_names:
    _Progress('Finding core set for %s' % set_name)
    set_sack = resource_sack.GraphSack()
    set_sack.CORE_THRESHOLD = threshold
    set_graphs = []
    pattern = '-'.join([prefix, set_name, '*.trace'])
    for trace_path in glob.iglob(pattern):
      _Progress('Reading %s' % trace_path)
      trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
      events = trace.request_track.GetEvents()
      lens = request_dependencies_lens.RequestDependencyLens(trace)
      graph = dependency_graph.RequestDependencyGraph(events, lens)
      # Each graph counts towards its own set and towards the whole page.
      set_sack.ConsumeGraph(graph)
      combined_sack.ConsumeGraph(graph)
      set_graphs.append(graph)
    per_set_cores.append({
        'set_name': set_name,
        'core_set': list(set_sack.CoreSet()),
    })
    all_graph_sets.append(set_graphs)
  result = {
      'core_sets': per_set_cores,
      'page_core': list(combined_sack.CoreSet(*all_graph_sets)),
  }
  json.dump(result, output, sort_keys=True, indent=2)
def _PageCore(prefix, graph_set_names, output):
  """Writes the page core over the sets in graph_set_names as JSON.

  All traces matching '<prefix>-<name>-*.trace' are loaded for each set name
  and consumed by a single sack. The dumped JSON object has three keys:
  'page_core' (bags in the core set), 'non_core' (the sack's remaining bags)
  and 'threshold' (the sack's CORE_THRESHOLD). Each bag is reported by its
  label, name and node count. A trailing newline follows the JSON.

  Args:
    prefix: first component of the trace file glob pattern.
    graph_set_names: non-empty sequence of set names (asserted).
    output: file-like object that receives the JSON and the final newline.
  """
  assert graph_set_names
  sack = resource_sack.GraphSack()
  graph_sets = []
  for set_name in graph_set_names:
    _Progress('Processing %s' % set_name)
    set_graphs = []
    pattern = '-'.join([prefix, set_name, '*.trace'])
    for trace_path in glob.iglob(pattern):
      _Progress('Reading %s' % trace_path)
      trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
      events = trace.request_track.GetEvents()
      lens = request_dependencies_lens.RequestDependencyLens(trace)
      graph = dependency_graph.RequestDependencyGraph(events, lens)
      sack.ConsumeGraph(graph)
      set_graphs.append(graph)
    graph_sets.append(set_graphs)
  core = sack.CoreSet(*graph_sets)

  def _Describe(bag):
    # JSON-friendly summary shared by the core and non-core listings.
    return {'label': bag.label, 'name': bag.name, 'count': bag.num_nodes}

  report = {
      'page_core': [_Describe(bag) for bag in core],
      'non_core': [_Describe(bag) for bag in sack.bags if bag not in core],
      'threshold': sack.CORE_THRESHOLD,
  }
  json.dump(report, output, sort_keys=True, indent=2)
  output.write('\n')
def test_Shortname(self):
  """Checks that a request with a long data: URL gets a shortened label.

  The second request's URL is 'data:fake/content;' followed by a long
  payload; its bag is expected to be labelled 'data:fake/content'.
  """
  root_request = MakeRequest(0, 'null')
  data_request = MakeRequest(1, 0)
  data_request.url = 'data:fake/content;' + 'lotsand' * 50 + 'lotsofdata'
  graph = TestDependencyGraph([root_request, data_request])
  sack = resource_sack.GraphSack()
  sack.ConsumeGraph(graph)
  actual_labels = {bag.label for bag in sack.bags}
  self.assertEqual({'0/', 'data:fake/content'}, actual_labels)
def test_Core(self):
  """Checks that CoreSet() yields exactly the labels of resources 0 and 1."""
  # We will use a core threshold of 0.5 to make it easier to define
  # graphs. Resources 0 and 1 are core and others are not.
  resource_lists = ([0, 1, 2], [0, 1, 3], [0, 1, 4], [0, 5])
  graphs = [self.SimpleGraph(resources) for resources in resource_lists]
  sack = resource_sack.GraphSack()
  sack.CORE_THRESHOLD = 0.5
  for graph in graphs:
    sack.ConsumeGraph(graph)
  self.assertEqual({'0/', '1/'}, sack.CoreSet())
def test_MultiParents(self):
  """Checks predecessors when one resource has a different parent per graph.

  Resource 2 is requested by 0 in the first graph and by 1 in the second, so
  its bag should report both as predecessors while the two roots have none.
  """
  first_graph = TestResourceGraph.FromRequestList(
      [MakeRequest(0, 'null'), MakeRequest(2, 0)])
  second_graph = TestResourceGraph.FromRequestList(
      [MakeRequest(1, 'null'), MakeRequest(2, 1)])
  sack = resource_sack.GraphSack()
  for graph in (first_graph, second_graph):
    sack.ConsumeGraph(graph)
  self.assertEqual(3, len(sack.bags))
  bag_by_label = dict((bag.label, bag) for bag in sack.bags)
  parent_labels = {bag.label for bag in bag_by_label['2/'].Predecessors()}
  self.assertEqual({'0/', '1/'}, parent_labels)
  for root_label in ('0/', '1/'):
    self.assertFalse(bag_by_label[root_label].Predecessors())
def test_IntersectingCore(self):
  """Checks CoreSet() when restricted to one or more graph sets.

  With no arguments the core set is computed over everything the sack has
  consumed; with graph sets as arguments it is expected to be the
  intersection of the core sets of those sets.
  """
  # Graph set A has core set {0, 1} and B {0, 2} so the final core set should
  # be {0}. Set C makes sure we restrict core computation to tags A and B.
  set_A = [self.SimpleGraph([0, 1, 2]), self.SimpleGraph([0, 1, 3])]
  set_B = [self.SimpleGraph([0, 2, 3]), self.SimpleGraph([0, 2, 1])]
  # range() rather than xrange() so the test also runs under Python 3; for
  # five elements the iteration is identical on Python 2.
  set_C = [self.SimpleGraph([2 * i + 4, 2 * i + 5]) for i in range(5)]
  sack = resource_sack.GraphSack()
  sack.CORE_THRESHOLD = 0.5
  for g in set_A + set_B + set_C:
    sack.ConsumeGraph(g)
  self.assertEqual(set(), sack.CoreSet())
  self.assertEqual(set(['0/', '1/']), sack.CoreSet(set_A))
  self.assertEqual(set(['0/', '2/']), sack.CoreSet(set_B))
  self.assertEqual(set(), sack.CoreSet(set_C))
  self.assertEqual(set(['0/']), sack.CoreSet(set_A, set_B))
  self.assertEqual(set(), sack.CoreSet(set_A, set_B, set_C))
def test_Core(self):
  """Checks the names and node counts of the bags returned by CoreSet()."""
  # We will use a core threshold of 0.5 to make it easier to define
  # graphs. Resources 0 and 1 are core and others are not. We check full names
  # and node counts as we output that for core set analysis. In subsequent
  # tests we just check labels to make the tests easier to read.
  resource_lists = ([0, 1, 2], [0, 1, 3], [0, 1, 4], [0, 5])
  graphs = [self.SimpleGraph(resources) for resources in resource_lists]
  sack = resource_sack.GraphSack()
  sack.CORE_THRESHOLD = 0.5
  for graph in graphs:
    sack.ConsumeGraph(graph)
  expected = {('http://0', 4), ('http://1', 3)}
  actual = {(bag.name, bag.num_nodes) for bag in sack.CoreSet()}
  self.assertEqual(expected, actual)
def test_MultiParents(self):
  """Checks in-edges when one resource has a different parent per graph.

  Resource 2 is requested by 0 in the first graph and by 1 in the second, so
  its bag should have in-edges from both while the two roots have none.
  """
  first_graph = TestDependencyGraph(
      [MakeRequest(0, 'null'), MakeRequest(2, 0)])
  second_graph = TestDependencyGraph(
      [MakeRequest(1, 'null'), MakeRequest(2, 1)])
  sack = resource_sack.GraphSack()
  for graph in (first_graph, second_graph):
    sack.ConsumeGraph(graph)
  self.assertEqual(3, len(sack.bags))
  bag_by_label = dict((bag.label, bag) for bag in sack.bags)

  def _ParentBags(resource):
    # Source bags of every edge pointing into the bag for this resource.
    target = bag_by_label['%s/' % resource]
    in_edges = target._sack._graph.InEdges(target)
    return [edge.from_node for edge in in_edges]

  parent_labels = {bag.label for bag in _ParentBags(2)}
  self.assertEqual({'0/', '1/'}, parent_labels)
  self.assertFalse(_ParentBags(0))
  self.assertFalse(_ParentBags(1))
def _Main():
  """Builds a GraphSack from the trace files in sys.argv and renders it.

  Every command-line argument is treated as the path of a JSON trace file. Each
  trace is loaded, converted to a ResourceGraph and consumed by a single
  sack, which is finally passed to ToDot with sys.stdout as the destination.
  """
  import json
  import logging
  import sys
  import loading_model
  import loading_trace
  import resource_sack
  sack = resource_sack.GraphSack()
  for fname in sys.argv[1:]:
    # Use a context manager so the trace file is closed as soon as it has
    # been parsed instead of lingering until garbage collection.
    with open(fname) as trace_file:
      trace = loading_trace.LoadingTrace.FromJsonDict(json.load(trace_file))
    logging.info('Making graph from %s', fname)
    model = loading_model.ResourceGraph(trace, content_lens=None)
    sack.ConsumeGraph(model)
    logging.info('Finished %s', fname)
  ToDot(sack, sys.stdout, prune=.1)
def test_NodeMerge(self):
  """Checks that requests common to two graphs are merged into one bag.

  Resources 0, 1 and 2 appear in both graphs, so their bags should hold two
  nodes each; resources 3 and 4 appear in one graph only, so one node each.
  """
  first_graph = TestDependencyGraph([
      MakeRequest(0, 'null'),
      MakeRequest(1, 0),
      MakeRequest(2, 0),
      MakeRequest(3, 1),
  ])
  second_graph = TestDependencyGraph([
      MakeRequest(0, 'null'),
      MakeRequest(1, 0),
      MakeRequest(2, 0),
      MakeRequest(4, 2),
  ])
  sack = resource_sack.GraphSack()
  for graph in (first_graph, second_graph):
    sack.ConsumeGraph(graph)
  self.assertEqual(5, len(sack.bags))
  single_graph_labels = ('3/', '4/')
  for bag in sack.bags:
    expected_nodes = 1 if bag.label in single_graph_labels else 2
    self.assertEqual(expected_nodes, bag.num_nodes)
def test_SimpleOutput(self):
  """Checks the edge attributes in ToDot output for a two-graph sack.

  ToDot is called with long_edge_msec=1000; the text it writes is searched
  for a green '0 -> 1' edge and a '0 -> 3' edge with penwidth=8.
  """
  first_graph = TestResourceGraph.FromRequestList([
      MakeRequest(0, 'null'),
      MakeRequest(1, 0),
      MakeRequest(2, 0),
      MakeRequest(3, 1),
  ])
  second_graph = TestResourceGraph.FromRequestList([
      MakeRequest(0, 'null'),
      MakeRequest(1, 0),
      MakeRequest(2, 0),
      MakeRequest(4, 2),
  ])
  sack = resource_sack.GraphSack()
  for graph in (first_graph, second_graph):
    sack.ConsumeGraph(graph)
  dot_buffer = StringIO()
  resource_sack_display.ToDot(sack, dot_buffer, long_edge_msec=1000)
  dot_text = dot_buffer.getvalue()
  # Short edge.
  short_edge = re.search(r'0 -> 1[^]]+color=green \]', dot_text, re.MULTILINE)
  self.assertTrue(short_edge)
  # Long edge.
  long_edge = re.search(r'0 -> 3[^]]+penwidth=8', dot_text)
  self.assertTrue(long_edge)