def test_view_graph_bundle_flow_partitions_must_be_equal():
    """Check that mismatched bundle flow partitions are rejected.

    Two bundles, a -> c and b -> c, are both routed through the waypoint
    'via'. Building the view graph is expected to raise ``ValueError`` while
    their ``flow_partition`` values differ, and to return a truthy graph once
    both bundles use the same partition.
    """
    material_partition_mn = Partition.Simple('material', ['m', 'n'])
    material_partition_XY = Partition.Simple('material', ['X', 'Y'])
    nodes = {
        'a': ProcessGroup(selection=['a1']),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(selection=['c1']),
        'via': Waypoint(),
    }
    order = [['a', 'b'], ['via'], ['c']]
    bundles = [
        Bundle('a', 'c', waypoints=['via'],
               flow_partition=material_partition_mn),
        Bundle('b', 'c', waypoints=['via'],
               flow_partition=material_partition_XY),
    ]

    # The two bundles disagree on flow_partition, so this must raise. The
    # result is intentionally not bound to a name: nothing is returned.
    with pytest.raises(ValueError):
        view_graph(SankeyDefinition(nodes, bundles, order))

    # Once bundle 'b' -> 'c' uses the same partition, the graph is built.
    bundles[1] = Bundle('b', 'c', waypoints=['via'],
                        flow_partition=material_partition_mn)
    assert view_graph(SankeyDefinition(nodes, bundles, order))
def test_sankey_definition_as_script_with_partitions():
    """Round-trip a partitioned SankeyDefinition through ``to_code``.

    The script produced by ``to_code`` is executed in a fresh namespace, and
    the ``sdd`` name it defines must compare equal to the original
    definition.
    """
    process_partition = Partition.Simple('process', ['c1', 'c2'])
    waypoint_partition = Partition.Simple('material', ['m', 'n'])
    flow_partition = Partition.Simple('material', ['m', 'n'])

    nodes = {
        'a': ProcessGroup(selection=['a1', 'a2']),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(selection=['c1', 'c2'],
                          partition=process_partition),
        'via': Waypoint(partition=waypoint_partition),
    }
    bundles = [
        Bundle(source, 'c', waypoints=['via'])
        for source in ('a', 'b')
    ]
    ordering = [[['a', 'b']], [['via']], [['c']]]

    sdd = SankeyDefinition(nodes, bundles, ordering,
                           flow_partition=flow_partition)
    script = sdd.to_code()

    # Check roundtrip: the script is generated by the library under test, not
    # taken from external input, so executing it here is deliberate.
    namespace = {}
    exec(script, namespace)
    assert namespace["sdd"] == sdd
def test_partition_simple_checks_for_duplicates():
    """Check that ``Partition.Simple`` raises ValueError on repeated values.

    Covers a value listed twice directly, and a value that appears both
    inside a labelled group and on its own.
    """
    duplicate_cases = [
        # Same plain value twice.
        ['a', 'a'],
        # 'b' is in the 'label1' group and also listed separately.
        [('label1', ['a', 'b']), 'b'],
    ]
    for values in duplicate_cases:
        with pytest.raises(ValueError):
            Partition.Simple('dim1', values)
def test_results_graph_material_key():
    """Check that an edge's ``flow_partition`` determines result edge keys.

    The same two flows are first partitioned on 'material_type', giving one
    result edge per material, and then on 'shape', giving a single merged
    edge.
    """
    # Mock flow data
    flow_columns = ('source', 'target', 'material_type', 'shape', 'value')
    flows = pd.DataFrame.from_records(
        [
            ('a1', 'c1', 'm', 'long', 3),
            ('a1', 'c1', 'n', 'long', 1),
        ],
        columns=flow_columns,
    )

    layered = LayeredGraph()
    for node_id in ('a', 'c'):
        layered.add_node(node_id, node=ProcessGroup())
    layered.add_edge('a', 'c', bundles=[0])
    layered.ordering = Ordering([[['a']], [['c']]])
    bundle_flows = {0: flows}

    material_partition = Partition.Simple('material_type', ['m', 'n'])
    shape_partition = Partition.Simple('shape', ['long', 'thin'])

    def expected_edge(flow_key, value, original_flows):
        # Every expected edge runs a^* -> c^* and belongs to bundle 0.
        return ('a^*', 'c^*', (flow_key, '*'), {
            'measures': {'value': value},
            'original_flows': original_flows,
            'bundles': [0],
        })

    def sorted_result_edges():
        Gr, _groups = results_graph(layered, bundle_flows)
        return sorted(Gr.edges(keys=True, data=True))

    # Partition based on material_type
    layered.edges['a', 'c']['flow_partition'] = material_partition
    assert sorted_result_edges() == [
        expected_edge('m', 3, [0]),
        expected_edge('n', 1, [1]),
    ]

    # Partition based on shape
    layered.edges['a', 'c']['flow_partition'] = shape_partition
    assert sorted_result_edges() == [
        expected_edge('long', 4, [0, 1]),
    ]
def test_results_graph_with_extra_or_not_enough_groups():
    """Check results when a partition does not match the processes in use.

    Node 'a' is partitioned into 'a1' and 'a3', while the flows come from
    'a1' and 'a2'. The expected result has no node for 'a3' and puts the
    'a2' flow on the catch-all node 'a^_'.
    """
    # Mock flow data
    flows = pd.DataFrame.from_records(
        [
            ('a1', 'b1', 'm', 3),
            ('a2', 'b1', 'm', 1),
        ],
        columns=('source', 'target', 'material', 'value'),
    )
    bundle_flows = {0: flows}

    # Group 'a3' not used. Process 'a2' isn't in any group.
    process_groups = {
        'a': ProcessGroup(partition=Partition.Simple('process', ['a1', 'a3'])),
        'b': ProcessGroup(partition=Partition.Simple('process', ['b1'])),
    }

    layered = LayeredGraph()
    for node_id, process_group in process_groups.items():
        layered.add_node(node_id, node=process_group)
    layered.add_edges_from([
        ('a', 'b', {'bundles': [0]}),
    ])
    layered.ordering = Ordering([[['a']], [['b']]])

    # Do partition based on flows stored in bundles
    Gr, _groups = results_graph(layered, bundle_flows)

    def expected_edge(source_id, value, original_flows):
        # All expected edges end at b^b1 and carry bundle 0.
        return (source_id, 'b^b1', ('*', '*'), {
            'measures': {'value': value},
            'original_flows': original_flows,
            'bundles': [0],
        })

    assert set(Gr.nodes()) == {'a^a1', 'a^_', 'b^b1'}
    assert sorted(Gr.edges(keys=True, data=True)) == [
        expected_edge('a^_', 1, [1]),
        expected_edge('a^a1', 3, [0]),
    ]

    expected_ordering = Ordering([
        [['a^a1', 'a^_']],
        [['b^b1']],
    ])
    assert Gr.ordering == expected_ordering
def test_view_graph_adds_waypoints_partition():
    """Check that an implicit waypoint takes the bundle's default partition.

    The ordering leaves an empty layer between 'n1' and 'n2'. The view graph
    is expected to contain a waypoint '__n1_n2_1' whose partition is the
    bundle's ``default_partition``.
    """
    waypoint_partition = Partition.Simple('test', ['x'])
    definition = SankeyDefinition(
        {
            'n1': ProcessGroup(selection=['n1']),
            'n2': ProcessGroup(selection=['n2']),
        },
        [Bundle('n1', 'n2', default_partition=waypoint_partition)],
        # The empty middle layer is where the waypoint is expected to appear.
        [['n1'], [], ['n2']],
    )

    G = view_graph(definition)

    expected_nodes = [
        ('__n1_n2_1', {
            'node': Waypoint(title='', partition=waypoint_partition),
        }),
        ('n1', {'node': ProcessGroup(selection=['n1'])}),
        ('n2', {'node': ProcessGroup(selection=['n2'])}),
    ]
    assert sorted(nodes_ignoring_elsewhere(G, data=True)) == expected_nodes
def test_weave_results():
    """End-to-end check of ``weave`` on a small two-bundle definition.

    First weaves without a flow partition and checks nodes, links, ordering
    and groups. Then weaves again with a definition-wide ``flow_partition``
    and a categorical link colour scale, and checks link types and colours.
    """
    nodes = {
        'a': ProcessGroup(selection=['a1', 'a2']),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(selection=['c1', 'c2'],
                          partition=Partition.Simple('process', ['c1', 'c2'])),
        'via': Waypoint(partition=Partition.Simple('material', ['m', 'n'])),
    }
    bundles = [
        Bundle('a', 'c', waypoints=['via']),
        Bundle('b', 'c', waypoints=['via']),
    ]
    ordering = [[['a', 'b']], [['via']], [['c']]]
    sdd = SankeyDefinition(nodes, bundles, ordering)

    # Dataset
    flows = pd.DataFrame.from_records(
        [
            ('a1', 'c1', 'm', 3),
            ('a2', 'c1', 'n', 1),
            ('b1', 'c1', 'm', 1),
            ('b1', 'c2', 'm', 2),
            ('b1', 'c2', 'n', 1),
        ],
        columns=('source', 'target', 'material', 'value'),
    )
    process_ids = [*flows.source.unique(), *flows.target.unique()]
    dim_process = pd.DataFrame({'id': process_ids}).set_index('id')
    dataset = Dataset(flows, dim_process)

    def link(src, tgt, original_flows, value, link_type='*', color='#FBB4AE'):
        # Expected link; type and title are always the same in this test.
        return SankeyLink(
            source=src,
            target=tgt,
            type=link_type,
            time='*',
            data={'value': value},
            title=link_type,
            color=color,
            original_flows=original_flows,
        )

    # (source, target, original flow rows, value, material), in sorted order.
    # The material column is only used by the flow-partitioned run below.
    link_table = [
        ('a^*', 'via^m', [0], 3, 'm'),
        ('a^*', 'via^n', [1], 1, 'n'),
        ('b^*', 'via^m', [2, 3], 3, 'm'),
        ('b^*', 'via^n', [4], 1, 'n'),
        ('via^m', 'c^c1', [0, 2], 4, 'm'),
        ('via^m', 'c^c2', [3], 2, 'm'),
        ('via^n', 'c^c1', [1], 1, 'n'),
        ('via^n', 'c^c2', [4], 1, 'n'),
    ]

    result = weave(sdd, dataset)

    expected_node_ids = {'a^*', 'b^*', 'via^m', 'via^n', 'c^c1', 'c^c2'}
    assert set(n.id for n in result.nodes) == expected_node_ids

    assert sorted(result.links) == [
        link(src, tgt, rows, value)
        for src, tgt, rows, value, _material in link_table
    ]

    assert result.ordering == Ordering([
        [['a^*', 'b^*']],
        [['via^m', 'via^n']],
        [['c^c1', 'c^c2']],
    ])

    assert result.groups == [
        {
            'id': 'via',
            'title': '',
            'type': 'group',
            'nodes': ['via^m', 'via^n'],
        },
        {
            'id': 'c',
            'title': '',
            'type': 'process',
            'nodes': ['c^c1', 'c^c2'],
        },
    ]

    # Can also set flow_partition for all bundles at once
    sdd2 = SankeyDefinition(
        nodes, bundles, ordering,
        flow_partition=Partition.Simple('material', ['m', 'n']))

    material_colors = {'m': 'red', 'n': 'blue'}
    scale = CategoricalScale('type', palette=list(material_colors.values()))
    scale.set_domain(list(material_colors))

    result = weave(sdd2, dataset, link_color=scale)

    assert sorted(result.links) == [
        link(src, tgt, rows, value, material, material_colors[material])
        for src, tgt, rows, value, material in link_table
    ]
def test_simple_partition_groups():
    """Check labels and groups built by ``Partition.Simple``.

    A plain value becomes a single-value group labelled with that value; a
    ``(label, values)`` tuple becomes one group containing all its values.
    """
    partition = Partition.Simple('dim1', ['x', ('group', ['y', 'z'])])

    expected_groups = (
        Group('x', [('dim1', ('x', ))]),
        Group('group', [('dim1', ('y', 'z'))]),
    )

    assert partition.labels == ['x', 'group']
    assert partition.groups == expected_groups
def test_results_graph_overall():
    """Check nodes, edges, ordering and groups from ``results_graph``.

    The view graph has sources 'a' and 'b' feeding 'c' through the waypoint
    'via'. 'via' is partitioned by material and 'c' by process, and every
    edge carries the material flow partition.
    """
    material_partition = Partition.Simple('material', ['m', 'n'])
    c_partition = Partition.Simple('process', ['c1', 'c2'])

    layered = LayeredGraph()
    layered.add_node('a', node=ProcessGroup(title='Node a'))
    layered.add_node('b', node=ProcessGroup())
    layered.add_node('c', node=ProcessGroup(partition=c_partition))
    layered.add_node('via', node=Waypoint(partition=material_partition))
    layered.add_edges_from([
        (src, tgt, {'bundles': bundle_ids,
                    'flow_partition': material_partition})
        for src, tgt, bundle_ids in [
            ('a', 'via', [0]),
            ('b', 'via', [1]),
            ('via', 'c', [0, 1]),
        ]
    ])
    layered.ordering = Ordering([[['a', 'b']], [['via']], [['c']]])

    # Mock flow data
    flow_columns = ('source', 'target', 'material', 'value')
    bundle_flows = {
        0: pd.DataFrame.from_records(
            [
                ('a1', 'c1', 'm', 3),
                ('a2', 'c1', 'n', 1),
            ],
            index=(0, 1),
            columns=flow_columns,
        ),
        1: pd.DataFrame.from_records(
            [
                ('b1', 'c1', 'm', 1),
                ('b1', 'c2', 'm', 2),
                ('b1', 'c2', 'n', 1),
            ],
            index=(2, 3, 4),
            columns=flow_columns,
        ),
    }

    def node(node_id, node_type, title):
        # Every expected node has direction 'R'.
        return (node_id, {
            'direction': 'R',
            'type': node_type,
            'title': title,
        })

    def edge(src, tgt, material, value, original_flows, bundle_ids):
        return (src, tgt, (material, '*'), {
            'measures': {'value': value},
            'original_flows': original_flows,
            'bundles': bundle_ids,
        })

    # Do partition based on flows stored in bundles
    Gr, groups = results_graph(layered, bundle_flows)

    assert sorted(Gr.nodes(data=True)) == [
        node('a^*', 'process', 'Node a'),
        node('b^*', 'process', 'b'),
        node('c^c1', 'process', 'c1'),
        node('c^c2', 'process', 'c2'),
        node('via^m', 'group', 'm'),
        node('via^n', 'group', 'n'),
    ]

    assert sorted(Gr.edges(keys=True, data=True)) == [
        edge('a^*', 'via^m', 'm', 3, [0], [0]),
        edge('a^*', 'via^n', 'n', 1, [1], [0]),
        edge('b^*', 'via^m', 'm', 3, [2, 3], [1]),
        edge('b^*', 'via^n', 'n', 1, [4], [1]),
        edge('via^m', 'c^c1', 'm', 4, [0, 2], [0, 1]),
        edge('via^m', 'c^c2', 'm', 2, [3], [0, 1]),
        edge('via^n', 'c^c1', 'n', 1, [1], [0, 1]),
        edge('via^n', 'c^c2', 'n', 1, [4], [0, 1]),
    ]

    assert Gr.ordering == Ordering([
        [['a^*', 'b^*']],
        [['via^m', 'via^n']],
        [['c^c1', 'c^c2']],
    ])

    # Only includes groups where the title is not the same as the single node
    # title
    expected_groups = [
        {
            'id': 'via',
            'title': '',
            'type': 'group',
            'nodes': ['via^m', 'via^n'],
        },
        {
            'id': 'c',
            'title': '',
            'type': 'process',
            'nodes': ['c^c1', 'c^c2'],
        },
    ]
    assert groups == expected_groups
def test_results_graph_time_partition():
    """Check that ``results_graph`` splits edges by a time partition.

    With only a time partition, the expected edges are keyed ('*', time).
    With a material partition as well, they are keyed (material, time). The
    time partition is defined with integers; the expected keys are strings.
    """
    time_partition = Partition.Simple('time', [1, 2])

    layered = LayeredGraph()
    for node_id in ('a', 'b'):
        layered.add_node(node_id, node=ProcessGroup())
    layered.add_edges_from([
        ('a', 'b', {'bundles': [0]}),
    ])
    layered.ordering = Ordering([[['a']], [['b']]])

    # Mock flow data
    flow_records = [
        ('a1', 'b1', 'm', 1, 3),
        ('a2', 'b1', 'n', 1, 1),
        ('a2', 'b2', 'n', 1, 2),
        ('a1', 'b1', 'm', 2, 1),
        ('a1', 'b1', 'n', 2, 3),
    ]
    bundle_flows = {
        0: pd.DataFrame.from_records(
            flow_records,
            columns=('source', 'target', 'material', 'time', 'value')),
    }

    def edge(material, time, value, original_flows):
        # All expected edges run a^* -> b^* and belong to bundle 0.
        return ('a^*', 'b^*', (material, time), {
            'measures': {'value': value},
            'original_flows': original_flows,
            'bundles': [0],
        })

    # Do partition based on flows stored in bundles
    Gr, _groups = results_graph(layered, bundle_flows,
                                time_partition=time_partition)
    assert sorted(Gr.edges(keys=True, data=True)) == [
        edge('*', '1', 6, [0, 1, 2]),
        edge('*', '2', 4, [3, 4]),
    ]

    # Now add a material partition too
    material_partition = Partition.Simple('material', ['m', 'n'])
    Gr, _groups = results_graph(layered, bundle_flows, material_partition,
                                time_partition)
    assert sorted(Gr.edges(keys=True, data=True)) == [
        edge('m', '1', 3, [0]),
        edge('m', '2', 1, [3]),
        edge('n', '1', 3, [1, 2]),
        edge('n', '2', 3, [4]),
    ]
def test_results_graph_elsewhere_stubs():
    """Check how ``results_graph`` reports from-elsewhere bundles.

    Bundle 1 is attached to node 'b' through ``from_elsewhere_bundles``. Its
    flows are expected under ``from_elsewhere_edges`` on the partitioned 'b'
    result nodes, not as edges of the result graph.
    """
    b_partition = Partition.Simple('process', ['b1', 'b2'])

    layered = LayeredGraph()
    layered.add_node('a', node=ProcessGroup())
    layered.add_node('b', node=ProcessGroup(partition=b_partition),
                     from_elsewhere_bundles=[1])
    layered.add_edge('a', 'b', bundles=[0])
    layered.ordering = Ordering([[['a']], [['b']]])

    # Mock flow data
    flow_columns = ('source', 'target', 'material', 'value')
    bundle_flows = {
        0: pd.DataFrame.from_records(
            [
                ('a1', 'b1', 'm', 3),
                ('a2', 'b1', 'n', 1),
            ],
            index=(0, 1),
            columns=flow_columns,
        ),
        1: pd.DataFrame.from_records(
            [
                ('x1', 'b1', 'm', 1),
                ('x3', 'b2', 'n', 5),
            ],
            index=(2, 3),
            columns=flow_columns,
        ),
    }

    def flow_data(value, original_flows, bundle_ids):
        # Data dict shared by graph edges and from-elsewhere stub entries.
        return {
            'measures': {'value': value},
            'original_flows': original_flows,
            'bundles': bundle_ids,
        }

    def b_node(title, elsewhere_value, elsewhere_flows):
        # Expected 'b' node with a single from-elsewhere stub from bundle 1.
        return ('b^' + title, {
            'direction': 'R',
            'type': 'process',
            'title': title,
            'from_elsewhere_edges': [
                (('*', '*'), flow_data(elsewhere_value, elsewhere_flows, [1])),
            ],
        })

    # Do partition based on flows stored in bundles
    Gr, _groups = results_graph(layered, bundle_flows)

    assert sorted(Gr.nodes(data=True)) == [
        ('a^*', {
            'direction': 'R',
            'type': 'process',
            'title': 'a',
        }),
        b_node('b1', 1, [2]),
        b_node('b2', 5, [3]),
    ]

    assert sorted(Gr.edges(keys=True, data=True)) == [
        ('a^*', 'b^b1', ('*', '*'), flow_data(4, [0, 1], [0])),
    ]