def test_dataset_only_includes_unused_flows_in_elsewhere_bundles():
    """Elsewhere bundles only receive flows not taken by an explicit bundle.

    Bundle 0 (a -> x) should include flow 0 and bundle 1 (Elsewhere -> x)
    should include flow 1, also when the flow index has duplicated values.
    """
    nodes = {name: ProcessGroup(selection=[name]) for name in ('a', 'x')}
    bundles = {
        0: Bundle('a', 'x'),
        1: Bundle(Elsewhere, 'x'),
    }

    # Dataset
    records = [
        ('a', 'x', 'm', 1),
        ('b', 'x', 'm', 1),
    ]
    flows = pd.DataFrame.from_records(
        records, columns=('source', 'target', 'material', 'value'))

    def source_target_pairs(flows_by_bundle, bundle_id):
        """List the (source, target) pair of every flow in one bundle."""
        frame = flows_by_bundle[bundle_id]
        return list(zip(frame['source'], frame['target']))

    bundle_flows, _ = Dataset(flows).apply_view(nodes, bundles)
    assert source_target_pairs(bundle_flows, 0) == [('a', 'x')]
    assert source_target_pairs(bundle_flows, 1) == [('b', 'x')]

    # Check it works with duplicated flow index values (old bug)
    flows.index = [0, 0]
    bundle_flows, _ = Dataset(flows).apply_view(nodes, bundles)
    assert source_target_pairs(bundle_flows, 0) == [('a', 'x')]
    assert source_target_pairs(bundle_flows, 1) == [('b', 'x')]
def test_sankey_definition_as_script_with_partitions():
    """Executing the to_code() script rebuilds an equal definition, partitions included."""
    process_partition = Partition.Simple('process', ['c1', 'c2'])
    waypoint_partition = Partition.Simple('material', ['m', 'n'])
    nodes = {
        'a': ProcessGroup(selection=['a1', 'a2']),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(selection=['c1', 'c2'], partition=process_partition),
        'via': Waypoint(partition=waypoint_partition),
    }
    bundles = [Bundle(source, 'c', waypoints=['via']) for source in ('a', 'b')]
    ordering = [[['a', 'b']], [['via']], [['c']]]
    flow_partition = Partition.Simple('material', ['m', 'n'])
    sdd = SankeyDefinition(nodes, bundles, ordering,
                           flow_partition=flow_partition)

    # Check roundtrip: the generated script must bind an equal `sdd`.
    namespace = {}
    exec(sdd.to_code(), namespace)
    assert namespace["sdd"] == sdd
def test_view_graph_does_short_bundles_last():
    """Return loops are inserted immediately below the source node, so the
    bundles have to be processed from the outside in."""
    #
    #  ,a -- b -- c-,
    #  |      `----`|
    #  `-----------'
    #
    names = 'abc'
    nodes = {name: ProcessGroup(selection=(name, )) for name in names}
    order = [[[name]] for name in names]
    forward = [Bundle('a', 'b'), Bundle('b', 'c')]
    return_loops = [Bundle('c', 'b'), Bundle('c', 'a')]
    bundles = forward + return_loops

    G = view_graph(SankeyDefinition(nodes, bundles, order))
    expected = Ordering([
        [['a', '__c_a_0']],
        [['b', '__c_b_1', '__c_a_1']],
        [['c', '__c_b_2', '__c_a_2']],
    ])
    assert G.ordering == expected

    # order of bundles doesn't affect it
    G2 = view_graph(SankeyDefinition(nodes, list(reversed(bundles)), order))
    assert G.ordering == G2.ordering
def test_view_graph_does_non_dummy_bundles_first():
    """Bundles that need no dummy nodes must be handled first, so that the
    return loops added afterwards end up better placed."""
    nodes = {name: ProcessGroup(selection=(name, )) for name in 'abcd'}
    order = [[['a', 'c']], [['b', 'd']]]
    direct = [Bundle('a', 'b'), Bundle('c', 'd')]
    return_loop = Bundle('b', 'a')
    bundles = direct + [return_loop]

    G = view_graph(SankeyDefinition(nodes, bundles, order))
    expected = Ordering([
        [['a', '__b_a_0', 'c']],
        [['b', '__b_a_1', 'd']],
    ])
    assert G.ordering == expected

    # order of bundles doesn't affect it
    G2 = view_graph(SankeyDefinition(nodes, list(reversed(bundles)), order))
    assert G2.ordering == G.ordering
def test_view_graph_bundle_flow_partitions_must_be_equal():
    """Two bundles routed via the same waypoint with different flow
    partitions make view_graph raise ValueError; equal partitions are
    accepted."""
    partition_mn = Partition.Simple('material', ['m', 'n'])
    partition_xy = Partition.Simple('material', ['X', 'Y'])
    nodes = {name: ProcessGroup(selection=[name + '1']) for name in 'abc'}
    nodes['via'] = Waypoint()
    order = [['a', 'b'], ['via'], ['c']]

    def bundle_to_c(source, partition):
        """Build a bundle from `source` to 'c' through the shared waypoint."""
        return Bundle(source, 'c', waypoints=['via'],
                      flow_partition=partition)

    # Do partition based on flows stored in bundles
    mismatched = [
        bundle_to_c('a', partition_mn),
        bundle_to_c('b', partition_xy),
    ]
    with pytest.raises(ValueError):
        view_graph(SankeyDefinition(nodes, mismatched, order))

    matching = [mismatched[0], bundle_to_c('b', partition_mn)]
    assert view_graph(SankeyDefinition(nodes, matching, order))
def test_view_graph_merges_bundles_between_same_nodes():
    """Bundles travelling along the same edge are listed together on it."""
    nodes = {pid: ProcessGroup(selection=[pid]) for pid in ('n1', 'n2', 'n3')}
    nodes['via'] = Waypoint()
    bundles = [Bundle(source, 'n3', waypoints=['via'])
               for source in ('n1', 'n2')]
    order0 = [['n1', 'n2'], ['via'], ['n3']]

    G = view_graph(SankeyDefinition(nodes, bundles, order0))

    assert G.nodes['n3'] == {'node': nodes['n3']}

    # Both bundles share the final via -> n3 edge.
    expected_edges = [
        ('n1', 'via', {'bundles': [0]}),
        ('n2', 'via', {'bundles': [1]}),
        ('via', 'n3', {'bundles': [0, 1]}),
    ]
    assert sorted(edges_ignoring_elsewhere(G, data=True)) == expected_edges
def test_unused_flows():
    """Unused flows are between *used* nodes."""
    # View definition:
    #   Elsewhere --> [a] --> Elsewhere
    #   Elsewhere --> [b] --> Elsewhere
    #
    # Dataset:
    #   other --> a --> other
    #   other --> b --> other
    #   a --> b --> c
    #
    # The a --> b flow in the dataset is "unused".
    # The b --> c flow is not unused since c isn't visible.
    nodes = {name: ProcessGroup(selection=[name])
             for name in ('other', 'a', 'b')}
    bundles = dict(enumerate([
        Bundle(Elsewhere, 'a'),
        Bundle(Elsewhere, 'b'),
        Bundle('a', Elsewhere),
        Bundle('b', Elsewhere),
    ]))

    # Dataset
    records = [
        ('other', 'a', 'm', 1),
        ('other', 'b', 'm', 1),
        ('a', 'other', 'm', 1),
        ('b', 'other', 'm', 1),
        ('a', 'b', 'm', 1),
        ('b', 'c', 'm', 1),
    ]
    flows = pd.DataFrame.from_records(
        records, columns=('source', 'target', 'material', 'value'))
    dim_process = pd.DataFrame({'id': ['a', 'b', 'c', 'other']})
    dim_process = dim_process.set_index('id')

    bundle_flows, unused = Dataset(flows, dim_process).apply_view(
        nodes, bundles)

    def source_target_pairs(bundle_id):
        """List the (source, target) pair of every flow in one bundle."""
        frame = bundle_flows[bundle_id]
        return list(zip(frame['source'], frame['target']))

    assert source_target_pairs(0) == [('other', 'a')]
    assert source_target_pairs(1) == [('other', 'b'), ('a', 'b')]
    assert source_target_pairs(2) == [('a', 'other'), ('a', 'b')]
    assert source_target_pairs(3) == [('b', 'other'), ('b', 'c')]

    # Only the a --> b flow is reported as unused.
    assert len(unused) == 1
    assert unused.iloc[0].equals(flows.iloc[4])
def test_elsewhere_bundles():
    """With no bundles defined, a node away from the outer ranks gets a
    bundle to and a bundle from Elsewhere, each with its own waypoint."""
    nodes = {'a': ProcessGroup(selection=['a1'])}
    order = [[], ['a'], []]  # 'a' is not at min/max rank
    definition = SankeyDefinition(nodes, {}, order)

    new_waypoints, new_bundles = elsewhere_bundles(definition)

    assert set(new_waypoints.keys()) == {'__a>', '__>a'}
    expected_bundles = {
        Bundle('a', Elsewhere, waypoints=['__a>']),
        Bundle(Elsewhere, 'a', waypoints=['__>a']),
    }
    assert set(new_bundles.values()) == expected_bundles
def test_view_graph_Elsewhere_bundles():
    """An Elsewhere bundle is recorded on its node's data, or becomes an
    edge from its waypoint when it has one."""
    nodes = {name: ProcessGroup(selection=(name, )) for name in ('a', 'b')}
    bundles = [Bundle('a', 'b'), Bundle(Elsewhere, 'b')]

    G = view_graph(SankeyDefinition(nodes, bundles, [[['a']], [['b']]]))

    expected_nodes = [
        ('a', {'node': ProcessGroup(selection=('a', ))}),
        ('b', {
            'node': ProcessGroup(selection=('b', )),
            'from_elsewhere_bundles': [1],
        }),
    ]
    assert sorted(G.nodes(data=True)) == expected_nodes
    assert sorted(G.edges(data=True)) == [('a', 'b', {'bundles': [0]})]

    # Now with a Waypoint on the Elsewhere bundle
    nodes['w'] = Waypoint()
    bundles[1] = Bundle(Elsewhere, 'b', waypoints=['w'])
    G2 = view_graph(
        SankeyDefinition(nodes, bundles, [[['a', 'w']], [['b']]]))

    expected_nodes = [
        ('a', {'node': ProcessGroup(selection=('a', ))}),
        ('b', {'node': ProcessGroup(selection=('b', ))}),
        ('w', {'node': Waypoint()}),
    ]
    expected_edges = [
        ('a', 'b', {'bundles': [0]}),
        ('w', 'b', {'bundles': [1]}),
    ]
    assert sorted(G2.nodes(data=True)) == expected_nodes
    assert sorted(G2.edges(data=True)) == expected_edges
def test_elsewhere_bundles_does_not_duplicate():
    """Explicit Elsewhere bundles are not duplicated by elsewhere_bundles.

    Node 'a' is not at the min/max rank, but bundles from and to Elsewhere
    are already defined for it, so no new bundles should be proposed.
    """
    nodes = {
        # One-element tuple: without the trailing comma, ('a1') is just the
        # string 'a1' rather than a selection containing one process id.
        'a': ProcessGroup(selection=('a1', )),
        'in': Waypoint(),
        'out': Waypoint(),
    }
    bundles = {
        0: Bundle(Elsewhere, 'a', waypoints=['in']),
        1: Bundle('a', Elsewhere, waypoints=['out']),
    }
    order = [['in'], ['a'], ['out']]  # not at min/max rank
    vd = SankeyDefinition(nodes, bundles, order)

    _, new_bundles = elsewhere_bundles(vd)

    assert new_bundles == {}
def test_sankey_definition_checks_nodes_exist():
    """SankeyDefinition raises ValueError when a bundle names an unknown node.

    Both an unknown bundle endpoint and an unknown waypoint are checked.
    """
    nodes = {
        # One-element tuples: without the trailing comma, ('a1') is just the
        # string 'a1' rather than a selection containing one process id.
        'a': ProcessGroup(selection=('a1', )),
        'b': ProcessGroup(selection=('b1', )),
        'waypoint': ProcessGroup(),
    }
    ordering = Ordering([])

    # Bundles are built outside the `raises` blocks so that only an error
    # coming from SankeyDefinition itself can satisfy the expectation.
    bundles = [Bundle('does not exist', 'b')]
    with pytest.raises(ValueError):
        SankeyDefinition(nodes, bundles, ordering)

    bundles = [Bundle('a', 'b', waypoints=['does not exist'])]
    with pytest.raises(ValueError):
        SankeyDefinition(nodes, bundles, ordering)
def test_elsewhere_bundles_not_added_at_minmax_rank_when_one_bundle_defined():
    """With a single layer and one explicit Elsewhere bundle defined,
    elsewhere_bundles proposes no extra waypoints or bundles."""
    definition = SankeyDefinition(
        {'a': ProcessGroup(selection=['a1'])},
        {0: Bundle('a', Elsewhere)},
        [['a']],
    )

    new_waypoints, new_bundles = elsewhere_bundles(definition)

    assert len(new_waypoints) == 0
    assert len(new_bundles) == 0
def test_view_graph_does_not_mutate_definition():
    """Building the view graph leaves the SankeyDefinition unchanged."""
    nodes = {pid: ProcessGroup(selection=[pid]) for pid in ('n1', 'n2')}
    # The empty middle layer sits between the two nodes of the bundle.
    order0 = [['n1'], [], ['n2']]
    vd = SankeyDefinition(nodes, [Bundle('n1', 'n2')], order0)

    # Only the effect on `vd` matters here; the graph itself is discarded.
    view_graph(vd)

    expected_nodes = {
        'n1': ProcessGroup(selection=['n1']),
        'n2': ProcessGroup(selection=['n2']),
    }
    assert vd.nodes == expected_nodes
    assert vd.bundles == {0: Bundle('n1', 'n2')}
    assert vd.ordering == Ordering([[['n1']], [[]], [['n2']]])
def test_internal_flows_elsewhere():
    """Internal flows should not be included in to/from Elsewhere bundles."""
    # View definition:
    #   Elsewhere --> [a,b] --> Elsewhere
    #
    # Dataset:
    #   other --> a --> b --> other
    #
    nodes = {
        'other': ProcessGroup(selection=['other']),
        'ab': ProcessGroup(selection=['a', 'b']),
    }
    bundles = dict(enumerate([
        Bundle(Elsewhere, 'ab'),
        Bundle('ab', Elsewhere),
    ]))

    # Dataset
    records = [
        ('other', 'a', 'm', 1),
        ('a', 'b', 'm', 1),
        ('b', 'other', 'm', 1),
    ]
    flows = pd.DataFrame.from_records(
        records, columns=('source', 'target', 'material', 'value'))
    dim_process = pd.DataFrame({'id': ['a', 'b', 'other']}).set_index('id')

    bundle_flows, unused = Dataset(flows, dim_process).apply_view(
        nodes, bundles)

    def source_target_pairs(bundle_id):
        """List the (source, target) pair of every flow in one bundle."""
        frame = bundle_flows[bundle_id]
        return list(zip(frame['source'], frame['target']))

    # The internal a --> b flow shows up in neither bundle.
    assert source_target_pairs(0) == [('other', 'a')]
    assert source_target_pairs(1) == [('b', 'other')]
    assert len(unused) == 0
def test_augment_waypoint_alignment():
    """augment() places the new Elsewhere waypoints in line with their node."""
    #   j -- a -- x
    #        b
    #   k -- c -- y
    #
    # should insert "from b" between x and y
    # and "to b" between j and k
    G = LayeredGraph()
    node_data = {
        'a': ProcessGroup(),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(),
        'x': ProcessGroup(),
        'y': ProcessGroup(),
        'j': ProcessGroup(),
        'k': ProcessGroup(),
    }
    G.add_nodes_from(
        [(name, {'node': node}) for name, node in node_data.items()])
    G.add_edges_from([
        ('a', 'x', {'bundles': [2]}),
        ('k', 'c', {'bundles': [1]}),
        ('j', 'a', {'bundles': [0]}),
        ('c', 'y', {'bundles': [3]}),
    ])
    G.ordering = Ordering([[['j', 'k']], [['a', 'b', 'c']], [['x', 'y']]])

    new_waypoints = {label: Waypoint() for label in ('from b', 'to b')}
    new_bundles = {
        'b>': Bundle('b', Elsewhere, waypoints=['from b']),
        '>b': Bundle(Elsewhere, 'b', waypoints=['to b']),
    }

    G2 = augment(G, new_waypoints, new_bundles)

    added_nodes = set(G2.nodes()).difference(G.nodes())
    assert added_nodes == {'from b', 'to b'}
    expected_ordering = Ordering([
        [['j', 'to b', 'k']],
        [['a', 'b', 'c']],
        [['x', 'from b', 'y']],
    ])
    assert G2.ordering == expected_ordering
def test_view_graph_adds_waypoints():
    """Dummy waypoints fill every empty layer that a bundle crosses."""
    nodes = {pid: ProcessGroup(selection=[pid]) for pid in ('n1', 'n2')}
    nodes['w1'] = Waypoint()
    bundles = [Bundle('n1', 'n2', waypoints=['w1'])]
    order0 = [['n1'], [], ['w1'], [], [], ['n2']]

    G = view_graph(SankeyDefinition(nodes, bundles, order0))

    dummy_names = ['__n1_w1_1', '__w1_n2_3', '__w1_n2_4']
    expected_nodes = [(name, {'node': Waypoint(title='')})
                      for name in dummy_names]
    expected_nodes += [
        ('n1', {'node': ProcessGroup(selection=['n1'])}),
        ('n2', {'node': ProcessGroup(selection=['n2'])}),
        ('w1', {'node': Waypoint()}),
    ]
    assert sorted(nodes_ignoring_elsewhere(G, data=True)) == expected_nodes

    # Every edge along the chain carries the single bundle 0.
    expected_pairs = [
        ('__n1_w1_1', 'w1'),
        ('__w1_n2_3', '__w1_n2_4'),
        ('__w1_n2_4', 'n2'),
        ('n1', '__n1_w1_1'),
        ('w1', '__w1_n2_3'),
    ]
    expected_edges = [(tail, head, {'bundles': [0]})
                      for tail, head in expected_pairs]
    assert sorted(edges_ignoring_elsewhere(G, data=True)) == expected_edges

    chain = ['n1', '__n1_w1_1', 'w1', '__w1_n2_3', '__w1_n2_4', 'n2']
    assert G.ordering == Ordering([[[name]] for name in chain])
def test_bundle_elsewhere():
    """to_elsewhere / from_elsewhere reflect which end of a bundle is Elsewhere."""
    # (bundle, expected to_elsewhere, expected from_elsewhere)
    cases = [
        (Bundle('a', 'b'), False, False),
        (Bundle(Elsewhere, 'b'), False, True),
        (Bundle('a', Elsewhere), True, False),
    ]
    for bundle, expect_to, expect_from in cases:
        assert bundle.to_elsewhere == expect_to
        assert bundle.from_elsewhere == expect_from
def test_sankey_definition_checks_bundles():
    """SankeyDefinition rejects bundles that start or end at a Waypoint.

    Also checks that a valid bundle is accepted, and that bundles given as
    a list are stored as a dict keyed by position.
    """
    nodes = {
        # One-element tuples: without the trailing comma, ('a1') is just the
        # string 'a1' rather than a selection containing one process id.
        'a': ProcessGroup(selection=('a1', )),
        'b': ProcessGroup(selection=('b1', )),
        'waypoint': Waypoint(),
    }
    ordering = Ordering([])

    # Bundles are built outside the `raises` blocks so that only an error
    # coming from SankeyDefinition itself can satisfy the expectation.
    bundles = {0: Bundle('waypoint', 'b')}
    with pytest.raises(ValueError):
        SankeyDefinition(nodes, bundles, ordering)

    bundles = {0: Bundle('b', 'waypoint')}
    with pytest.raises(ValueError):
        SankeyDefinition(nodes, bundles, ordering)

    # should work
    bundles = {0: Bundle('a', 'b')}
    assert SankeyDefinition(nodes, bundles, ordering)

    # also accepts a list
    bundles = [Bundle('a', 'b')]
    assert SankeyDefinition(nodes, bundles, ordering).bundles \
        == {0: Bundle('a', 'b')}
def test_internal_flows():
    """Flows inside a process group go only to a self-bundle whose
    flow_selection matches them, and none are reported as unused."""
    nodes = {
        'a': ProcessGroup(selection=['a']),
        'bcd': ProcessGroup(selection=['b', 'c', 'd']),
        'e': ProcessGroup(selection=['e']),
    }
    bundles = dict(enumerate([
        Bundle('a', 'bcd'),
        Bundle('bcd', 'e'),
        Bundle('bcd', 'bcd', flow_selection='source == "c"'),
    ]))
    # Not passed to apply_view; kept as in the original test.
    ordering = [['a'], ['bcd'], ['e']]

    # Dataset
    records = [
        ('a', 'b', 'm', 4),
        ('b', 'c', 'm', 3),
        ('b', 'd', 'm', 1),
        ('c', 'b', 'm', 2),
        ('c', 'e', 'm', 1),
    ]
    flows = pd.DataFrame.from_records(
        records, columns=('source', 'target', 'material', 'value'))

    bundle_flows, unused = Dataset(flows).apply_view(nodes, bundles)

    def source_target_value(bundle_id):
        """List (source, target, value) for every flow in one bundle."""
        frame = bundle_flows[bundle_id]
        return list(zip(frame['source'], frame['target'], frame['value']))

    assert source_target_value(0) == [('a', 'b', 4)]
    assert source_target_value(1) == [('c', 'e', 1)]
    assert source_target_value(2) == [('c', 'b', 2)]
    assert len(unused) == 0
def test_weave_accepts_dataframe_as_dataset():
    """weave() can be called with a flows DataFrame in place of a Dataset."""
    nodes = {name: ProcessGroup(selection=[name]) for name in ('a', 'b')}
    sdd = SankeyDefinition(nodes, [Bundle('a', 'b')], [['a'], ['b']])
    flows = pd.DataFrame.from_records(
        [('a', 'b', 'm', 3)],
        columns=('source', 'target', 'material', 'value'))

    # Nothing is asserted on the result: the call just has to succeed.
    weave(sdd, flows)
def test_sankey_definition_as_script():
    """Check the script produced by ``SankeyDefinition.to_code()``.

    The generated code is compared with an expected (dedented) literal and
    then executed to confirm it rebuilds a definition equal to the original.
    """
    nodes = {
        'a': ProcessGroup(selection=['a1']),
        'b': ProcessGroup(selection=['b1']),
        'waypoint': Waypoint(),
    }
    ordering = [['a'], ['waypoint'], ['b']]
    bundles = [Bundle('a', 'b')]
    sdd = SankeyDefinition(nodes, bundles, ordering)
    code = sdd.to_code()

    # NOTE(review): the expected literal below is kept exactly as it appears
    # in this file. Its line breaks and indentation look collapsed, and the
    # comparison is whitespace-sensitive -- confirm against the real output
    # of to_code() before relying on it.
    assert code == dedent(""" from floweaver import ( ProcessGroup, Waypoint, Partition, Group, Elsewhere, Bundle, SankeyDefinition, ) nodes = { 'a': ProcessGroup(selection=['a1']), 'b': ProcessGroup(selection=['b1']), 'waypoint': Waypoint(), } ordering = [ [['a']], [['waypoint']], [['b']], ] bundles = [ Bundle(source='a', target='b'), ] sdd = SankeyDefinition(nodes, bundles, ordering) """)

    # Check roundtrip: executing the script must bind an equal `sdd`.
    ctx = {}
    exec(code, ctx)
    assert ctx["sdd"] == sdd
def test_results_graph_bands():
    """results_graph keeps the band layout of the view graph's ordering."""
    bundles = [Bundle('a', 'b')]

    # Mock flow data
    flow_frame = pd.DataFrame.from_records(
        [('a1', 'b1', 'm', 3)],
        columns=('source', 'target', 'material', 'value'))
    bundle_flows = {bundles[0]: flow_frame}

    layered = LayeredGraph()
    for name in ('a', 'b'):
        layered.add_node(name, node=ProcessGroup())
    layered.add_edges_from([('a', 'b', {'bundles': bundles})])
    # Two bands per rank: 'a' in the first band, 'b' in the second.
    layered.ordering = Ordering([
        [['a'], []],
        [[], ['b']],
    ])

    # Do partition based on flows stored in bundles
    Gr, groups = results_graph(layered, bundle_flows)

    expected = Ordering([
        [['a^*'], []],  # rank 1
        [[], ['b^*']],  # rank 2
    ])
    assert Gr.ordering == expected
def test_view_graph_adds_waypoints_partition():
    """Dummy waypoints take on the bundle's default_partition."""
    nodes = {pid: ProcessGroup(selection=[pid]) for pid in ('n1', 'n2')}
    partition = Partition.Simple('test', ['x'])
    bundles = [Bundle('n1', 'n2', default_partition=partition)]
    # The empty middle layer forces a dummy waypoint to be inserted.
    order0 = [['n1'], [], ['n2']]

    G = view_graph(SankeyDefinition(nodes, bundles, order0))

    expected_nodes = [
        ('__n1_n2_1', {'node': Waypoint(title='', partition=partition)}),
        ('n1', {'node': ProcessGroup(selection=['n1'])}),
        ('n2', {'node': ProcessGroup(selection=['n2'])}),
    ]
    assert sorted(nodes_ignoring_elsewhere(G, data=True)) == expected_nodes
def test_weave_results():
    """End-to-end check of weave(): nodes, links, ordering and groups, with
    and without a definition-wide flow_partition."""
    nodes = {
        'a': ProcessGroup(selection=['a1', 'a2']),
        'b': ProcessGroup(selection=['b1']),
        'c': ProcessGroup(selection=['c1', 'c2'],
                          partition=Partition.Simple('process', ['c1', 'c2'])),
        'via': Waypoint(partition=Partition.Simple('material', ['m', 'n'])),
    }
    bundles = [Bundle(source, 'c', waypoints=['via']) for source in ('a', 'b')]
    ordering = [[['a', 'b']], [['via']], [['c']]]
    sdd = SankeyDefinition(nodes, bundles, ordering)

    # Dataset
    records = [
        ('a1', 'c1', 'm', 3),
        ('a2', 'c1', 'n', 1),
        ('b1', 'c1', 'm', 1),
        ('b1', 'c2', 'm', 2),
        ('b1', 'c2', 'n', 1),
    ]
    flows = pd.DataFrame.from_records(
        records, columns=('source', 'target', 'material', 'value'))
    process_ids = list(flows.source.unique()) + list(flows.target.unique())
    dim_process = pd.DataFrame({'id': process_ids}).set_index('id')
    dataset = Dataset(flows, dim_process)

    def expected_link(src, tgt, original_flows, value, link_type='*',
                      color='#FBB4AE'):
        """Build the SankeyLink expected for one link of the result."""
        return SankeyLink(source=src,
                          target=tgt,
                          type=link_type,
                          time='*',
                          data={'value': value},
                          title=link_type,
                          color=color,
                          original_flows=original_flows)

    # (source, target, original flow ids, value, material)
    link_rows = [
        ('a^*', 'via^m', [0], 3, 'm'),
        ('a^*', 'via^n', [1], 1, 'n'),
        ('b^*', 'via^m', [2, 3], 3, 'm'),
        ('b^*', 'via^n', [4], 1, 'n'),
        ('via^m', 'c^c1', [0, 2], 4, 'm'),
        ('via^m', 'c^c2', [3], 2, 'm'),
        ('via^n', 'c^c1', [1], 1, 'n'),
        ('via^n', 'c^c2', [4], 1, 'n'),
    ]

    result = weave(sdd, dataset)

    expected_node_ids = {'a^*', 'b^*', 'via^m', 'via^n', 'c^c1', 'c^c2'}
    assert {node.id for node in result.nodes} == expected_node_ids

    # Without a flow partition every link has the default type and colour.
    assert sorted(result.links) == [
        expected_link(src, tgt, list(flow_ids), value)
        for src, tgt, flow_ids, value, _ in link_rows
    ]

    assert result.ordering == Ordering([
        [['a^*', 'b^*']],
        [['via^m', 'via^n']],
        [['c^c1', 'c^c2']],
    ])

    assert result.groups == [
        {
            'id': 'via',
            'title': '',
            'type': 'group',
            'nodes': ['via^m', 'via^n'],
        },
        {
            'id': 'c',
            'title': '',
            'type': 'process',
            'nodes': ['c^c1', 'c^c2'],
        },
    ]

    # Can also set flow_partition for all bundles at once
    material_partition = Partition.Simple('material', ['m', 'n'])
    sdd2 = SankeyDefinition(nodes, bundles, ordering,
                            flow_partition=material_partition)

    scale = CategoricalScale('type', palette=['red', 'blue'])
    scale.set_domain(['m', 'n'])
    result = weave(sdd2, dataset, link_color=scale)

    # Expected colour for each material under the scale configured above.
    colour_of = {'m': 'red', 'n': 'blue'}
    assert sorted(result.links) == [
        expected_link(src, tgt, list(flow_ids), value, material,
                      colour_of[material])
        for src, tgt, flow_ids, value, material in link_rows
    ]
def test_bundle_to_self_allowed_only_if_flow_selection_specified():
    """A bundle from a node to itself raises ValueError unless a
    flow_selection is supplied."""
    node_id = 'x'

    with pytest.raises(ValueError):
        Bundle(node_id, node_id)

    self_bundle = Bundle(node_id, node_id, flow_selection='...')
    assert self_bundle
def test_bundle_hashable():
    """Bundles can be hashed, and equal bundles hash alike.

    The hash value itself is not tested for truthiness: 0 is a perfectly
    valid hash, and ``assert hash(...)`` would fail on it.
    """
    # hash() raises TypeError for unhashable objects, so reaching the
    # isinstance check already proves hashability.
    assert isinstance(hash(Bundle('a', 'b')), int)
    # Equal bundles must hash alike to work as dict keys and set members.
    assert hash(Bundle('a', 'b')) == hash(Bundle('a', 'b'))