def traverse_graph(starting_pks, max_iterations=None, get_links=False, links_forward=(), links_backward=()):
    """
    This function will return the set of all nodes that can be connected
    to a list of initial nodes through any sequence of specified links.
    Optionally, it may also return the links that connect these nodes.

    :type starting_pks: list or tuple or set
    :param starting_pks: Contains the (valid) pks of the starting nodes.

    :type max_iterations: int or None
    :param max_iterations:
        The number of iterations to apply the set of rules (a value of 'None' will
        iterate until no new nodes are added).

    :param bool get_links:
        Pass True to also return the links between all nodes (found + initial).

    :type links_forward: aiida.common.links.LinkType
    :param links_forward: List with all the links that should be traversed in the forward direction.

    :type links_backward: aiida.common.links.LinkType
    :param links_backward: List with all the links that should be traversed in the backward direction.

    :returns:
        A dictionary with keys 'nodes' (set of pks of all connected nodes) and
        'links' (set of traversed links if ``get_links`` is True, otherwise None).

    :raises TypeError: if any argument has an invalid type.
    :raises aiida.common.exceptions.NotExistent: if any starting pk is not in the database.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    from aiida import orm
    from aiida.tools.graph.age_entities import Basket
    from aiida.tools.graph.age_rules import UpdateRule, RuleSequence, RuleSaveWalkers, RuleSetWalkers
    from aiida.common import exceptions

    if max_iterations is None:
        max_iterations = inf
    elif not (isinstance(max_iterations, int) or max_iterations is inf):
        raise TypeError('Max_iterations has to be an integer or infinity')

    def _edge_filters(links, arg_name):
        # Validate that every entry is a LinkType and translate the collection
        # into the edge_filters dict expected by the QueryBuilder. The same
        # validation applies to both traversal directions.
        linktype_values = []
        for linktype in links:
            if not isinstance(linktype, LinkType):
                raise TypeError('{} should contain links, but one of them is: {}'.format(arg_name, type(linktype)))
            linktype_values.append(linktype.value)
        return {'type': {'in': linktype_values}}

    filters_forwards = _edge_filters(links_forward, 'links_forward')
    filters_backwards = _edge_filters(links_backward, 'links_backward')

    if not isinstance(starting_pks, (list, set, tuple)):
        raise TypeError('starting_pks must be of type list, set or tuple\ninstead, it is {}'.format(type(starting_pks)))

    # Early exit: with no starting nodes there is nothing to traverse.
    if not starting_pks:
        if get_links:
            output = {'nodes': set(), 'links': set()}
        else:
            output = {'nodes': set(), 'links': None}
        return output

    if any(not isinstance(pk, int) for pk in starting_pks):
        raise TypeError('one of the starting_pks is not of type int:\n {}'.format(starting_pks))
    operational_set = set(starting_pks)

    # Verify that every requested pk actually exists in the database.
    query_nodes = orm.QueryBuilder()
    query_nodes.append(orm.Node, project=['id'], filters={'id': {'in': operational_set}})
    existing_pks = set(query_nodes.all(flat=True))
    missing_pks = operational_set.difference(existing_pks)
    if missing_pks:
        raise exceptions.NotExistent(
            'The following pks are not in the database and must be pruned before this call: {}'.format(missing_pks)
        )

    rules = []
    basket = Basket(nodes=operational_set)

    # When max_iterations is finite, the order of traversal may affect the result
    # (it's not the same to first go backwards and then forwards than vice-versa).
    # In order to make it order-independent, the result of the first operation needs
    # to be stashed and the second operation must be performed only on the nodes
    # that were already in the set at the beginning of the iteration: this way, both
    # rules are applied on the same set of nodes and the order doesn't matter.
    # The way to do this is saving and setting the walkers at the right moments only
    # when both forwards and backwards rules are present.
    if links_forward and links_backward:
        stash = basket.get_template()
        rules += [RuleSaveWalkers(stash)]

    if links_forward:
        query_outgoing = orm.QueryBuilder()
        query_outgoing.append(orm.Node, tag='sources')
        query_outgoing.append(orm.Node, edge_filters=filters_forwards, with_incoming='sources')
        rule_outgoing = UpdateRule(query_outgoing, max_iterations=1, track_edges=get_links)
        rules += [rule_outgoing]

    if links_forward and links_backward:
        rules += [RuleSetWalkers(stash)]

    if links_backward:
        query_incoming = orm.QueryBuilder()
        query_incoming.append(orm.Node, tag='sources')
        query_incoming.append(orm.Node, edge_filters=filters_backwards, with_outgoing='sources')
        rule_incoming = UpdateRule(query_incoming, max_iterations=1, track_edges=get_links)
        rules += [rule_incoming]

    rulesequence = RuleSequence(rules, max_iterations=max_iterations)

    results = rulesequence.run(basket)

    output = {}
    output['nodes'] = results.nodes.keyset
    output['links'] = None
    if get_links:
        output['links'] = results['nodes_nodes'].keyset

    return output
def test_stash(self):
    """Test order dependence of RuleSequence and the 'stash' mechanism.

    Runs input- and output-expansion rules in both orders on a graph built by
    ``self._create_branchy_graph`` and checks that plain sequences are
    order-dependent, while save/load of the walkers (the stash) makes the
    result order-independent.

    NOTE(review): judging from the keys actually used, the relevant part of
    the graph appears to be the chain
    data_i -> calc_1 -> data_1 -> calc_2 -> data_o
    (TODO: confirm against ``_create_branchy_graph``).
    """
    nodes = self._create_branchy_graph()
    basket = Basket(nodes=[nodes['data_1'].id])

    def _expansion_rule(**direction):
        # Build an UpdateRule that expands the current set one hop in the
        # given direction (with_outgoing => inputs, with_incoming => outputs).
        query = orm.QueryBuilder()
        query.append(orm.Node, tag='nodes_in_set')
        query.append(orm.Node, **direction)
        return UpdateRule(query)

    expand_inputs = _expansion_rule(with_outgoing='nodes_in_set')
    expand_outputs = _expansion_rule(with_incoming='nodes_in_set')

    core_pks = {nodes['calc_1'].id, nodes['data_1'].id, nodes['calc_2'].id}

    # First get outputs, then inputs.
    found = RuleSequence((expand_outputs, expand_inputs)).run(basket.copy())['nodes'].keyset
    self.assertEqual(found, core_pks.union({nodes['data_i'].id}))

    # First get inputs, then outputs.
    found = RuleSequence((expand_inputs, expand_outputs)).run(basket.copy())['nodes'].keyset
    self.assertEqual(found, core_pks.union({nodes['data_o'].id}))

    # Now using the stash option in either order.
    stash = basket.get_template()
    rule_save = RuleSaveWalkers(stash)
    rule_load = RuleSetWalkers(stash)

    # Checking whether Rule does the right thing
    # (i.e. If I stash the result, the operational sets should be the original,
    # set, whereas the stash contains the same data as the starting point)
    self.assertEqual(rule_save.run(basket.copy()), basket.copy())
    self.assertEqual(stash, basket)

    # With save/load around the first expansion, the second expansion acts on
    # the original set only, so both orders give the same (core) result.
    for first_rule, second_rule in ((expand_inputs, expand_outputs), (expand_outputs, expand_inputs)):
        stash = basket.get_template()
        rule_save = RuleSaveWalkers(stash)
        rule_load = RuleSetWalkers(stash)
        stashed_sequence = RuleSequence((rule_save, first_rule, rule_load, second_rule))
        self.assertEqual(stashed_sequence.run(basket.copy())['nodes'].keyset, core_pks)
def traverse_graph(
    starting_pks: Iterable[int],
    max_iterations: Optional[int] = None,
    get_links: bool = False,
    links_forward: Iterable[LinkType] = (),
    links_backward: Iterable[LinkType] = (),
    missing_callback: Optional[Callable[[Iterable[int]], None]] = None
) -> TraverseGraphOutput:
    """
    This function will return the set of all nodes that can be connected
    to a list of initial nodes through any sequence of specified links.
    Optionally, it may also return the links that connect these nodes.

    :param starting_pks: Contains the (valid) pks of the starting nodes.

    :param max_iterations:
        The number of iterations to apply the set of rules (a value of 'None' will
        iterate until no new nodes are added).

    :param get_links:
        Pass True to also return the links between all nodes (found + initial).

    :param links_forward: List with all the links that should be traversed in the forward direction.
    :param links_backward: List with all the links that should be traversed in the backward direction.

    :param missing_callback: A callback to handle missing starting_pks or if None raise NotExistent

    :raises TypeError: if any argument has an invalid type.
    :raises aiida.common.exceptions.NotExistent:
        if a starting pk is missing from the database and no ``missing_callback`` is given.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches

    if max_iterations is None:
        max_iterations = cast(int, inf)
    elif not (isinstance(max_iterations, int) or max_iterations is inf):
        raise TypeError('Max_iterations has to be an integer or infinity')

    linktype_list = []
    for linktype in links_forward:
        if not isinstance(linktype, LinkType):
            raise TypeError(f'links_forward should contain links, but one of them is: {type(linktype)}')
        linktype_list.append(linktype.value)
    filters_forwards = {'type': {'in': linktype_list}}

    linktype_list = []
    for linktype in links_backward:
        if not isinstance(linktype, LinkType):
            raise TypeError(f'links_backward should contain links, but one of them is: {type(linktype)}')
        linktype_list.append(linktype.value)
    filters_backwards = {'type': {'in': linktype_list}}

    if not isinstance(starting_pks, Iterable):  # pylint: disable=isinstance-second-argument-not-valid-type
        raise TypeError(f'starting_pks must be an iterable\ninstead, it is {type(starting_pks)}')

    # Materialize the pks exactly once: `starting_pks` may be a one-shot
    # generator, and iterating it twice (once for the int check, once to build
    # the set) would silently exhaust it and yield an empty operational set.
    starting_pks = list(starting_pks)

    if any(not isinstance(pk, int) for pk in starting_pks):
        raise TypeError(f'one of the starting_pks is not of type int:\n {starting_pks}')

    operational_set = set(starting_pks)

    # Early exit: with no starting nodes there is nothing to traverse.
    if not operational_set:
        if get_links:
            return {'nodes': set(), 'links': set()}
        return {'nodes': set(), 'links': None}

    # Determine which of the requested pks actually exist in the database.
    query_nodes = orm.QueryBuilder()
    query_nodes.append(orm.Node, project=['id'], filters={'id': {'in': operational_set}})
    existing_pks = set(query_nodes.all(flat=True))
    missing_pks = operational_set.difference(existing_pks)
    if missing_pks:
        if missing_callback is None:
            raise exceptions.NotExistent(
                f'The following pks are not in the database and must be pruned before this call: {missing_pks}'
            )
        missing_callback(missing_pks)

    rules = []
    basket = Basket(nodes=existing_pks)

    # When max_iterations is finite, the order of traversal may affect the result
    # (it's not the same to first go backwards and then forwards than vice-versa).
    # In order to make it order-independent, the result of the first operation needs
    # to be stashed and the second operation must be performed only on the nodes
    # that were already in the set at the beginning of the iteration: this way, both
    # rules are applied on the same set of nodes and the order doesn't matter.
    # The way to do this is saving and setting the walkers at the right moments only
    # when both forwards and backwards rules are present.
    if links_forward and links_backward:
        stash = basket.get_template()
        rules += [RuleSaveWalkers(stash)]

    if links_forward:
        query_outgoing = orm.QueryBuilder()
        query_outgoing.append(orm.Node, tag='sources')
        query_outgoing.append(orm.Node, edge_filters=filters_forwards, with_incoming='sources')
        rule_outgoing = UpdateRule(query_outgoing, max_iterations=1, track_edges=get_links)
        rules += [rule_outgoing]

    if links_forward and links_backward:
        rules += [RuleSetWalkers(stash)]

    if links_backward:
        query_incoming = orm.QueryBuilder()
        query_incoming.append(orm.Node, tag='sources')
        query_incoming.append(orm.Node, edge_filters=filters_backwards, with_outgoing='sources')
        rule_incoming = UpdateRule(query_incoming, max_iterations=1, track_edges=get_links)
        rules += [rule_incoming]

    rulesequence = RuleSequence(rules, max_iterations=max_iterations)

    results = rulesequence.run(basket)

    output = {}
    output['nodes'] = results.nodes.keyset
    output['links'] = None
    if get_links:
        output['links'] = results['nodes_nodes'].keyset

    return cast(TraverseGraphOutput, output)