Exemple #1
0
    def test_data_provenance(self):
        """
        Check that the descendants of a parent node are found at every depth.

        A tree is created with ``create_tree(self.DEPTH, self.NR_OF_CHILDREN)``.
        Starting from a basket that only holds the parent node, an
        ``UpdateRule`` over a Node -> Node query is run with
        ``max_iterations=depth`` for every ``depth`` below ``self.DEPTH``:

        * in REPLACE mode the resulting node set has to equal
          ``depth_dict[depth]``;
        * in APPEND mode the resulting node set has to equal the union of
          ``depth_dict[0]`` up to and including ``depth_dict[depth]``.
        """
        from age.utils import create_tree
        created_dict = create_tree(self.DEPTH, self.NR_OF_CHILDREN)
        parent = created_dict['parent']
        desc_dict = created_dict['depth_dict']

        # The tree exists now; the starting entity set holds only the parent.
        es = get_basket(node_ids=(parent.id,))
        qb = QueryBuilder().append(Node).append(Node)

        for depth in range(self.DEPTH):
            # REPLACE mode: compared against the entries of this depth only.
            rule = UpdateRule(qb, mode=MODES.REPLACE, max_iterations=depth)
            res = rule.run(es.copy())['nodes']._set
            # assertEqual on sets reports the differing items on failure,
            # unlike a bare assertTrue on the two set differences.
            self.assertEqual(res, set(desc_dict[depth]))

            # APPEND mode: compared against everything up to this depth.
            rule = UpdateRule(qb, mode=MODES.APPEND, max_iterations=depth)
            res = rule.run(es.copy())['nodes']._set
            should_set = set()
            for d in range(depth + 1):
                should_set.update(desc_dict[d])
            self.assertEqual(res, should_set)
Exemple #2
0
    def runTest(self):
        """
        Testing whether groups and nodes can be traversed with the Graph explorer.

        ``self.N_GROUPS`` groups are stored, plus one data node fewer than
        groups. The data node with index ``i`` is added to group ``i`` and to
        group ``i - 1``. Starting from the data node created last, the test
        checks that

        * a single rule (Data -> Group -> Data) run without an iteration limit
          yields all created data nodes, and reports the same set as visited;
        * a sequence of two rules (Node -> Group, Group -> Node) yields all
          created data nodes and all created groups.

        NOTE: this uses the ``group_of`` / ``member_of`` keywords; the original
        author left hints that ``with_node`` / ``with_group`` are the
        replacements on a newer API.
        """
        # Group names have to be unique, hence the running index.
        groups = [
            Group(name='g-{}'.format(group_idx)).store()
            for group_idx in range(self.N_GROUPS)
        ]

        # One data node fewer than groups; each one joins the group with the
        # same index and the group with the preceding index.
        nodes = []
        for node_idx in range(1, self.N_GROUPS):
            data_node = Data().store()
            groups[node_idx].add_nodes(data_node)
            groups[node_idx - 1].add_nodes(data_node)
            nodes.append(data_node)

        # Expected primary keys.
        nodes_set = {node.id for node in nodes}
        groups_set = {group.id for group in groups}

        # Data -> Group -> Data: reach every data node that shares a group.
        # The filter on 'type' is there to avoid problems with autogrouping:
        # depending on how the test is run, nodes can be put into autogroups.
        qb = QueryBuilder().append(
            Data, tag='d'
        ).append(
            Group, group_of='d', tag='g', filters={'type': ''}
        ).append(
            Data, member_of='g'
        )

        # Start from the data node that was created last.
        es = get_basket(node_ids=(data_node.id,))
        rule = UpdateRule(qb, max_iterations=np.inf)
        res = rule.run(es.copy())['nodes']._set
        # The rule has to end up with every data node created above ...
        self.assertEqual(res, nodes_set)
        # ... and its recorded visits have to agree with that result.
        self.assertEqual(rule.get_visits()['nodes']._set, res)

        # The same traversal, expressed as two rules chained in a RuleSequence.
        node_to_group = QueryBuilder().append(Node, tag='n').append(
            Group, group_of='n', filters={'type': ''})
        group_to_node = QueryBuilder().append(Group, tag='n').append(
            Node, member_of='n')
        seq = RuleSequence(
            (UpdateRule(node_to_group), UpdateRule(group_to_node)),
            max_iterations=np.inf)
        res = seq.run(es.copy())

        found_nodes = res['nodes']._set
        found_groups = res['groups']._set
        self.assertEqual(found_nodes, nodes_set.copy())
        self.assertEqual(found_groups, groups_set)
Exemple #3
0
    def runTest(self):
        """
        Testing whether nodes can be traversed with the Graph explorer
        while the traversed links (edges) are tracked.

        A tree is created with ``create_tree``; its ``instances`` and
        ``adjacency`` entries are used to derive the expected edges.
        Starting from the parent node, a Node -> Node rule is run with
        ``max_iterations=self.DEPTH-1`` and ``track_edges=True``:

        * in APPEND mode the nodes have to equal the union of all entries of
          ``depth_dict`` and the edges have to equal every pair flagged in
          ``adjacency``;
        * in REPLACE mode the edges have to equal only those pairs that lead
          from ``depth_dict[self.DEPTH-2]`` to ``depth_dict[self.DEPTH-1]``.
        """
        from age.utils import create_tree
        # Create the tree and fetch the bookkeeping needed for the checks.
        created_dict = create_tree(self.DEPTH, self.NR_OF_CHILDREN, draw=True)
        instances = created_dict['instances']
        adjacency = created_dict['adjacency']
        depth_dict = created_dict['depth_dict']

        es = get_basket(node_ids=(created_dict['parent'].id,))
        qb = QueryBuilder().append(Node).append(Node)

        # --- APPEND mode -----------------------------------------------------
        rule = UpdateRule(qb, mode=MODES.APPEND, max_iterations=self.DEPTH-1, track_edges=True)
        res = rule.run(es.copy())

        should_set = set()
        for d in range(self.DEPTH):
            should_set.update(depth_dict[d])
        self.assertEqual(res['nodes']._set, should_set)

        touples_should = set(
            (instances[i], instances[j]) for i, j in zip(*np.where(adjacency)))
        # Only the first two items of every tracked edge are compared.
        # Indexing each edge directly (rather than subscripting the result
        # of zip()) also works on Python 3, where zip() returns an iterator.
        touples_are = set(
            (edge[0], edge[1]) for edge in res['nodes_nodes']._set)
        self.assertEqual(touples_are, touples_should)

        # --- REPLACE mode ----------------------------------------------------
        rule = UpdateRule(qb, mode=MODES.REPLACE, max_iterations=self.DEPTH-1, track_edges=True)
        res = rule.run(es.copy())

        # Since the replace rule is applied, only the last set of links
        # (second-to-last depth -> last depth) should appear.
        touples_should = set(
            (pk1, pk2)
            for idx1, pk1 in enumerate(instances)
            for idx2, pk2 in enumerate(instances)
            if adjacency[idx1, idx2]
            and pk1 in depth_dict[self.DEPTH-2]
            and pk2 in depth_dict[self.DEPTH-1]
        )
        touples_are = set(
            (edge[0], edge[1]) for edge in res['nodes_nodes']._set)
        self.assertEqual(touples_are, touples_should)
Exemple #4
0
 def test_cycle(self):
     """
     A data node that is both an input of and returned by the same
     WorkCalculation forms a cycle.

     A Node -> Node rule without an iteration limit, started from the data
     node, is expected to finish with exactly the two nodes of the cycle.

     NOTE: this uses ``add_link_from``; the original author left hints that
     ``add_incoming`` is the replacement on a newer API.
     """
     data_node = Data().store()
     work_node = WorkCalculation().store()
     # Close the loop: data -> work (INPUT) and work -> data (RETURN).
     work_node.add_link_from(data_node, link_type=LinkType.INPUT, label='lala')
     data_node.add_link_from(work_node, link_type=LinkType.RETURN, label='lala')

     rule = UpdateRule(
         QueryBuilder().append(Node).append(Node),
         max_iterations=np.inf)
     start = get_basket(node_ids=(data_node.id,))
     res = rule.run(start)

     expected = {data_node.id, work_node.id}
     self.assertEqual(res['nodes']._set, expected)
Exemple #5
0
    def test_stash(self):
        """
        Testing rule sequences with and without the 'stash'.

        Set-up: a calculation with 3 input data nodes and 4 output data
        nodes, plus a second calculation that takes the last created input
        and the last created output of the first one as its own inputs.

        Starting from the first calculation only, the test checks:

        * the input rule alone yields the inputs plus the calculation;
        * the output rule alone yields the outputs plus the calculation;
        * the sequence (outputs, inputs) yields inputs, outputs and the
          calculation;
        * the sequence (inputs, outputs) additionally yields the second
          calculation, because both calculations share one input;
        * saving the walkers into a stash before the input rule and setting
          them from the stash before the output rule leaves the second
          calculation out again.

        NOTE: this uses ``add_link_from`` / ``input_of`` / ``output_of``; the
        original author left hints that ``add_incoming`` / ``with_outgoing`` /
        ``with_incoming`` are the replacements on a newer API.
        """
        # First calculation, fed by three input data nodes.
        calc = Calculation().store()
        input_pks = set()  # pks of the input data, for later comparison
        for idx in range(3):
            data_in = Data().store()
            input_pks.add(data_in.id)
            calc.add_link_from(
                data_in,
                link_type=LinkType.INPUT,
                label='lala-{}'.format(idx))

        # Four output data nodes created by that calculation.
        output_pks = set()  # pks of the output data, analogous to input_pks
        for idx in range(4):
            data_out = Data().store()
            output_pks.add(data_out.id)
            data_out.add_link_from(
                calc,
                link_type=LinkType.CREATE,
                label='lala-{}'.format(idx))

        # Second calculation: one input taken from the first calculation's
        # inputs and one from its outputs (the last node of each loop above).
        second_calc = Calculation().store()
        second_calc.add_link_from(data_in, link_type=LinkType.INPUT, label='b')
        second_calc.add_link_from(data_out, link_type=LinkType.INPUT, label='c')

        # The starting set contains nothing but the first calculation.
        es = get_basket(node_ids=(calc.id,))

        # Rule that collects input nodes.
        inputs_query = QueryBuilder().append(Node, tag='n').append(
            Node, input_of='n')
        rule_in = UpdateRule(inputs_query)
        # Rule that collects output nodes.
        outputs_query = QueryBuilder().append(Node, tag='n').append(
            Node, output_of='n')
        rule_out = UpdateRule(outputs_query)

        # Input rule on its own: the inputs and the calculation itself.
        is_set = rule_in.run(es.copy())['nodes']._set
        self.assertEqual(is_set, input_pks.union({calc.id}))

        # Output rule on its own: the outputs and the calculation itself.
        is_set = rule_out.run(es.copy())['nodes']._set
        self.assertEqual(is_set, output_pks.union({calc.id}))

        # Sequence "outputs, then inputs": the union of inputs, outputs and
        # the calculation is expected.
        out_then_in = RuleSequence((rule_out, rule_in))
        is_set = out_then_in.run(es.copy())['nodes']._set
        self.assertEqual(is_set, output_pks.union(input_pks).union({calc.id}))

        # Sequence "inputs, then outputs": the second calculation is picked up
        # as well, because both calculations share one data input.
        in_then_out = RuleSequence((rule_in, rule_out))
        is_set = in_then_out.run(es.copy())['nodes']._set
        self.assertEqual(
            is_set,
            output_pks.union(input_pks).union({calc.id, second_calc.id}))

        # A similar sequence, but with the possibility to stash the walkers.
        stash = es.copy(with_data=False)
        rsave = RuleSaveWalkers(stash)
        # After stashing, what the rule returns should equal a data-less copy
        # of the starting set ...
        after_save = rsave.run(es.copy())
        self.assertEqual(after_save, es.copy(with_data=False))
        # ... whereas the stash should now equal the starting point.
        self.assertEqual(stash, es)

        stashed_sequence = RuleSequence((
            RuleSaveWalkers(stash), rule_in,
            RuleSetWalkers(stash), rule_out))
        is_set = stashed_sequence.run(es.copy())['nodes']._set
        # With the stash in place the second calculation must not show up
        # in the results.
        self.assertEqual(is_set, output_pks.union(input_pks).union({calc.id}))