Esempio n. 1
0
    def test_mapper_from_graph(self):
        """A simple test case to check the working of mapper."""
        # A mock RDF object
        t = ComputationGraphGeneratorTest.Temp()

        # Head node
        hn = create_dummy_headnode(1)
        hn.backend = ComputationGraphGeneratorTest.TestBackend()
        node = Proxy.TransformationProxy(hn)
        # Set of operations to build the graph
        n1 = node.Define()
        n2 = node.Filter().Filter()
        n4 = n2.Count()
        n5 = n1.Count()
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Generate and execute the mapper
        generator = ComputationGraphGenerator.ComputationGraphGenerator(
            node.proxied_node)
        mapper_func = generator.generate_computation_graph
        triggerables = mapper_func(t, 0)
        nodes = generator.get_action_nodes()

        reqd_order = [1, 3, 2, 2, 3, 2]

        self.assertEqual(t.ord_list, reqd_order)
        self.assertListEqual(nodes, [n5.proxied_node, n4.proxied_node])
        self.assertListEqual(triggerables, [t, t])
Esempio n. 2
0
    def execute_graph(self) -> None:
        """
        Executes an RDataFrame computation graph on a distributed backend.

        The needed ingredients are:

        - A collection of logical ranges in which the dataset is split. Each
          range is going to be assigned to a distributed task.
        - A representation of the computation graph that the task needs to
          execute.
        - A way to generate an RDataFrame instance starting from the logical
          range of the task.
        - Optionally, some setup code to be run at the beginning of each task.

        These are used as inputs to a generic mapper function. Results from the
        various mappers are then reduced and the final results are retrieved in
        the local session. These are properly handled to perform extra checks,
        depending on the data source. Finally, the local user-facing nodes are
        filled with the values that were computed distributedly so that they
        can be accessed in the application like with local RDataFrame.
        """
        # Check if the workflow must be generated in optimized mode
        optimized = ROOT.RDF.Experimental.Distributed.optimized

        if optimized:
            computation_graph_callable = ComputationGraphGenerator.get_callable_optimized()
        else:
            computation_graph_callable = partial(
                ComputationGraphGenerator.trigger_computation_graph, self._generate_graph_dict())

        mapper = partial(distrdf_mapper,
                         build_rdf_from_range=self._generate_rdf_creator(),
                         computation_graph_callable=computation_graph_callable,
                         initialization_fn=self.backend.initialization,
                         optimized=optimized)

        # Execute graph distributedly and return the aggregated results from all
        # tasks
        returned_values = self.backend.ProcessAndMerge(self._build_ranges(), mapper, distrdf_reducer)
        # Perform any extra checks that may be needed according to the
        # type of the head node
        final_values = self._handle_returned_values(returned_values)
        # List of action nodes in the same order as values
        local_nodes = self._get_action_nodes()
        # Set the value of every action node
        for node, value in zip(local_nodes, final_values):
            Utils.set_value_on_node(value, node, self.backend)
Esempio n. 3
0
    def test_mapper_with_pruning(self):
        """
        A test case to check that the mapper works even in the case of
        pruning.

        """
        # A mock RDF object
        t = ComputationGraphGeneratorTest.Temp()

        # Head node
        hn = create_dummy_headnode(1)
        hn.backend = ComputationGraphGeneratorTest.TestBackend()
        node = Proxy.TransformationProxy(hn)

        # Set of operations to build the graph
        n1 = node.Define()
        n2 = node.Filter().Filter()
        n4 = n2.Count()
        n5 = n1.Count()
        n6 = node.Filter()  # noqa: avoid PEP8 F841

        # Reason for pruning (change of reference)
        n5 = n1.Filter()  # noqa: avoid PEP8 F841

        # Generate and execute the mapper
        generator = ComputationGraphGenerator.ComputationGraphGenerator(
            node.proxied_node)
        # Prune first
        generator.headnode.graph_prune()
        mapper_func = generator.generate_computation_graph
        triggerables = mapper_func(t, 0)
        nodes = generator.get_action_nodes()

        reqd_order = [1, 2, 2, 2, 3, 2]

        self.assertEqual(t.ord_list, reqd_order)
        self.assertListEqual(nodes, [n4.proxied_node])
        self.assertListEqual(triggerables, [t])