Exemple #1
0
    def test_should_return_all_frames_when_no_predicate_is_applied(self):
        """Scan with ``predicate=None`` and compare the result to the input.

        Builds a three-frame ``FrameBatch`` with one prediction per frame,
        feeds it through ``SequentialScanExecutor`` via a ``DummyExecutor``
        child, and asserts that the first emitted batch equals a batch built
        from the same frames and outcomes.
        """
        frames = [Frame(1, scale * np.ones((1, 1)), None)
                  for scale in (1, 2, 3)]
        # NOTE(review): the middle prediction pairs one label with two
        # scores -- confirm this is intended.
        labels_per_frame = (["car", "bus"], ["bus"], ["car", "train"])
        predictions = [Prediction(frame, labels, [0.5, 0.6])
                       for frame, labels in zip(frames, labels_per_frame)]

        source_batch = FrameBatch(frames=list(frames),
                                  info=None,
                                  outcomes={"test": list(predictions)})

        # A throwaway class stands in for the scan plan; only the
        # ``predicate`` attribute is provided.
        scan_plan = type("ScanPlan", (), {"predicate": None})
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        expected_batch = FrameBatch(frames=list(frames),
                                    info=None,
                                    outcomes={"test": list(predictions)})
        emitted_batches = list(scan_executor.exec())
        self.assertEqual(expected_batch, emitted_batches[0])
    def test_should_return_all_frames_when_no_predicate_is_applied(self):
        """Scan with ``predicate=None`` and expect the input batch back.

        The batch wraps ``create_dataframe(3)`` plus three ``Outcome``
        objects stored under the ``"test"`` key; the first batch emitted by
        the executor is asserted equal to that same batch.
        """
        frames_df = create_dataframe(3)

        # One table per outcome; each Outcome is built with 'labels' as its
        # second argument.
        outcome_tables = (
            {'labels': ["car", "bus"], 'scores': [0.5, 0.6]},
            {'labels': ["bus"], 'scores': [0.5]},
            {'labels': ["car", "train"], 'scores': [0.5, 0.6]},
        )
        outcomes = [Outcome(pd.DataFrame(table), 'labels')
                    for table in outcome_tables]
        source_batch = Batch(frames=frames_df, outcomes={"test": outcomes})

        # A throwaway class stands in for the scan plan.
        scan_plan = type("ScanPlan", (), {"predicate": None})
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(source_batch, emitted_batches[0])
Exemple #3
0
    def test_should_return_only_frames_satisfy_predicate(self):
        """Scan with a stub predicate that selects only the third frame.

        The stub's ``evaluate`` always returns ``[False, False, True]``; the
        first emitted batch is asserted equal to a ``FrameBatch`` holding
        just the third frame and its prediction.
        """
        frames = [Frame(1, scale * np.ones((1, 1)), None)
                  for scale in (1, 2, 3)]
        # NOTE(review): the middle prediction pairs one label with two
        # scores -- confirm this is intended.
        labels_per_frame = (["car", "bus"], ["bus"], ["car", "train"])
        predictions = [Prediction(frame, labels, [0.5, 0.6])
                       for frame, labels in zip(frames, labels_per_frame)]

        source_batch = FrameBatch(frames=list(frames),
                                  info=None,
                                  outcomes={"test": list(predictions)})

        # The stub is a class, not an instance, so ``evaluate`` is a plain
        # function taking one positional argument (presumably the batch --
        # the executor's call site is not visible here).
        predicate_stub = type(
            "AbstractExpression", (),
            {"evaluate": lambda x: [False, False, True]})

        scan_plan = type("ScanPlan", (), {"predicate": predicate_stub})
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        expected_batch = FrameBatch(frames=[frames[2]],
                                    info=None,
                                    outcomes={"test": [predictions[2]]})
        emitted_batches = list(scan_executor.exec())
        self.assertEqual(expected_batch, emitted_batches[0])
    def test_should_return_only_frames_satisfy_predicate(self):
        """Scan with a stub predicate and expect only index 2 to survive.

        The stub's ``evaluate`` always returns ``[False, False, True]``; the
        first emitted batch is asserted equal to ``batch[[2]]``.
        """
        frames_df = create_dataframe(3)

        # One table per outcome; each Outcome is built with 'labels' as its
        # second argument.
        outcome_tables = (
            {'labels': ["car", "bus"], 'scores': [0.5, 0.6]},
            {'labels': ["bus"], 'scores': [0.5]},
            {'labels': ["car", "train"], 'scores': [0.5, 0.6]},
        )
        outcomes = [Outcome(pd.DataFrame(table), 'labels')
                    for table in outcome_tables]
        source_batch = Batch(frames=frames_df, outcomes={"test": outcomes})

        # The stub is a class, not an instance, so ``evaluate`` is a plain
        # function taking one positional argument.
        predicate_stub = type(
            "AbstractExpression", (),
            {"evaluate": lambda x: [False, False, True]})

        scan_plan = type("ScanPlan", (), {"predicate": predicate_stub})
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        expected_batch = source_batch[[2]]

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(expected_batch, emitted_batches[0])
Exemple #5
0
    def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor:
        """build the execution tree from plan tree

        Arguments:
            plan {AbstractPlan} -- Input Plan tree

        Returns:
            AbstractExecutor -- Compiled Execution tree
        """
        root = None
        if plan is None:
            return root

        # Get plan node type
        plan_node_type = plan.node_type

        if plan_node_type == PlanNodeType.SEQUENTIAL_SCAN:
            executor_node = SequentialScanExecutor(node=plan)
        elif plan_node_type == PlanNodeType.PP_FILTER:
            executor_node = PPExecutor(node=plan)
        elif plan_node_type == PlanNodeType.CREATE:
            executor_node = CreateExecutor(node=plan)
        elif plan_node_type == PlanNodeType.INSERT:
            executor_node = InsertExecutor(node=plan)
        elif plan_node_type == PlanNodeType.CREATE_UDF:
            executor_node = CreateUDFExecutor(node=plan)
        elif plan_node_type == PlanNodeType.LOAD_DATA:
            executor_node = LoadDataExecutor(node=plan)

        # Build Executor Tree for children
        for children in plan.children:
            executor_node.append_child(self._build_execution_tree(children))

        return executor_node
    def test_should_return_all_frames_when_no_predicate_is_applied(self):
        """Scan with no predicate and no columns; expect the input batch.

        The stub plan sets both ``predicate`` and ``columns`` to None, and
        the first emitted batch is asserted equal to the batch that was fed
        in through the ``DummyExecutor`` child.
        """
        source_batch = Batch(frames=create_dataframe(3))

        # A throwaway class stands in for the scan plan.
        plan_attrs = {"predicate": None, "columns": None}
        scan_plan = type("ScanPlan", (), plan_attrs)

        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(source_batch, emitted_batches[0])
    def test_should_return_only_frames_satisfy_predicate(self):
        """Scan with a stub predicate whose mask selects only index 2.

        The stub's ``evaluate`` returns a ``Batch`` wrapping the one-column
        frame ``[False, False, True]``. The expected value is row 2 of the
        input, re-wrapped in a ``Batch`` after its index is reset.
        """
        source_batch = Batch(frames=create_dataframe(3))

        # The stub is a class, not an instance, so ``evaluate`` is a plain
        # function taking one positional argument.
        predicate_stub = type(
            "AbstractExpression", (),
            {"evaluate": lambda x: Batch(
                pd.DataFrame([False, False, True]))})

        plan_attrs = {"predicate": predicate_stub, "columns": None}
        scan_plan = type("ScanPlan", (), plan_attrs)
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        # Drop the original index so the expected frame starts at 0.
        surviving_rows = source_batch[[2]].frames.reset_index(drop=True)
        expected_batch = Batch(surviving_rows)

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(expected_batch, emitted_batches[0])
    def test_should_return_all_frames_when_no_predicate_is_applied(self):
        """Scan with ``predicate=None`` and expect the input batch back.

        The ``FrameBatch`` wraps ``create_dataframe(3)`` and one
        ``Prediction`` per row, stored under the ``"test"`` key; the first
        emitted batch is asserted equal to that same batch.
        """
        frames_df = create_dataframe(3)

        # NOTE(review): the middle prediction pairs one label with two
        # scores -- confirm this is intended.
        labels_per_row = (["car", "bus"], ["bus"], ["car", "train"])
        predictions = [
            Prediction(frames_df.iloc[row], labels, [0.5, 0.6])
            for row, labels in enumerate(labels_per_row)
        ]
        source_batch = FrameBatch(frames=frames_df,
                                  outcomes={"test": predictions})

        # A throwaway class stands in for the scan plan.
        scan_plan = type("ScanPlan", (), {"predicate": None})
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(source_batch, emitted_batches[0])
    def test_should_return_projected_columns(self):
        """Scan with a single column expression that selects ``'data'``.

        The plan's ``columns`` is a one-element list holding a stub whose
        ``evaluate`` wraps the ``'data'`` column of its argument's
        ``frames`` in a new ``Batch``. The first emitted batch is asserted
        equal to a ``Batch`` built from that column of the input dataframe.
        """
        frames_df = create_dataframe(3)

        source_batch = Batch(frames=frames_df)
        expected_batch = Batch(frames=pd.DataFrame(frames_df['data']))

        # The stub is a class, not an instance, so ``evaluate`` is a plain
        # function taking one positional argument.
        data_column_stub = type(
            "AbstractExpression", (),
            {"evaluate": lambda x: Batch(pd.DataFrame(x.frames['data']))})
        column_expressions = [data_column_stub]

        plan_attrs = {"predicate": None, "columns": column_expressions}
        scan_plan = type("ScanPlan", (), plan_attrs)
        scan_executor = SequentialScanExecutor(scan_plan)
        scan_executor.append_child(DummyExecutor([source_batch]))

        emitted_batches = list(scan_executor.exec())
        self.assertEqual(expected_batch, emitted_batches[0])
Exemple #10
0
    def _build_execution_tree(self, plan: AbstractPlan) -> AbstractExecutor:
        """build the execution tree from plan tree

        Arguments:
            plan {AbstractPlan} -- Input Plan tree

        Returns:
            AbstractExecutor -- Compiled Execution tree
        """
        root = None
        if plan is None:
            return root

        # Get plan node type
        plan_opr_type = plan.opr_type

        if plan_opr_type == PlanOprType.SEQUENTIAL_SCAN:
            executor_node = SequentialScanExecutor(node=plan)
        elif plan_opr_type == PlanOprType.UNION:
            executor_node = UnionExecutor(node=plan)
        elif plan_opr_type == PlanOprType.STORAGE_PLAN:
            executor_node = StorageExecutor(node=plan)
        elif plan_opr_type == PlanOprType.PP_FILTER:
            executor_node = PPExecutor(node=plan)
        elif plan_opr_type == PlanOprType.CREATE:
            executor_node = CreateExecutor(node=plan)
        elif plan_opr_type == PlanOprType.INSERT:
            executor_node = InsertExecutor(node=plan)
        elif plan_opr_type == PlanOprType.CREATE_UDF:
            executor_node = CreateUDFExecutor(node=plan)
        elif plan_opr_type == PlanOprType.LOAD_DATA:
            executor_node = LoadDataExecutor(node=plan)
        elif plan_opr_type == PlanOprType.ORDER_BY:
            executor_node = OrderByExecutor(node=plan)
        elif plan_opr_type == PlanOprType.LIMIT:
            executor_node = LimitExecutor(node=plan)
        elif plan_opr_type == PlanOprType.SAMPLE:
            executor_node = SampleExecutor(node=plan)

        # Build Executor Tree for children
        for children in plan.children:
            executor_node.append_child(self._build_execution_tree(children))

        return executor_node