Esempio n. 1
0
    def test_merge_column_wise_batch_frame(self):
        """Check that a column-wise merge of an 'id' batch and a 'data'
        batch compares equal to a batch built with both columns."""
        id_only = Batch(frames=pd.DataFrame([{'id': 0}]))
        data_only = Batch(frames=pd.DataFrame([{'data': 1}]))

        merged = Batch.merge_column_wise([id_only, data_only])

        # Reference batch holding both columns in a single row.
        expected = Batch(frames=pd.DataFrame([{'id': 0, 'data': 1}]))
        self.assertEqual(merged, expected)
    def exec(self, *args, **kwargs) -> Iterator[Batch]:
        """Join every outer batch with the inner batches produced for it.

        ``self.children[0]`` is treated as the outer side and
        ``self.children[1]`` as the inner side. The inner child is executed
        once per outer batch, receiving that batch as ``lateral_input``.
        Each (outer, inner) pair is merged column-wise, filtered with
        ``self.predicate`` and projected with ``self.join_project``.

        Yields:
            Batch: every merged batch that is non-empty after the predicate
            and projection have been applied.
        """
        outer = self.children[0]
        inner = self.children[1]

        for outer_batch in outer.exec():
            for inner_batch in inner.exec(lateral_input=outer_batch):
                merged = Batch.merge_column_wise([outer_batch, inner_batch])
                merged = apply_predicate(merged, self.predicate)
                merged = apply_project(merged, self.join_project)
                if not merged.empty():
                    # Yield instead of return: returning here handed back a
                    # bare Batch (not an iterator) and dropped every result
                    # after the first non-empty one.
                    yield merged
Esempio n. 3
0
    def evaluate(self, batch: Batch, **kwargs):
        """Apply this expression's function to ``batch`` and return the
        aliased, projected outcome.

        Each child expression is evaluated against ``batch`` first; when
        there are children, their results are merged column-wise and used
        as the function input. Otherwise ``batch`` itself is the input.

        Args:
            batch: the batch handed to every child and, absent children,
                to the function itself.
            **kwargs: forwarded unchanged to each child's ``evaluate``.

        Returns:
            The function output wrapped in a ``Batch``, with its column
            alias modified using ``self.alias`` and projected onto
            ``self.output_col_aliases``.
        """
        evaluated_children = [
            child.evaluate(batch, **kwargs) for child in self.children
        ]
        if evaluated_children:
            func_input = Batch.merge_column_wise(evaluated_children)
        else:
            func_input = batch

        func = self._gpu_enabled_function()
        result = Batch(pd.DataFrame(func(func_input.frames)))
        result.modify_column_alias(self.alias)
        return result.project(self.output_col_aliases)
Esempio n. 4
0
def apply_project(batch: Batch, project_list: List[AbstractExpression]):
    """Evaluate each projection expression on ``batch`` and merge the results.

    Args:
        batch: the batch to project.
        project_list: expressions to evaluate against ``batch``.

    Returns:
        ``batch`` unchanged when it is empty or ``project_list`` is falsy;
        otherwise the column-wise merge of every expression's result.
    """
    if batch.empty() or not project_list:
        return batch
    projected = [expr.evaluate(batch) for expr in project_list]
    return Batch.merge_column_wise(projected)