Exemple #1
0
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext):
        """
        Run every step on its own copy of the input, then join the step outputs.

        When ``self.n_jobs`` equals 1 the steps are called one after another;
        for any other value the calls are dispatched through joblib's
        ``Parallel`` with ``self.backend``.

        :param data_container: data container; each step receives ``data_container.copy()``
        :param context: execution context; each step receives ``context.push(step)``
        :return: the result of ``self.joiner.handle_transform`` applied to the
            list of per-step results and the value of ``self.hash(data_container)``.
        """
        if self.n_jobs == 1:
            # Sequential path: call each step inline, in declaration order.
            step_outputs = []
            for _, step in self.steps_as_tuple:
                step_outputs.append(
                    step.handle_transform(data_container.copy(),
                                          context.push(step)))
        else:
            # Parallel path: one delayed call per step, fed to joblib as a generator.
            runner = Parallel(backend=self.backend, n_jobs=self.n_jobs)
            step_outputs = runner(
                delayed(step.handle_transform)(data_container.copy(),
                                               context.push(step))
                for _, step in self.steps_as_tuple)

        # The ids are computed from the container that was passed in,
        # not from any of the per-step copies.
        joined_ids = self.hash(data_container)

        return self.joiner.handle_transform(step_outputs, joined_ids)
Exemple #2
0
    def handle_fit(self, data_container: DataContainer,
                   context: ExecutionContext):
        """
        Fit every step on its own copy of the input and store the fitted steps.

        When ``self.n_jobs`` equals 1 the steps are fitted one after another;
        for any other value the calls are dispatched through joblib's
        ``Parallel`` with ``self.backend``.

        :param data_container: the input data to fit onto; each step receives ``data_container.copy()``
        :param context: execution context; each step receives ``context.push(step)``
        :return: tuple ``(self, data_container)`` where ``data_container`` is the
            object that was passed in (the per-step fit outputs are not returned).
        """
        if self.n_jobs == 1:
            # Sequential path: fit each step inline, in declaration order.
            fit_results = []
            for _, step in self.steps_as_tuple:
                fit_results.append(
                    step.handle_fit(data_container.copy(), context.push(step)))
        else:
            # Parallel path: one delayed call per step, fed to joblib as a generator.
            runner = Parallel(backend=self.backend, n_jobs=self.n_jobs)
            fit_results = runner(
                delayed(step.handle_fit)(data_container.copy(),
                                         context.push(step))
                for _, step in self.steps_as_tuple)

        # Each result is unpacked as a pair: the first element replaces the
        # stored step under its existing name, the second element is ignored.
        for position, fit_result in enumerate(fit_results):
            fitted_step, _ = fit_result
            step_name = self.steps_as_tuple[position][0]
            self.steps_as_tuple[position] = (step_name, fitted_step)
        self._refresh_steps()

        return self, data_container