Example #1
    def _fit_transform_rest(self, X, transformed_part, is_fit, is_transform):
        """ method to take care of the rest of data, that wasn't transformed,
        it can either be 
        * dropped (default) :  'keep_other_columns' == 'drop'
        * kept as is        :  'keep_other_columns' == 'keep'
        * keep only not used columns 'keep_other_columns' == 'delta'

        """
        if self.keep_other_columns == "keep":
            # In that case I'll keep the original columns as well
            if is_fit:
                if hasattr(X, "columns"):
                    self._Xcolumns = list(getattr(X, "columns"))
                elif hasattr(X, "shape"):
                    self._Xcolumns = [i for i in range(X.shape[1])]
                else:
                    self._Xcolumns = None

            if is_transform:
                kept_features_names = self._get_rest_columns()
                Xcomplete_result = dsh.generic_hstack(
                    [X, transformed_part],
                    output_type=self.desired_output_type,
                    all_columns_names=[kept_features_names, self._feature_names_for_transform],
                )

                return Xcomplete_result
            else:
                return self

        elif self.keep_other_columns == "drop":

            return None

        # "delta' mode, I'll keep only the columns that were not used
        if self.columns_to_use is None:
            return transformed_part

        if is_fit and is_transform:
            self.anti_selector = ColumnsSelector(columns_to_drop=self.columns_to_use, regex_match=self.regex_match)
            Xother = self.anti_selector.fit_transform(X)

        elif is_transform:
            Xother = self.anti_selector.transform(X)

        elif is_fit:
            self.anti_selector = ColumnsSelector(columns_to_drop=self.columns_to_use, regex_match=self.regex_match)
            self.anti_selector.fit(X)

        if is_transform:
            kept_features_names = self._get_rest_columns()
            return dsh.generic_hstack(
                [Xother, transformed_part],
                output_type=self.desired_output_type,
                all_columns_names=[kept_features_names, self._feature_names_for_transform],
            )

            # Rmk : generic_hstack will handle the case where Xother has no columns
        else:
            return self
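A quick illustration of the three modes (the names below are hypothetical, not from the source): with X having columns a, b, c, columns_to_use=["a"], and a wrapped transformer producing a single column a__enc:

import pandas as pd

X = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
transformed_part = pd.DataFrame({"a__enc": [10, 20]})  # output of the wrapped transformer

# expected columns of the final result for each 'keep_other_columns' mode
expected_columns = {
    "drop": ["a__enc"],                 # transformed part only
    "keep": ["a", "b", "c", "a__enc"],  # full original X hstacked with the result
    "delta": ["b", "c", "a__enc"],      # only the columns NOT in columns_to_use survive
}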
Example #2
def test_generic_hstack():
    df1 = pd.DataFrame({"a": list(range(10)), "b": ["aaaa", "bbbbb", "cccc"] * 3 + ["ezzzz"]})
    df2 = pd.DataFrame({"c": list(range(10)), "d": ["aaaa", "bbbbb", "cccc"] * 3 + ["ezzzz"]})

    df12 = generic_hstack((df1, df2))
    assert get_type(df12) == DataTypes.DataFrame
    assert df12.shape == (10, 4)
    assert list(df12.columns) == ["a", "b", "c", "d"]

    df1 = pd.DataFrame({"a": list(range(10)), "b": ["aaaa", "bbbbb", "cccc"] * 3 + ["ezzzz"]})
    df2 = pd.DataFrame(
        {"c": list(range(10)), "d": ["aaaa", "bbbbb", "cccc"] * 3 + ["ezzzz"]},
        index=[1, 3, 5, 7, 9, 11, 13, 15, 17, 19],
    )

    df12 = generic_hstack((df1, df2))
    assert np.array_equal(df12.index.values, np.array([1, 3, 5, 7, 9, 11, 13, 15, 17, 19]))
    assert get_type(df12) == DataTypes.DataFrame
    assert df12.shape == (10, 4)
    assert list(df12.columns) == ["a", "b", "c", "d"]

    df12 = generic_hstack((df1, df2), output_type=DataTypes.NumpyArray)
    assert get_type(df12) == DataTypes.NumpyArray
    assert df12.shape == (10, 4)

    with pytest.raises(ValueError):
        generic_hstack((df1.head(3), df2.head(4)))

    with pytest.raises(ValueError):
        generic_hstack((df1.head(3).values, df2.head(4)))

    with pytest.raises(ValueError):
        generic_hstack((df1.head(3).values, df2.head(4).values))
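For reference, a minimal sketch of the contract this test pins down; this is an assumption about the behavior, not aikit's actual implementation of generic_hstack:

import pandas as pd

def hstack_frames_sketch(frames):
    # all inputs must have the same number of rows, otherwise raise (as the test expects)
    if len({len(f) for f in frames}) > 1:
        raise ValueError("inputs have different numbers of rows")
    # concatenate positionally, ignoring the individual indexes...
    out = pd.concat([f.reset_index(drop=True) for f in frames], axis=1)
    # ...then restore an index; which input's index wins is an assumption here
    out.index = frames[-1].index
    return out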
Example #3
    def _transform_aggregat(self, X, target_aggregat, target_aggregat_global):

        all_results = []
        for col in self._columns_to_encode:

            if self._na_to_null[col]:
                Xcol = self.na_remplacing(X[col])
            else:
                Xcol = X[col]

            result = Xcol.apply(lambda x: self.get_value(x, target_aggregat[col], target_aggregat_global[col]))
            # result.columns = ["%s__%s" % (col,c) for c in result.columns]
            all_results.append(result)

            assert len(result) == len(X)
            assert len(result.shape) == 2

        if len(all_results) == 0:
            if len(self._columns_to_keep) > 0:
                result_other = X.loc[:, self._columns_to_keep]
                return result_other
            else:
                return pd.DataFrame(index=range(X.shape[0]), columns=[])  # empty DataFrame

        all_results = pd.concat(all_results, axis=1)

        assert (all_results.index == X.index).all()

        if len(self._columns_to_keep) > 0:
            result_other = X.loc[:, self._columns_to_keep]
            return generic_hstack([result_other, all_results])
        else:
            return all_results
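The loop above is the usual target-aggregate encoding pattern: during fit, each modality is mapped to an aggregate of the target, and a global aggregate serves as fallback for unseen values. A toy sketch of the role get_value presumably plays (the aggregates and column name below are illustrative):

import pandas as pd

target_aggregat = {"city": {"paris": 0.8, "lyon": 0.3}}  # per-modality target mean, fitted on train
target_aggregat_global = {"city": 0.5}                   # global fallback for unseen modalities

def get_value_sketch(x, aggregat, aggregat_global):
    # fitted aggregate for this modality, or the global one if the modality is unseen
    return aggregat.get(x, aggregat_global)

col = pd.Series(["paris", "tokyo", "lyon"])
encoded = col.apply(lambda x: get_value_sketch(x, target_aggregat["city"], target_aggregat_global["city"]))
# -> 0.8, 0.5, 0.3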
Example #4
    def transform(self, X):

        if get_type(X) != DataTypes.DataFrame:
            raise TypeError("X should be a DataFrame")

        result = self._transform_to_encode(X)

        if len(self._columns_to_keep) > 0:
            result_other = X.loc[:, self._columns_to_keep]
            return generic_hstack([result_other, result])
        else:
            return result
Example #5
@pytest.mark.parametrize("with_cat", [True, False])
@pytest.mark.parametrize("force_sparse", [True, False])
def test_generic_hstack_sparse_and_category(with_cat, force_sparse):

    df = pd.DataFrame({"a": 10 + np.arange(10), "b": np.random.randn(10)})
    if with_cat:
        df["a"] = df["a"].astype("category")

    xx = convert_to_sparsearray(np.random.randint(0, 1, size=(10, 2)))

    concat = generic_hstack((df, xx), max_number_of_cells_for_non_sparse=10 + (1 - force_sparse) * 1000000)

    assert concat.shape == (df.shape[0], df.shape[1] + xx.shape[1])
    if force_sparse:
        assert get_type(concat) == DataTypes.SparseArray

    elif with_cat:
        assert concat.dtypes["a"] == "category"
        assert isinstance(concat, pd.DataFrame)
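The 10 + (1 - force_sparse) * 1000000 trick works because max_number_of_cells_for_non_sparse presumably gates the output type on the total cell count. A sketch of that decision, under that assumption:

def should_return_sparse(n_rows, n_cols_total, max_number_of_cells_for_non_sparse):
    # if densifying would create more cells than the budget allows, stay sparse
    return n_rows * n_cols_total > max_number_of_cells_for_non_sparse

# force_sparse: threshold = 10, and 10 rows * 4 columns = 40 cells > 10 -> sparse output
assert should_return_sparse(10, 4, 10)
# otherwise: threshold = 1000010, 40 cells is far below it -> dense (DataFrame) output
assert not should_return_sparse(10, 4, 1000010)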
Example #6
    def _fit_transform(self, X, y, is_fit, is_transform, fit_params=None):
        """ internal method that handle the fit and the transform """

        if fit_params is None:
            fit_params = {}

        if is_fit:
            if isinstance(self.columns_to_use, str) and self.columns_to_use == "auto":
                columns = self._get_default_columns_to_use(X, y)
                self.selector = ColumnsSelector(columns_to_use=columns)
            else:
                self.selector = ColumnsSelector(columns_to_use=self.columns_to_use, regex_match=self.regex_match)

        if hasattr(X, "shape"):
            if X.shape[0] == 0:
                raise ValueError("the X object has 0 rows")

        Xindex = dsh._get_index(X)  # if X has an index retrieve it
        #        if self.columns_to_use is not None:
        if is_fit:
            Xsubset = self.selector.fit_transform(X)
        else:
            Xsubset = self.selector.transform(X)
        # TODO (maybe): here allow a preprocessing pipeline
        #        if self.has_preprocessing:
        #            if is_fit:
        #                self.preprocessing = self._get_preprocessing()
        #                Xsubset = self.preprocessing.fit_transform(Xsubset)
        #            else:
        #                Xsubset = self.preprocessing.transform(Xsubset)

        # Store columns and shape BEFORE any modification
        if self.selector is not None:
            Xsubset_columns = self.selector.get_feature_names()
        else:
            raise NotImplementedError("should not go there anymore")
            # Xsubset_columns = getattr(Xsubset, "columns", None)

        Xsubset_shape = getattr(Xsubset, "shape", None)
        # TODO: here, one way or another, use a '
        # https://github.com/scikit-learn/scikit-learn/issues/6425

        if is_fit:
            self._expected_type = dsh.get_type(Xsubset)
            self._expected_nbcols = dsh._nbcols(Xsubset)
            self._expected_columns = dsh._get_columns(Xsubset)

        else:
            Xtype = dsh.get_type(Xsubset)
            if Xtype != self._expected_type:
                raise ValueError(
                    "I don't have the correct type as input, expected : %s, got : %s" % (self._expected_type, Xtype)
                )

            nbcols = dsh._nbcols(Xsubset)
            if nbcols != self._expected_nbcols:
                raise ValueError(
                    "I don't have the correct nb of colmns as input, expected : %d, got : %d"
                    % (self._expected_nbcols, nbcols)
                )

            columns = dsh._get_columns(Xsubset)
            expected_columns = getattr(self, "_expected_columns", None)  # to allow pickle compatibility

            if expected_columns is not None and columns is not None and columns != self._expected_columns:
                raise ValueError("I don't have the correct names of columns")

        if self.accepted_input_types is not None and self._expected_type not in self.accepted_input_types:
            Xsubset = dsh.convert_generic(
                Xsubset, mapped_type=self._expected_type, output_type=self.accepted_input_types[0]
            )

        if is_fit:
            self._verif_params()
            self._empty_data = False
            s = getattr(Xsubset, "shape", None)
            if s is not None and len(s) > 1 and s[1] == 0:
                self._empty_data = True

        if self.all_columns_at_once or self._empty_data:

            if is_fit:
                self._model = self._get_model(Xsubset, y)

            ##############################################
            ### Apply the model on ALL columns at ONCE ###
            ##############################################

            if self.work_on_one_column_only:
                Xsubset = dsh.make1dimension(Xsubset)  # will generate an error if 2 dimensions
            else:
                Xsubset = dsh.make2dimensions(Xsubset)

            # Call to underlying model
            Xres = None
            if is_fit and is_transform:
                ##############################
                ###  fit_transform method  ###
                ##############################
                # test whether the data to transform actually has some columns

                if not self._empty_data:
                    # normal case
                    Xres = self._model.fit_transform(Xsubset, y, **fit_params)
                else:
                    # It means there are no columns to transform
                    Xres = Xsubset  # don't do anything

            elif is_fit and not is_transform:
                ####################
                ###  fit method  ###
                ####################
                if self.must_transform_to_get_features_name:
                    Xres = self._model.fit_transform(Xsubset, y, **fit_params)
                else:
                    self._model.fit(Xsubset, y, **fit_params)
            else:
                ####################
                ###  transform   ###
                ####################
                if not self._empty_data:
                    Xres = self._model.transform(Xsubset)
                else:
                    Xres = Xsubset

            if is_fit:
                self._columns_informations = {
                    "output_columns": getattr(Xres, "columns", None),  # names of transformed columns if exist
                    "output_shape": getattr(Xres, "shape", None),  # shape of transformed result if exist
                    "input_columns": Xsubset_columns,  # name of input columns
                    "input_shape": Xsubset_shape,  # shape of input data
                }

                self._feature_names_for_transform = self.try_to_find_feature_names_all_at_once(
                    output_columns=self._columns_informations["output_columns"],
                    output_shape=self._columns_informations["output_shape"],
                    input_columns=self._columns_informations["input_columns"],
                    input_shape=self._columns_informations["input_shape"],
                )

                # self.kept_features_names = None  # for now

            if is_transform:
                Xres = dsh.convert_generic(Xres, output_type=self.desired_output_type)
                Xres = dsh._set_index(Xres, Xindex)

        else:
            ########################################
            ### Apply the model COLUMN BY COLUMN ###
            ########################################
            if is_fit:
                self._models = []

            if is_transform or self.must_transform_to_get_features_name:
                all_Xres = []
            else:
                all_Xres = None

            Xsubset = dsh.make2dimensions(Xsubset)

            for j in range(self._expected_nbcols):

                if self._expected_type in (DataTypes.DataFrame, DataTypes.SparseDataFrame, DataTypes.Serie):
                    Xsubset_j = Xsubset.iloc[:, j]
                else:
                    Xsubset_j = Xsubset[:, j]

                if is_fit:
                    sub_model = self._get_model(Xsubset, y)
                    self._models.append(sub_model)
                else:
                    sub_model = self._models[j]

                if not self.work_on_one_column_only:
                    Xsubset_j = dsh.make2dimensions(Xsubset_j)

                if is_fit and is_transform:
                    # fit_transform method
                    Xres_j = sub_model.fit_transform(Xsubset_j, y, **fit_params)

                    all_Xres.append(Xres_j)

                elif is_fit and not is_transform:
                    # fit method
                    if self.must_transform_to_get_features_name:
                        Xres_j = sub_model.fit_transform(Xsubset_j, y, **fit_params)
                        all_Xres.append(Xres_j)

                    else:
                        sub_model.fit(Xsubset_j, y, **fit_params)

                elif is_transform:
                    # transform method

                    Xres_j = sub_model.transform(Xsubset_j)
                    all_Xres.append(Xres_j)

            if is_fit:

                self._columns_informations = {
                    "all_output_columns": None
                    if all_Xres is None
                    else [getattr(Xres, "columns", None) for Xres in all_Xres],
                    "all_output_shape": None
                    if all_Xres is None
                    else [getattr(Xres, "shape", None) for Xres in all_Xres],
                    "input_columns": Xsubset_columns,  # name of input columns
                    "input_shape": Xsubset_shape,  # shape of input data
                }

                self._feature_names_for_transform = list(
                    self.try_to_find_feature_names_separate(
                        all_output_columns=self._columns_informations["all_output_columns"],
                        all_output_shape=self._columns_informations["all_output_shape"],
                        input_columns=self._columns_informations["input_columns"],
                        input_shape=self._columns_informations["input_shape"],
                    )
                )

                # self.kept_features_names = None  # for now

            if is_transform:
                Xres = dsh.generic_hstack(all_Xres, output_type=self.desired_output_type)
                Xres = dsh._set_index(Xres, Xindex)

        if is_transform:
            if self._feature_names_for_transform is not None:
                ### HERE: this doesn't work in transform !!!
                Xres = dsh._set_columns(Xres, self._feature_names_for_transform)

        if is_transform:
            return Xres
        else:
            return self
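The is_fit / is_transform flags suggest the public scikit-learn API is a set of thin wrappers around _fit_transform. A plausible sketch (the hosting class is hypothetical; the source only shows the internal method):

class WrapperSketch:
    def fit(self, X, y=None, **fit_params):
        # _fit_transform returns self when is_transform is False
        return self._fit_transform(X, y, is_fit=True, is_transform=False, fit_params=fit_params)

    def fit_transform(self, X, y=None, **fit_params):
        return self._fit_transform(X, y, is_fit=True, is_transform=True, fit_params=fit_params)

    def transform(self, X):
        return self._fit_transform(X, None, is_fit=False, is_transform=True)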
Example #7
    def _fit_transform_rest(self, X, transformed_part, is_fit, is_transform):
        """ 
        method to take care of the what to do with wasn't transformed, there are 2 part of the data:
            * the part that was used by the transformer : columns_to_use
            * the part that wasn't used by the transformer : the rest

        We can keep or drop those 2 parts
        """
        #        There are four possibilities :
        #            * drop_unused_columns = True and drop_used_columns = True
        #                => nothing to do. Nothing to ADD to the result of the wrapped transformer
        #
        #            * drop_unused_columns = True and drop_used_columns = False
        #                => We need to add the 'un-transformed' data to the result
        #                => Add an 'anti-selector' with 'columns_to_drop' = 'columns_to_use'
        #                   This will select the rest of the columns
        #
        #            * drop_unused_columns = False and drop_used_columns = True
        #                => We need to add the 'transformed' part of the data
        #                => Add a 'selector' : with 'columns_to_use' = 'columns_to_use'
        #
        #           * drop_unused_columns = False and drop_used_columns = False
        #                => We need to add the full data
        #                => ... don't add a selector (or a selector with columns_to_use = None)
        #
        #
        if is_fit:
            self.other_selector = None

        if self.drop_unused_columns and self.drop_used_columns:
            # Nothing to do
            if is_transform:
                return transformed_part
            else:
                return None

        if is_fit:

            if hasattr(X, "columns"):
                self._Xcolumns = list(getattr(X, "columns"))
            elif hasattr(X, "shape"):
                self._Xcolumns = [i for i in range(X.shape[1])]
            else:
                self._Xcolumns = None

            if not self.drop_used_columns and self.drop_unused_columns:
                self.other_selector = ColumnsSelector(columns_to_use=self.columns_to_use, regex_match=self.regex_match)

            elif self.drop_used_columns and not self.drop_unused_columns:
                self.other_selector = ColumnsSelector(columns_to_drop=self.columns_to_use, regex_match=self.regex_match)

            elif not self.drop_used_columns and not self.drop_unused_columns:
                self.other_selector = ColumnsSelector(columns_to_use="all")
                # Maybe we can 'by-pass' this
            else:
                self.other_selector = None  # we never go there, already out of the function

        if is_fit and is_transform:
            Xother = self.other_selector.fit_transform(X)

        elif is_transform:
            Xother = self.other_selector.transform(X)

        elif is_fit:
            self.other_selector.fit(X)

        if is_transform:
            kept_features_names = self.other_selector.get_feature_names()
            return dsh.generic_hstack(
                [Xother, transformed_part],
                output_type=self.desired_output_type,
                all_columns_names=[kept_features_names, self._feature_names_for_transform],
            )
        else:
            return self
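Concretely, with columns_to_use=["a", "b"] on a frame with columns a, b, c, d, the four flag combinations determine which columns other_selector re-adds next to the transformer's output (an illustration of the comment block above, not code from the source):

other_selector_keeps = {
    # (drop_used_columns, drop_unused_columns) -> columns hstacked next to the result
    (True, True): [],                      # nothing to add, transformed part only
    (False, True): ["a", "b"],             # re-add the used columns
    (True, False): ["c", "d"],             # re-add the unused columns ('anti-selector')
    (False, False): ["a", "b", "c", "d"],  # re-add everything
}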
Example #8
    def _approx_cross_validation_pre_calculation(
        self,
        X,
        y,
        groups,
        scoring,
        cv,
        verbose,
        fit_params_step,
        return_predict,
        method,
        no_scoring,
        stopping_round,
        stopping_threshold,
        nodes_not_to_crossvalidate,
        nodes_cant_cv_transform,
        kwargs_step,
    ):
        """ sub-method to loop through the nodes of the pipeline and pre-compute everything that can be pre-computed """

        data_dico = {}  # Will contain transformed blocks at each node

        nodes_done = set()
        for node in self._nodes_order:

            concat_at_this_node = self.no_concat_nodes is None or node not in self.no_concat_nodes
            if not concat_at_this_node:
                raise NotImplementedError(
                    "Approx cross-validation does't work if no concatenation (node %s)"
                    % str(node))

            nodes_done.add(node)

            if self.verbose:
                print("start processing node %s ..." % node)

            ### Debugging Help ###
            # if getattr(self,"_return_before_node",None) is not None and getattr(self,"_return_before_node",None) == node:
            #    return data_dico

            model = self._models[node]

            predecessors = list(self.complete_graph.predecessors(node))
            # Careful: the order here is not necessarily always the same

            #### I'll use the order in which the edges were given

            # Concatenation order: edge declaration order, ties broken alphabetically

            if len(predecessors) == 0:
                #########################
                ###  No predecessors  ###
                #########################

                # ==> Apply on original data
                lastX = X

            elif len(predecessors) == 1:
                ########################
                ###  One predecessor ###
                ########################

                # ==> Apply on data coming out of last node
                lastX = data_dico[predecessors[0]]
                # data_dico[node] = model.fit_transform(lastX, y, **fit_params_step[node] )

            elif len(predecessors) > 1:
                #######################
                ###  More than one  ###
                #######################
                # ==> concat all the predecessors node and apply it

                ### Fix concatenation order ###
                edges_number = self._get_edges_number(predecessors, node)
                predecessors = sorted(predecessors,
                                      key=lambda p:
                                      (edges_number.get(p, -1), p))
                self._all_concat_order[node] = predecessors
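                # e.g. with edges declared ("A", "C") then ("B", "C"), the predecessors of "C"
                # come out as ["A", "B"]: sorted by edge declaration number, with missing
                # numbers (-1) first and ties broken alphabetically -- assumed reading of
                # _get_edges_number's contract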

                all_lastX = [
                    data_dico[predecessor] for predecessor in predecessors
                ]

                if self.verbose:
                    print("start aggregation...")

                # if do_fit:
                output_type = guess_output_type(all_lastX)
                self._all_concat_type[node] = output_type
                # else:
                #    output_type = self._all_concat_type[node]
                has_none = any(x is None for x in all_lastX)

                if has_none:
                    lastX = None
                else:
                    lastX = generic_hstack(all_lastX, output_type=output_type)

            if node != self._terminal_node and lastX is not None:
                # This is not the end of the graph

                if node not in nodes_not_to_crossvalidate and node not in nodes_cant_cv_transform:
                    ### 1) Node should BE cross-validated ...
                    ### 2) ... and we CAN use 'cv_transform'

                    if self.verbose:
                        print("do crossvalidation on %s" % node)

                    _, data_dico[node] = cross_validation(
                        model,
                        lastX,
                        y,
                        groups=groups,
                        cv=cv,
                        verbose=verbose,
                        fit_params=fit_params_step[node],
                        return_predict=True,
                        method="transform",
                        no_scoring=True,
                        stopping_round=None,
                        stopping_threshold=None,
                        **kwargs_step[node])

                elif node not in nodes_not_to_crossvalidate and node in nodes_cant_cv_transform:
                    ### 1) Node should BE cross-validated ...
                    ### 2) ... but we can't use 'cv_transform'

                    if self.verbose:
                        print("can't do node %s" % node)
                    data_dico[node] = None  # Can't compute this node

                else:
                    ### Node that shouldn't be cross-validated ###

                    if self.verbose:
                        print("skip crossvalidation on %s" % node)
                    cloned_model = clone(model)
                    if groups is not None and function_has_named_argument(
                            cloned_model.fit_transform, "groups"):
                        data_dico[node] = cloned_model.fit_transform(
                            lastX, y, groups, **fit_params_step[node])
                    else:
                        data_dico[node] = cloned_model.fit_transform(
                            lastX, y, **fit_params_step[node])

            elif lastX is not None:

                ### CV no matter what at the last node ###

                #                if node not in nodes_not_to_crossvalidate and node not in nodes_cant_cv_transform:
                #
                #                    # This is the last node of the Graph
                #                    result = approx_cross_validation( model, lastX, y, groups = groups, scoring = scoring, cv = cv ,
                #                                                verbose = verbose, fit_params = fit_params_step[node],
                #                                                return_predict = return_predict , method = method, no_scoring = no_scoring,
                #                                                stopping_round = stopping_round, stopping_threshold = stopping_threshold,
                #                                                **kwargs_step[node])
                #
                #                elif node not in nodes_not_to_crossvalidate and node in nodes_cant_cv_transform:
                #                    pass
                #
                #                else:

                # This is the last node of the Graph
                result = cross_validation(
                    model,
                    lastX,
                    y,
                    groups=groups,
                    scoring=scoring,
                    cv=cv,
                    verbose=verbose,
                    fit_params=fit_params_step[node],
                    return_predict=return_predict,
                    method=method,
                    no_scoring=no_scoring,
                    stopping_round=stopping_round,
                    stopping_threshold=stopping_threshold,
                    **kwargs_step[node])

                # Rmk : if we do that, the columns regarding fit time are 'false' : they only account for the time spent in the last node

                return True, data_dico, result
            #                return result

            else:
                ###
                if self.verbose:
                    print("can't compute node %s because lastX is None" % node)
                data_dico[node] = None
                # return result

        return False, data_dico, None  # None : no result yet
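The call above to cross_validation(..., method="transform", return_predict=True, no_scoring=True) presumably produces an out-of-fold transform: every row is transformed by a model that never saw it during fit. A minimal sketch of that idea with scikit-learn primitives (hypothetical helper, not aikit's cross_validation):

import numpy as np
from sklearn.base import clone
from sklearn.model_selection import KFold

def out_of_fold_transform_sketch(model, X, y, cv=None):
    # transform each validation fold with a clone fitted on the remaining folds only
    cv = cv or KFold(n_splits=5)
    parts, positions = [], []
    for train_idx, test_idx in cv.split(X):
        m = clone(model).fit(X[train_idx], y[train_idx])
        parts.append(m.transform(X[test_idx]))
        positions.append(test_idx)
    positions = np.concatenate(positions)
    stacked = np.vstack(parts)
    return stacked[np.argsort(positions)]  # restore the original row order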
Example #9
    def _fit_transform(self,
                       X,
                       y=None,
                       groups=None,
                       method=None,
                       fit_params=None):
        """ main method of GraphPipeline, handles the fit and predict of object """
        do_fit = method in ("fit", "fit_transform", "fit_predict")

        if not self._already_fitted and not do_fit:
            raise NotFittedError("Please fit the model before")

        # Split fit_params into a 'step-by-step' dictionary
        fit_params_step = {name: {} for name in self.complete_graph.nodes}
        if fit_params is not None:
            for key, value in fit_params.items():
                step, param = key.split("__", 1)
                fit_params_step[step][param] = value
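        # e.g. fit_params = {"enc__max_categories": 10, "model__verbose": 1} becomes
        # fit_params_step = {"enc": {"max_categories": 10}, "model": {"verbose": 1}}
        # (hypothetical node names; this is the usual sklearn 'step__param' routing convention)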

        data_dico = {}  # Will contain transformed blocks at each node
        feature_dico = {}  # Will contain the get_feature_names() of each node

        if do_fit:
            input_features = getattr(X, "columns", None)
            if input_features is not None:
                input_features = list(input_features)

            self._Xinput_features = input_features

        else:
            input_features = self._Xinput_features

        nodes_done = set()
        for node in self._nodes_order:

            nodes_done.add(node)

            if self.verbose:
                print("start processing node %s ..." % node)

            ### Debugging Help ###
            if (getattr(self, "_return_before_node", None) is not None
                    and getattr(self, "_return_before_node", None) == node):
                return data_dico

            model = self._models[node]

            predecessors = list(self.complete_graph.predecessors(node))
            # Careful: the order here is not necessarily always the same

            #### I'll use the order in which the edges were given

            # Concatenation order: edge declaration order, ties broken alphabetically
            concat_at_this_node = self.no_concat_nodes is None or node not in self.no_concat_nodes

            if len(predecessors) == 0:
                #########################
                ###  No predecessors  ###
                #########################
                if concat_at_this_node:
                    lastX = X

                else:
                    lastX = {"_data": X}
                # ==> Apply on original data

                last_features = input_features

            elif len(predecessors) == 1:
                ########################
                ###  One predecessor ###
                ########################

                # ==> Apply on data coming out of last node
                if concat_at_this_node:
                    lastX = data_dico[predecessors[0]]
                else:
                    lastX = {
                        predecessor: data_dico[predecessor]
                        for predecessor in predecessors
                    }

                last_features = feature_dico[predecessors[0]]

            elif len(predecessors) > 1:
                #######################
                ###  More than one  ###
                #######################
                # ==> concat all the predecessors node and apply it

                ### Fix concatenation order ###
                if do_fit:
                    edges_number = self._get_edges_number(predecessors, node)
                    predecessors = sorted(predecessors,
                                          key=lambda p:
                                          (edges_number.get(p, -1), p))
                    self._all_concat_order[node] = predecessors
                else:
                    predecessors = self._all_concat_order[node]

                all_lastX = [
                    data_dico[predecessor] for predecessor in predecessors
                ]
                all_last_features = [
                    feature_dico[predecessor] for predecessor in predecessors
                ]

                if all_last_features is None or None in all_last_features:
                    last_features = None
                else:
                    last_features = unlist(all_last_features)

                # all_columns_names = [try_to_find_features_names( self._models[predecessor], input_features = input_features)
                #        for predecessor, input_features in zip(predecessors, all_last_features)]

                # for predecessor, input_features in zip(predecessors,all_last_features):
                #    try_to_find_features_names( self._models[predecessor], input_features = input_features)

                if self.verbose:
                    print("start aggregation...")

                if do_fit:
                    output_type = guess_output_type(all_lastX)
                    self._all_concat_type[node] = output_type
                else:
                    output_type = self._all_concat_type[node]

                if concat_at_this_node:
                    lastX = generic_hstack(all_lastX,
                                           output_type=output_type,
                                           all_columns_names=all_last_features)
                else:
                    lastX = {
                        predecessor: data_dico[predecessor]
                        for predecessor in predecessors
                    }

            if node != self._terminal_node:
                # This is not the end of the graph
                if do_fit:
                    if groups is not None and function_has_named_argument(
                            model.fit_transform, "groups"):
                        data_dico[node] = model.fit_transform(
                            lastX, y, groups=groups, **fit_params_step[node])
                    else:
                        data_dico[node] = model.fit_transform(
                            lastX, y, **fit_params_step[node])

                    # HERE : we could skip the fit for certain models via the fit params
                    # Something like :

                    # if node in prefitted_models:
                    #
                    # self._models[node] = prefitted_models[node]
                    # model = prefitted_models[node]
                    # + copy model into pipeline

                    #    data_dico[node] = model.transform(lastX, y)
                    # else:
                    #    data_dico[node] = model.fit_transform(lastX, y, **fit_params_step[node])

                else:
                    data_dico[node] = model.transform(lastX)

                feature_dico[node] = try_to_find_features_names(
                    model, input_features=last_features)

            else:
                # This is the last node of the Graph
                if method == "fit":
                    if groups is not None and function_has_named_argument(
                            model.fit, "groups"):
                        model.fit(lastX, y, groups, **fit_params_step[node])
                    else:
                        model.fit(lastX, y, **fit_params_step[node])
                    result = self

                elif method == "fit_predict":
                    if groups is not None and function_has_named_argument(
                            model.fit_predict, "groups"):
                        result = model.fit_predict(lastX, y, groups,
                                                   **fit_params_step[node])
                    else:
                        result = model.fit_predict(lastX, y,
                                                   **fit_params_step[node])

                elif method == "fit_transform":
                    if groups is not None and function_has_named_argument(
                            model.fit_transform, "groups"):
                        result = model.fit_transform(lastX, y, groups,
                                                     **fit_params_step[node])
                    else:
                        result = model.fit_transform(lastX, y,
                                                     **fit_params_step[node])

                elif method == "transform":
                    result = model.transform(lastX)

                elif method == "predict":
                    result = model.predict(lastX)

                elif method == "predict_proba":
                    result = model.predict_proba(lastX)

                elif method == "predict_log_proba":
                    result = model.predict_log_proba(lastX)

                elif method == "decision_function":
                    result = model.decision_function(lastX)

                elif method == "score":
                    result = model.score(lastX, y)

                else:
                    raise ValueError("I don't know that kind of method '%s' " %
                                     method)

                feature_dico[node] = try_to_find_features_names(
                    model, input_features=last_features)
                return result

            #######################
            #### Dico cleaning ####
            #######################
            # I'll do a step of cleaning to remove useless blocks from memory
            # I need to remove data in nodes that won't be accessed anymore
            still_usefull = set()
            for n in self.complete_graph.nodes:
                if n in nodes_done:
                    continue

                p = list(self.complete_graph.predecessors(n))
                still_usefull.update(p)

            for n in data_dico.keys():
                if data_dico[n] is None:
                    continue
                if n not in still_usefull:
                    if self.verbose:
                        print("deleting useless node %s" % n)
                    data_dico[n] = None
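Given the method dispatch above, the public GraphPipeline entry points are presumably thin wrappers around _fit_transform. A plausible sketch, under that assumption (the source only shows the internal method):

class GraphPipelineSketch:
    def fit(self, X, y=None, groups=None, **fit_params):
        # _fit_transform returns self when method == "fit"
        self._fit_transform(X, y, groups=groups, method="fit", fit_params=fit_params)
        self._already_fitted = True  # assumption: the flag checked at the top of _fit_transform
        return self

    def fit_transform(self, X, y=None, groups=None, **fit_params):
        return self._fit_transform(X, y, groups=groups, method="fit_transform", fit_params=fit_params)

    def transform(self, X):
        return self._fit_transform(X, method="transform")

    def predict(self, X):
        return self._fit_transform(X, method="predict")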