Ejemplo n.º 1
0
class frst3ChrsCVTE(CustomTransformer):
    """Target-encode one text column after passing it through ``firstNChars``.

    The input column is handed to ``firstNChars`` with an argument of 3 and
    the resulting frame is fed to ``CVTargetEncodeTransformer``.
    """

    # ensure tested as if shouldn't fail
    _testing_can_skip_failure = False

    @staticmethod
    def get_default_properties():
        """Return the default properties: exactly one column of type text."""
        properties = {
            "col_type": "text",
            "min_cols": 1,
            "max_cols": 1,
            "relative_importance": 1,
        }
        return properties

    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Fit the binner and the target encoder on X; return the encoded frame.

        The fitted helpers are kept on ``self.binner`` and ``self.cvte`` so
        that ``transform`` can reuse them.
        """
        self.binner = firstNChars()
        prefixes = self.binner.fit_transform(X, 3)

        # Mean target (out of fold) for each identical string
        self.cvte = CVTargetEncodeTransformer(cat_cols=prefixes.names)

        target = y
        if self.labels is not None:
            # Classification: y is always mapped to numeric form, even when
            # it is already integer
            encoder = LabelEncoder().fit(self.labels)
            target = dt.Frame(encoder.transform(y))

        return self.cvte.fit_transform(prefixes, target)

    def transform(self, X: dt.Frame):
        """Run X through the already-fitted binner and then the target encoder."""
        prefixes = self.binner.transform(X, 3)
        return self.cvte.transform(prefixes)
Ejemplo n.º 2
0
class LogScaleTargetEncodingTransformer(CustomTransformer):
    """Target-encode one numeric column after binning it with ``LogScaleBinner``."""

    @staticmethod
    def get_default_properties():
        """Return the default properties: exactly one column of type numeric."""
        properties = {
            "col_type": "numeric",
            "min_cols": 1,
            "max_cols": 1,
            "relative_importance": 1,
        }
        return properties

    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Fit the binner and the target encoder on X; return the encoded frame.

        The fitted helpers are kept on ``self.binner`` and ``self.cvte`` so
        that ``transform`` can reuse them.
        """
        # Roughly: numbers become a string of their exponent
        self.binner = LogScaleBinner()
        binned = self.binner.fit_transform(X)

        # Mean target (out of fold) for each identical string
        self.cvte = CVTargetEncodeTransformer(cat_cols=binned.names)

        target = y
        if self.labels is not None:
            # Classification: y is always mapped to numeric form, even when
            # it is already integer
            encoder = LabelEncoder().fit(self.labels)
            target = dt.Frame(encoder.transform(y))

        return self.cvte.fit_transform(binned, target)

    def transform(self, X: dt.Frame):
        """Run X through the already-fitted binner and then the target encoder."""
        binned = self.binner.transform(X)
        return self.cvte.transform(binned)
Ejemplo n.º 3
0
    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Fit a ``firstNChars`` binner and a CV target encoder on X.

        Stores the fitted helpers on ``self.binner`` and ``self.cvte`` and
        returns the target encoder's output for the binned frame.
        """
        binner = firstNChars()
        self.binner = binner
        keyed = binner.fit_transform(X, 3)

        # Out-of-fold mean target, grouped by identical string
        encoder = CVTargetEncodeTransformer(cat_cols=keyed.names)
        self.cvte = encoder

        if self.labels is None:
            target = y
        else:
            # Classification: convert y to numeric form unconditionally, even
            # if it is already integer
            label_encoder = LabelEncoder().fit(self.labels)
            target = dt.Frame(label_encoder.transform(y))

        return encoder.fit_transform(keyed, target)
Ejemplo n.º 4
0
    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Fit a ``LogScaleBinner`` and a CV target encoder on X.

        Stores the fitted helpers on ``self.binner`` and ``self.cvte`` and
        returns the target encoder's output for the binned frame.
        """
        # Roughly: each number is turned into a string of its exponent
        binner = LogScaleBinner()
        self.binner = binner
        keyed = binner.fit_transform(X)

        # Out-of-fold mean target, grouped by identical string
        encoder = CVTargetEncodeTransformer(cat_cols=keyed.names)
        self.cvte = encoder

        if self.labels is None:
            target = y
        else:
            # Classification: convert y to numeric form unconditionally, even
            # if it is already integer
            label_encoder = LabelEncoder().fit(self.labels)
            target = dt.Frame(label_encoder.transform(y))

        return encoder.fit_transform(keyed, target)
Ejemplo n.º 5
0
class LogScaleTargetEncodingTransformer(CustomTransformer):
    """Target-encode one numeric column after binning it with ``LogScaleBinner``.

    The encoder output is wrapped in a ``dt.Frame`` and any +inf/-inf values
    are replaced with missing values before it is returned.
    """

    _testing_can_skip_failure = False  # ensure tested as if shouldn't fail

    @staticmethod
    def get_default_properties():
        """Return the default properties: exactly one column of type numeric."""
        return dict(col_type="numeric",
                    min_cols=1,
                    max_cols=1,
                    relative_importance=1)

    @staticmethod
    def _without_infinities(frame):
        """Replace +inf/-inf in ``frame`` with missing values and return it.

        ``Frame.replace`` modifies the whole frame in place, so a single call
        covers every column; no per-column loop is required.
        """
        frame.replace([math.inf, -math.inf], None)
        return frame

    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Fit the binner and the target encoder on X; return the encoded frame.

        The fitted helpers are kept on ``self.binner`` and ``self.cvte`` so
        that ``transform`` can reuse them. The returned frame contains no
        +inf/-inf values.
        """
        # Roughly: Convert numbers to a string of their exponent
        self.binner = LogScaleBinner()
        X = self.binner.fit_transform(X)

        # Compute mean target (out of fold) per same string
        self.cvte = CVTargetEncodeTransformer(cat_cols=X.names)

        if self.labels is not None:
            # for classification, always turn y into numeric form, even if already integer
            y = dt.Frame(LabelEncoder().fit(self.labels).transform(y))

        X = dt.Frame(self.cvte.fit_transform(X, y))
        # Don't leave inf/-inf
        return self._without_infinities(X)

    def transform(self, X: dt.Frame):
        """Apply the fitted binner and target encoder to X, without +inf/-inf."""
        X = self.binner.transform(X)
        X = dt.Frame(self.cvte.transform(X))
        # Don't leave inf/-inf
        return self._without_infinities(X)