    def transform(self, fp):
        pool = FeaturePool(fp)
        fm = pool.meta()
        x = pool.array()

        # Rescale every feature column into self.feature_range and re-emit the pool.
        scaler = MinMaxScaler(feature_range=self.feature_range)
        scaler.fit(x)
        for f in FeaturePool.from_array(fm, scaler.transform(x)):
            yield f
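# For reference: a minimal standalone sketch of the scaling step wrapped above,
# using plain scikit-learn (the toy array is illustrative, not part of the library).
import numpy as np
from sklearn.preprocessing import MinMaxScaler

_demo_x = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
_demo_scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(_demo_x)
# each column of _demo_scaled is now rescaled into [0, 1]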
    def transform(self, fp):
        fp = list(fp)  # materialize so the pool can be iterated again below
        fm, train_x, train_y = FeaturePool.to_train_arrays(fp)

        # Over-sample the minority class in the training split only.
        # fit_resample is the current imbalanced-learn API (fit_sample is deprecated).
        sampler = SMOTE(random_state=self.random_state)
        os_train_x, os_train_y = sampler.fit_resample(train_x, train_y[:, 0])
        os_train_y = os_train_y.reshape((os_train_y.shape[0], 1))

        for f in FeaturePool.from_train_arrays(fm, os_train_x, os_train_y):
            yield Feature.apply_config(f, is_over_sampled=True)

        # The test split passes through untouched.
        for f in fp:
            if f.split_type == SplitType.TEST:
                yield f
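# For reference: a minimal standalone sketch of the over-sampling step wrapped
# above, using imbalanced-learn directly (the toy data is illustrative).
import numpy as np
from imblearn.over_sampling import SMOTE

_demo_x = np.random.rand(100, 3)
_demo_y = np.array([0] * 90 + [1] * 10)          # imbalanced binary target
_res_x, _res_y = SMOTE(random_state=5).fit_resample(_demo_x, _demo_y)
# _res_y now contains an equal number of 0s and 1s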
    def fit_model(self, fp):
        fp = FeaturePool(fp)
        # Fit the wrapped estimator on the categorical predictors of the train split only.
        p = (
            fp.train_split()
              .predictors()
              .filter(lambda f: f.categorical)
        )
        x = p.array()
        y = (
            fp.train_split()
              .targets()
              .array()
        )

        return self._inst.fit(x, y)
    def transform(self, fp):
        x = FeaturePool(fp).array()
        logger.info("TUmap: starting UMAP transform ...")
        x_emb = self._inst.fit_transform(x)
        logger.info("TUmap: done")

        # Emit one Feature per embedding dimension.
        for f_id in range(x_emb.shape[1]):
            yield Feature(
                "UMAP feature #{}".format(f_id),
                x_emb[:, f_id]
            )
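# For reference: a minimal standalone sketch of the embedding step wrapped above,
# using umap-learn directly (toy data; n_components=2 assumed, as in plot_embedding below).
import numpy as np
import umap

_demo_x = np.random.rand(200, 8)
_demo_emb = umap.UMAP(n_components=2, random_state=5).fit_transform(_demo_x)
# _demo_emb has shape (200, 2): one row per sample, one column per embedding axis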
    def fit_model(self, fp):
        fm, train_x, train_y = FeaturePool.to_train_arrays(fp)
        train_fm = fm.predictors()

        m = self._inst.fit(train_x, train_y.ravel())

        assert len(train_fm) == len(m.support_), \
            "Size of the RFE output does not match the metadata: {} != {}".format(
                len(train_fm),
                len(m.support_)
            )
        return m
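# For reference: a minimal standalone sketch of recursive feature elimination
# as wrapped above, using scikit-learn directly (the estimator choice is illustrative).
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

_demo_x = np.random.rand(100, 6)
_demo_y = (np.random.rand(100) > 0.5).astype(int)
_rfe = RFE(estimator=LogisticRegression(), n_features_to_select=3).fit(_demo_x, _demo_y)
# _rfe.support_ is a boolean mask over the 6 input columns, as used in transform() below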
    def transform(self, fp):
        fp = list(fp)  # materialize so the pool can be iterated again below
        m = self.fit_model(fp)
        # Names of the predictors that RFE decided to keep.
        supp = {
            f.name
            for f, support in zip(FeaturePool(fp).train_predictors(), m.support_)
            if support
        }

        for f in fp:
            if f.is_predictor:
                if f.name in supp:
                    yield f
                else:
                    logger.info("TFeatureElimination: eliminating feature `{}`".format(f.name))
            else:
                yield f
    def transform(self, fp):
        fp = list(fp)  # materialize so the pool can be iterated more than once
        train_a, test_a = train_test_split(
            FeaturePool(fp).array(),
            test_size=self.test_size,
            random_state=self.random_state,
        )

        # Re-emit every feature twice: once with its train rows, once with its test rows.
        for f_id, f in enumerate(fp):
            yield Feature.apply_config(
                Feature(f.name, train_a[:, f_id], f.st),
                split_type=SplitType.TRAIN
            )

        for f_id, f in enumerate(fp):
            yield Feature.apply_config(
                Feature(f.name, test_a[:, f_id], f.st),
                split_type=SplitType.TEST
            )
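# For reference: a minimal standalone sketch of the row-wise split performed above
# (plain scikit-learn; toy data). Splitting the pooled array keeps columns aligned,
# so column f_id in both halves still corresponds to feature f_id.
import numpy as np
from sklearn.model_selection import train_test_split

_demo_x = np.arange(20).reshape(10, 2)
_demo_train, _demo_test = train_test_split(_demo_x, test_size=0.3, random_state=5)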
def plot_embedding(efp: FeaturePool, split_by=None):
    x = efp.array()
    assert x.shape[1] == 2, \
        "Embedding must be 2-dimensional to plot, got {} dimensions".format(x.shape[1])

    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(111)
    if split_by is not None:
        # Color the points by the values of the `split_by` feature.
        ax.scatter(x[:, 0], x[:, 1], c=split_by.data, alpha=0.5)
        ax.set_title(
            "UMAP for a feature pool split by feature `{}`".format(split_by.name)
        )
    else:
        ax.scatter(x[:, 0], x[:, 1], alpha=0.5)
        ax.set_title("UMAP for a feature pool")
    fig.show()
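# Possible usage (assuming `emb_pool` is the 2-column FeaturePool produced by the
# UMAP transform above and `churn` is any Feature whose .data values color the points):
#
#     plot_embedding(emb_pool, split_by=churn)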
    def run(self, d):
        while len(self.ops) > 0:
            op = self.ops.popleft()
            if isinstance(op, Transform):
                # A Transform maps a FeaturePool to a new FeaturePool.
                assert isinstance(d, FeaturePool), \
                    "Expecting `FeaturePool`, got {}".format(d)

                d = FeaturePool(list(op(d.features)))

            elif isinstance(op, Model):
                # A Model consumes a FeaturePool and produces a Model.Output.
                assert isinstance(d, FeaturePool), \
                    "Expecting `FeaturePool`, got {}".format(d)

                d = op(d)

            elif isinstance(op, Validation):
                # A Validation consumes the output of a Model.
                assert isinstance(d, Model.Output), \
                    "Expecting `Model.Output` for validation, got {}".format(d)

                d = op(d)
            else:
                raise ValueError(
                    "Failed to dispatch operation: `{}`".format(op))
        return d
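# Possible usage of run() (assuming, as in the snippets below, that Pipeline(...)
# stores its arguments as the `ops` deque and that `fp` is a FeaturePool):
#
#     clean = Pipeline(TParse(), TCleanPool(), TSummary())
#     fp_clean = clean.run(fp)   # FeaturePool in, FeaturePool out for Transform ops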
    def transform(self, fp):
        # Side-effect-only transform: run the callback and pass the pool through.
        fp = list(fp)  # materialize so the pool can be iterated again after the callback
        self.callback(FeaturePool(fp))
        for f in fp:
            yield f
Example #11
    elif data["tenure"] > 60:
        return "Tenure_gt_60"

telcom["tenure_group"] = telcom.apply(lambda telcom: tenure_lab(telcom), axis = 1)

# telcom = telcom.drop("tenure", axis=1)
# # telcom = telcom.drop("TotalCharges", axis=1)
# telcom = telcom.drop("customerID", axis=1)
# telcom.TotalCharges = pd.to_numeric(telcom.TotalCharges, errors='coerce')
# telcom['Churn'].replace(to_replace='Yes', value=1, inplace=True)
# telcom['Churn'].replace(to_replace='No', value=0, inplace=True)
# telcom = pd.get_dummies(telcom)
# telcom.dropna(inplace = True)


fp = FeaturePool.from_dataframe(telcom)

seed = 5

clean = Pipeline(
    TParse(),
    TCleanPool(),
    TSummary(),
)

te = Pipeline(
    TPreprocessPool(),
    TSummary(),
    TCleanRedundantFeatures(correlation_bound=0.99),
)
Example #13
    def __init__(self, models, fp):
        self.models = models
        self.fp = FeaturePool(fp)