コード例 #1
0
ファイル: steps.py プロジェクト: indyfree/CARLA
def encode(fitted_encoder: BaseEstimator, features: List[str],
           df: pd.DataFrame) -> pd.DataFrame:
    """
    Pipeline function to encode data with fitted sklearn OneHotEncoder.

    Parameters
    ----------
    fitted_encoder : sklearn OneHotEncoder
        Already fitted encoder. It must provide ``transform`` and either
        ``get_feature_names_out`` or the legacy ``get_feature_names``.
    features : list
        List of categorical features to encode.
    df : pd.DataFrame
        Data we want to encode. It is not modified.

    Returns
    -------
    output : pd.DataFrame
        Copy of ``df`` in which the columns listed in ``features`` are
        replaced by the encoded columns (appended after the remaining ones).
    """
    output = df.copy()

    # ``get_feature_names`` is the legacy spelling that newer scikit-learn
    # releases no longer ship; prefer ``get_feature_names_out`` when present.
    name_getter = getattr(fitted_encoder, "get_feature_names_out", None)
    if name_getter is None:
        name_getter = fitted_encoder.get_feature_names
    encoded_features = list(name_getter(features))

    encoded_values = fitted_encoder.transform(output[features])
    # An encoder configured for sparse output returns a sparse matrix, which
    # cannot be assigned to DataFrame columns directly.
    if hasattr(encoded_values, "toarray"):
        encoded_values = encoded_values.toarray()

    output[encoded_features] = encoded_values
    output = output.drop(features, axis=1)

    return output
コード例 #2
0
    def hook(self, model: BaseEstimator, history: History):
        """
        Save an image comparing ``self.datapoints`` with their reconstruction.

        The stored datapoints are pushed through ``model.transform`` and back
        through ``model.inverse_transform``; the result is rendered with
        ``visualize_reconstruction`` and written into ``self.outdir``.

        Parameters
        ----------
        model : BaseEstimator
            Model providing ``transform`` and ``inverse_transform``.
        history : History
            Training history; entries 1 and 2 of ``history.last()`` are
            formatted into the file name (presumably epoch and batch
            counters, judging by the ``e``/``b`` suffixes - TODO confirm).
        """
        noimages = self.datapoints.shape[0]
        latvar = model.transform(self.datapoints)
        # The previous code always *called* the transform result, which
        # raises TypeError for an ordinary array. Only call it when the model
        # really handed back a callable, so both kinds of model work.
        if callable(latvar):
            latvar = latvar()
        konstrukt = model.inverse_transform(latvar)

        last = history.last()
        filename = 'rekonstrukt-%02de-%03db.png' % (last[1], last[2])

        img = visualize_reconstruction(self.datapoints, konstrukt, noimages)
        img.save(join(self.outdir, filename))
コード例 #3
0
    def _fit_step(self,
                  transformer: BaseEstimator,
                  ids: Tuple,
                  is_final: bool,
                  X: pd.DataFrame,
                  y: Iterable = None,
                  **fit_params):
        """
        Fit one pipeline step, reusing a stored transformer when available.

        The transformer is tagged with the index and columns of ``X`` and then
        looked up through ``self._load``. If nothing is found it is fitted on
        ``X``/``y`` and handed to ``self._save``. For non-final steps ``X`` is
        additionally replaced by the step's output.

        Parameters
        ----------
        transformer : BaseEstimator
            Step to fit (or to replace by its stored counterpart).
        ids : Tuple
            Identifiers passed to ``self._load`` / ``self._save``.
        is_final : bool
            Whether this is the last step; the last step is never asked to
            transform ``X``.
        X : pd.DataFrame
            Input data of the step.
        y : Iterable, optional
            Targets forwarded to ``fit``.
        **fit_params
            Extra keyword arguments forwarded to ``fit``.

        Returns
        -------
        tuple
            ``(transformer, ids, X)`` - the fitted or loaded transformer, the
            ids belonging to it, and the data for the next step.
        """
        # Tag the transformer with the rows/columns it sees so that every CV
        # split yields a distinct transformer.
        transformer.train_ = tuple(X.index)
        transformer.features_ = tuple(X.columns)

        # Try to fetch an already fitted transformer from the database.
        stored_transformer, stored_ids = self._load(transformer, ids)
        is_loaded = stored_transformer is not None
        if is_loaded:
            transformer, ids = stored_transformer, stored_ids

        # Final and intermediate steps are fitted the same way; only a
        # transformer that could not be loaded needs fitting.
        if not is_loaded:
            transformer.fit(X, y, **fit_params)

        # Only intermediate steps pass their output on to the next step.
        if not is_final:
            result = transformer.transform(X)

            if isinstance(result, Tuple):
                X, y = result
            elif result.shape == X.shape:
                # Same shape: continue with the transformed values.
                X = pd.DataFrame(result)
            elif isinstance(X, pd.DataFrame):
                # Shape changed: keep the columns reported by get_support().
                X = X.iloc[:, transformer.get_support()]

        # Persist transformers that were fitted in this call.
        if not is_loaded:
            ids = self._save(transformer, ids)

        return transformer, ids, X
コード例 #4
0
ファイル: pca.py プロジェクト: hammer/sgkit
def pca_transform(
    ds: Dataset,
    est: BaseEstimator,
    *,
    variable: str = "call_alternate_allele_count",
    check_missing: bool = True,
    merge: bool = True,
) -> Dataset:
    """Apply a PCA estimator to new data.

    Allele counts are obtained from ``ds`` via ``_allele_counts``, transposed
    and passed to ``est.transform``. The projection is stored under
    ``variables.sample_pca_projection`` with dimensions
    ``("samples", "components")``, validated, and combined with ``ds`` through
    ``conditional_merge_datasets``.

    Parameters
    ----------
    ds
        Dataset providing the allele counts.
    est
        Estimator whose ``transform`` produces the projection.
    variable
        Name of the variable handed to ``_allele_counts``.
    check_missing
        Forwarded to ``_allele_counts``.
    merge
        Forwarded to ``conditional_merge_datasets``.

    Returns
    -------
    Dataset
        Whatever ``conditional_merge_datasets`` returns for ``ds`` and the
        validated projection dataset.
    """
    allele_counts = _allele_counts(ds, variable, check_missing=check_missing)

    # The estimator receives the transposed allele-count array.
    transposed = da.asarray(allele_counts).T
    projected = est.transform(transposed)

    data_vars = {
        variables.sample_pca_projection: (("samples", "components"), projected)
    }
    validated = variables.validate(Dataset(data_vars))
    return conditional_merge_datasets(ds, validated, merge)
コード例 #5
0
def _determine_offset(model: BaseEstimator, X: Union[np.ndarray,
                                                     pd.DataFrame]) -> int:
    """
    Compute how many rows the model's output is shorter than its input.

    Parameters
    ----------
    model: sklearn.base.BaseEstimator
        Trained model exposing ``predict`` or ``transform``. When ``predict``
        exists it is used; ``transform`` is only the fallback.
    X: Union[np.ndarray, pd.DataFrame]
        Data handed to the chosen method.

    Returns
    -------
    int
        ``len(X)`` minus the length of the model's output.
    """
    if hasattr(model, "predict"):
        model_output = model.predict(X)
    else:
        model_output = model.transform(X)

    input_length = len(X)
    output_length = len(model_output)
    return input_length - output_length
コード例 #6
0
ファイル: steps.py プロジェクト: indyfree/CARLA
def scale(fitted_scaler: BaseEstimator, features: List[str],
          df: pd.DataFrame) -> pd.DataFrame:
    """
    Pipeline function that applies a fitted sklearn scaler to selected columns.

    Parameters
    ----------
    fitted_scaler : sklearn Scaler
        Already fitted scaler; only its ``transform`` method is used.
    features : list
        Names of the continuous columns to normalize.
    df : pd.DataFrame
        Input data. It is copied, so the caller's frame stays untouched.

    Returns
    -------
    output : pd.DataFrame
        Copy of ``df`` whose ``features`` columns hold the scaled values; all
        other columns are carried over unchanged.

    """
    scaled = df.copy()
    continuous_part = scaled[features]
    scaled[features] = fitted_scaler.transform(continuous_part)
    return scaled
コード例 #7
0
def scaled_linspace(x: np.ndarray, y: np.ndarray, num: int,
                    scaler: BaseEstimator) -> np.ndarray:
    """Build a sequence of points between ``x`` and ``y``, evenly spaced in
    the scaler's normalized space.

    Args:
        x (np.ndarray): First point.
        y (np.ndarray): Second point.
        num (int): Number of intervals; ``num + 1`` points are produced,
            both end points included.
        scaler (BaseEstimator): Object providing ``transform`` and
            ``inverse_transform``; it defines the normalization.

    Returns:
        np.ndarray: The points mapped back through ``inverse_transform``.
    """
    # Map each end point into normalized space (one single-row batch each).
    start = scaler.transform([x])[0]
    stop = scaler.transform([y])[0]

    # Interpolate there, then map the whole sequence back.
    normalized_points = np.linspace(start, stop, num=num + 1, endpoint=True)
    return scaler.inverse_transform(normalized_points)