Example #1
    def fit(self, x: ArrayLike):
        """Compute the minimum and maximum for scaling.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                used for later scaling along the features axis.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        x = np.array(x)
        self.mins_ = x.min(axis=0)
        self.maxs_ = x.max(axis=0)
        # evenly spaced hinge knots between each feature's min and max,
        # stored with shape (n_hinges, n_features)
        self.hinge_indices_ = np.linspace(self.mins_, self.maxs_,
                                          self.n_hinges_)
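
A minimal standalone sketch of what this fit computes; the data and variable names here are illustrative, not part of the original class:

import numpy as np

# illustrative covariates: 3 samples, 2 features
x = np.array([[0.0, 10.0], [2.0, 20.0], [4.0, 30.0]])
n_hinges = 5
mins = x.min(axis=0)  # per-feature minima: [0., 10.]
maxs = x.max(axis=0)  # per-feature maxima: [4., 30.]
# shape (n_hinges, n_features): one column of evenly spaced knots per feature
hinge_indices = np.linspace(mins, maxs, n_hinges)
print(hinge_indices[:, 0])  # [0. 1. 2. 3. 4.]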
Example #2
    def _format_labels_and_dtypes(self,
                                  x: ArrayLike,
                                  categorical: list = None,
                                  labels: list = None) -> None:
        """Read input x data and lists of categorical data indices and band
            labels to format and store this info for later indexing.

        Args:
            s: array-like of shape (n_samples, n_features)
            categorical: indices indicating which x columns are categorical
            labels: covariate column labels. ignored if x is a pandas DataFrame
        """
        if isinstance(x, np.ndarray):
            nrows, ncols = x.shape
            if categorical is None:
                continuous = list(range(ncols))
            else:
                continuous = list(
                    set(range(ncols)).difference(set(categorical)))
            self.labels_ = labels or make_band_labels(ncols)
            self.categorical_ = categorical
            self.continuous_ = continuous

        elif isinstance(x, pd.DataFrame):
            x.drop(["geometry"], axis=1, errors="ignore", inplace=True)
            self.labels_ = labels or list(x.columns)

            # store both pandas and numpy indexing of these values
            self.continuous_pd_ = list(
                x.select_dtypes(exclude="category").columns)
            self.categorical_pd_ = list(
                x.select_dtypes(include="category").columns)

            all_columns = list(x.columns)
            self.continuous_ = [
                all_columns.index(item) for item in self.continuous_pd_
                if item in all_columns
            ]
            if len(self.categorical_pd_) != 0:
                self.categorical_ = [
                    all_columns.index(item) for item in self.categorical_pd_
                    if item in all_columns
                ]
            else:
                self.categorical_ = None
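
A standalone sketch of the DataFrame branch above, showing how pandas dtypes split columns into continuous and categorical label and index lists; the DataFrame here is illustrative:

import pandas as pd

df = pd.DataFrame({
    "temp": [10.2, 11.5, 9.8],
    "landcover": pd.Categorical(["forest", "urban", "forest"]),
})

# pandas-native column labels, split by dtype
continuous_pd = list(df.select_dtypes(exclude="category").columns)   # ['temp']
categorical_pd = list(df.select_dtypes(include="category").columns)  # ['landcover']

# numpy-style positional indices for the same columns
all_columns = list(df.columns)
continuous = [all_columns.index(c) for c in continuous_pd]    # [0]
categorical = [all_columns.index(c) for c in categorical_pd]  # [1]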
Example #3
    def fit(self, x: ArrayLike):
        """Compute the minimum and maximum for scaling.

        Args:
            x: array-like of shape (n_samples, n_features)
                The data used to compute the per-feature minimum and maximum
                used for later scaling along the features axis.
        Returns:
            None. Updates the transformer with feature fitting parameters.
        """
        self.estimators_ = []
        x = np.array(x)
        # note: scikit-learn >= 1.2 renames OneHotEncoder's `sparse`
        # argument to `sparse_output`
        if x.ndim == 1:
            # single feature: fit one encoder on the reshaped column vector
            estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
            self.estimators_.append(estimator.fit(x.reshape(-1, 1)))
        else:
            # multiple features: fit a separate encoder for each column
            _, ncols = x.shape
            for col in range(ncols):
                xsub = x[:, col].reshape(-1, 1)
                estimator = OneHotEncoder(dtype=np.uint8, sparse=False)
                self.estimators_.append(estimator.fit(xsub))
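
A standalone sketch of the per-column fitting loop, assuming a recent scikit-learn (which uses `sparse_output` rather than `sparse`); data and names are illustrative:

import numpy as np
from sklearn.preprocessing import OneHotEncoder

x = np.array([[0, 2], [1, 2], [0, 3]])  # two categorical columns
estimators = []
for col in range(x.shape[1]):
    enc = OneHotEncoder(dtype=np.uint8, sparse_output=False)
    estimators.append(enc.fit(x[:, col].reshape(-1, 1)))

print(estimators[0].categories_)  # [array([0, 1])]
print(estimators[1].categories_)  # [array([2, 3])]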
Example #4
    def transform(self, x: ArrayLike) -> np.ndarray:
        """Scale covariates according to the feature range.

        Args:
            x: array-like of shape (n_samples, n_features)
                Input data that will be transformed.

        Returns:
            ndarray with transformed data.
        """
        x = np.array(x)
        if x.ndim == 1:
            # single feature: apply the lone fitted encoder
            estimator = self.estimators_[0]
            return estimator.transform(x.reshape(-1, 1))
        else:
            # encode each column with its own fitted encoder, then stack
            # the one-hot blocks along the feature axis
            class_data = []
            _, ncols = x.shape
            for col in range(ncols):
                xsub = x[:, col].reshape(-1, 1)
                estimator = self.estimators_[col]
                class_data.append(estimator.transform(xsub))
            return np.concatenate(class_data, axis=1)
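
And a matching sketch of the transform path: each column is encoded by its own fitted encoder, then the one-hot blocks are concatenated along the feature axis (again with illustrative data and a recent scikit-learn):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

x = np.array([[0, 2], [1, 2], [0, 3]])
encoders = [
    OneHotEncoder(dtype=np.uint8, sparse_output=False).fit(x[:, c].reshape(-1, 1))
    for c in range(x.shape[1])
]
blocks = [encoders[c].transform(x[:, c].reshape(-1, 1)) for c in range(x.shape[1])]
onehot = np.concatenate(blocks, axis=1)
print(onehot.shape)  # (3, 4): 2 classes per column -> 2 + 2 one-hot columns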
Example #5
    def fit(
        self,
        x: ArrayLike,
        y: ArrayLike,
        categorical: List[int] = None,
        labels: list = None,
        is_features: bool = False,
        feature_labels: list = None,
    ) -> None:
        """Trains a maxent model using a set of covariates and presence/background points.

        Args:
            x: array-like of shape (n_samples, n_features) with covariate data
            y: array-like of shape (n_samples,) with binary presence/background (1/0) values
            categorical: indices for which columns are categorical
            labels: covariate labels. ignored if x is a pandas DataFrame
            is_features: specify that x data has been transformed from covariates to features
            feature_labels: list of length n_features, with labels identifying each column's feature type
                with options ["linear", "quadratic", "product", "threshold", "hinge", "categorical"]
                must be set if `is_features=True`
        """
        # fit the feature transformer
        if is_features:
            features = x
            assert feature_labels is not None, "feature_labels must be set if is_features=True"

        else:
            self.transformer = _features.MaxentFeatureTransformer(
                feature_types=self.feature_types_,
                clamp=self.clamp_,
                n_hinge_features=self.n_hinge_features_,
                n_threshold_features=self.n_threshold_features_,
            )
            features = self.transformer.fit_transform(x,
                                                      categorical=categorical,
                                                      labels=labels)
            feature_labels = self.transformer.feature_names_

        # compute sample weights
        if self.weight_strategy_ == "balance":
            pbr = len(y) / y.sum()
        else:
            pbr = self.weight_strategy_

        self.weights_ = _features.compute_weights(y, pbr=pbr)

        # set feature regularization parameters
        self.regularization_ = _features.compute_regularization(
            y,
            features,
            feature_labels=feature_labels,
            beta_multiplier=self.beta_multiplier_,
            beta_threshold=self.beta_threshold_,
            beta_hinge=self.beta_hinge_,
            beta_categorical=self.beta_categorical_,
        )

        # get model lambda scores to initialize the glm
        self.lambdas_ = _features.compute_lambdas(y, self.weights_,
                                                  self.regularization_)

        # model fitting
        self.initialize_model(lambdas=self.lambdas_)
        self.estimator.fit(
            features,
            y,
            sample_weight=self.weights_,
            relative_penalties=self.regularization_,
        )

        # get the beta values based on which lambda selection method to use
        if self.use_lambdas_ == "last":
            self.beta_scores_ = self.estimator.coef_path_[0, :, -1]
        elif self.use_lambdas_ == "best":
            self.beta_scores_ = self.estimator.coef_path_[
                0, :, self.estimator.lambda_max_inx_]

        # apply maxent-specific transformations
        raw = self.predict(features[y == 0], transform="raw", is_features=True)

        # alpha is a normalizing constant that ensures that f1(z) integrates (sums) to 1
        self.alpha_ = maxent_alpha(raw)

        # the distance from f(z) is the relative entropy of f1(z) with respect to f(z)
        self.entropy_ = maxent_entropy(raw)
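
A standalone sketch of the "balance" weighting step near the top of this method: the presence/background ratio pbr up-weights one class so presences and backgrounds contribute comparably to the loss. The np.where line is an illustrative stand-in for the library's compute_weights, which is assumed here to apply pbr to the background (y == 0) samples:

import numpy as np

y = np.zeros(1000)
y[:100] = 1                            # 100 presence, 900 background points
pbr = len(y) / y.sum()                 # 1000 / 100 = 10.0
weights = np.where(y == 0, pbr, 1.0)   # illustrative stand-in for compute_weights
print(pbr, weights.sum())              # 10.0 9100.0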