Beispiel #1
0
def create_classifier(x, z, toe, window=40, min_buffer=40, max_buffer=200):
    """
    Train a random forest dune toe classifier from surveyed profiles.

    The profiles are gap-filled, resampled onto a regular 0.5-spaced
    cross-shore grid, smoothed and differentiated before training samples
    are extracted with `create_training_data()`.

    Parameters
    ----------
    x : ndarray
        Array of cross-shore locations of size (m,).
    z : ndarray
        Array of elevations matching x. May be of size (m,) or (m,n).
    toe : ndarray
        Array of dune toe locations of size (n,).
    window : int, default 40
        Size of the window for training data.
    min_buffer : int, default 40
        Minimum buffer around the real dune toe.
    max_buffer : int, default 200
        Maximum buffer range.

    Returns
    -------
    clf : scikit-learn classifier
        Fitted random forest classifier (100 trees, gini criterion,
        fixed random_state of 123 for reproducibility).
    """
    grid_step = 0.5

    # Pre-processing: fill NaNs, then move profiles and toes onto the grid
    z_filled = ds.interp_nan(x, z)
    x_grid = np.arange(np.min(x), np.max(x) + grid_step, grid_step)
    z_grid = ds.interp_to_grid(x, x_grid, z_filled)
    toe_grid = ds.interp_toe_to_grid(x, x_grid, toe)

    # Smooth with a moving average (window argument 5), then differentiate
    z_smooth = ds.moving_average(z_grid, 5)
    z_slope = ds.diff_data(z_smooth, 1)

    # Assemble the training set
    features, labels = create_training_data(
        x_grid, z_slope, toe_grid, window, min_buffer, max_buffer)

    # Build and fit the classifier; labels are flattened to 1-D for fitting
    forest = RandomForestClassifier(
        n_estimators=100,
        criterion="gini",
        random_state=123,
    )
    return forest.fit(features, labels.ravel())
Beispiel #2
0
    def predict_dunetoe_ml(self,
                           clf_name,
                           no_of_output=1,
                           dune_crest='max',
                           **kwargs):
        """
        Predict dune toe location using a pre-trained machine learning (ml) classifier.
        See pybeach/classifiers/create_classifier.py to create a classifier.

        ...

        Parameters
        ----------
        clf_name : str
            Classifier to use. Classifier should be contained within 'classifiers'
            directory. In-built options include "barrier", "embayed", "mixed".
        no_of_output : int, default 1
            Number of dune toes to return, ranked from most probable to least probable.
            Must satisfy 0 < no_of_output < len(self.x_interp).
        dune_crest : {'max', 'rr', int, ndarray of ints, None}, default 'max'
            Method to identify the dune crest location. The region of the beach profile
            that the dune toe location is searched for is constrained to the region
            seaward of the dune crest.
            max: the maximum elevation of the cross-shore profile.
            rr: dune crest calculated based on relative relief.
            int: integer index of the dune crest, applied to every profile.
            ndarray: 1-D integer array with one crest index per profile, i.e. of
                     length self.z_interp.shape[0].
            None: do not calculate a dune crest location. Search the whole profile for
                  the dune toe.
        **kwargs : arguments
            Additional arguments to pass to `self.predict_dunecrest()`. Keywords include
            window_size, threshold, water_level. Only used when dune_crest is
            'max' or 'rr'.

        Returns
        -------
        dt_index : array of ints
            Indices of the no_of_output most probable dune toe locations, in
            descending order of probability. Shape is (n_profiles,) when
            no_of_output is 1, (no_of_output,) for a single profile, and
            (n_profiles, no_of_output) otherwise.
        dt_probabilities : array
            array of dune toe probabilities for each profiles in self.z.

        Raises
        ------
        AssertionError
            If clf_name is not a str or no_of_output is not a valid int.
        Warning
            If an unsupported keyword is passed for `self.predict_dunecrest()`.
        ValueError
            If dune_crest is not one of the supported options.
        FileNotFoundError
            If no classifier named clf_name can be loaded.

        """
        # Warnings
        # NOTE: `and` (not `&`) so that a non-int no_of_output short-circuits
        # into the assertion instead of raising TypeError on the comparison.
        assert isinstance(clf_name, str), 'clf_name should be a string.'
        assert (isinstance(no_of_output, int)
                and 0 < no_of_output < len(self.x_interp)), \
            f'no_of_outputs must be int between 0 and {len(self.x_interp)}.'

        n_profiles = self.z_interp.shape[0]

        # Define dune crest
        # The isinstance guard keeps ndarray inputs away from `in`, which would
        # otherwise trigger an element-wise comparison against the strings.
        if isinstance(dune_crest, str) and dune_crest in ('max', 'rr'):
            valid_keys = ("window_size", "threshold", "water_level")
            for k in kwargs:
                if k not in valid_keys:
                    # Exception type kept as Warning for backward compatibility.
                    raise Warning(
                        f'{k} not a valid argument for predict_dunecrest()')
            dune_crest_loc = self.predict_dunecrest(method=dune_crest,
                                                    **kwargs)
        elif isinstance(dune_crest, (int, np.integer)):
            dune_crest_loc = np.full((n_profiles, ), dune_crest)
        elif dune_crest is None:
            dune_crest_loc = np.full((n_profiles, ), 0)
        elif (isinstance(dune_crest, np.ndarray)
              and dune_crest.ndim == 1
              and len(dune_crest) == n_profiles
              and np.issubdtype(dune_crest.dtype, np.integer)):
            dune_crest_loc = dune_crest
        else:
            raise ValueError(
                f'dune_crest should be "max", "rr", int (of size 1 or {self.z_interp.shape[0]}), or None'
            )

        # Load the random forest classifier
        try:
            clf = cs.load_classifier(clf_name)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f'no classifier named {clf_name} found in classifier folder.'
            ) from err

        # Differentiate data
        z_diff = ds.diff_data(self.z_interp, 1)

        # Number of features the classifier was trained on. scikit-learn
        # replaced `n_features_` with `n_features_in_`; support both so that
        # old and new library versions work.
        n_features = getattr(clf, 'n_features_in_', None)
        if n_features is None:
            n_features = clf.n_features_

        # Predict probability of dune toe for all points along profile
        dt_probabilities = np.array([
            clf.predict_proba(
                np.squeeze(ds.rolling_samples(row, n_features)))[:, 1]
            for row in z_diff
        ])

        # Interpolate the probabilities back to the original grid
        dt_probabilities = ds.interp_to_grid(self.x_interp, self.x,
                                             dt_probabilities)

        # Retrieve the top 'no_of_outputs' predictions in order
        crest_idx = np.asarray(dune_crest_loc)
        dt_index = np.array([
            np.flip(np.argsort(row[crest:])[-no_of_output:], 0)
            for row, crest in zip(dt_probabilities, crest_idx)
        ])

        # Indices above are relative to each profile's crest; shift them back
        # to profile coordinates row by row (one crest offset per profile).
        dt_index = dt_index + crest_idx[:, np.newaxis]

        # Drop singleton dimensions while always keeping the profile axis when
        # a single output per profile is requested.
        if no_of_output == 1:
            dt_index = dt_index[:, 0]
        elif dt_index.shape[0] == 1:
            dt_index = dt_index[0]

        return dt_index, dt_probabilities