예제 #1
0
def create_classifier(x, z, toe, window=40, min_buffer=40, max_buffer=200):
    """
    Train a random forest classifier that recognises dune toes.

    The elevation profiles are gap-filled, resampled onto a regular grid with
    0.5 spacing (in the units of `x`), smoothed with a 5-point moving average
    and differentiated before the training samples are extracted.

    Parameters
    ----------
    x : ndarray
        Array of cross-shore locations of size (m,).
    z : ndarray
        Array of elevations matching x. May be of size (m,) or (m,n).
    toe : ndarray
        Array of dune toe locations of size (n,).
    window : int, default 40
        Size of the window for training data.
    min_buffer : int, default 40
        Minimum buffer around the real dune toe.
    max_buffer : int, default 200
        Maximum buffer range.

    Returns
    -------
    clf : scikit-learn classifier
        Fitted random forest classifier (100 trees, gini criterion, fixed
        random seed of 123).
    """
    grid_step = 0.5
    smoothing_window = 5

    # Regular cross-shore grid spanning the full extent of x
    x_grid = np.arange(np.min(x), np.max(x) + grid_step, grid_step)

    # Elevations: fill NaNs, resample to the grid, smooth, then differentiate
    z_filled = ds.interp_nan(x, z)
    z_grid = ds.interp_to_grid(x, x_grid, z_filled)
    toe_grid = ds.interp_toe_to_grid(x, x_grid, toe)
    z_smooth = ds.moving_average(z_grid, smoothing_window)
    z_diff = ds.diff_data(z_smooth, 1)

    # Assemble the training samples and their labels
    features, labels = create_training_data(x_grid, z_diff, toe_grid, window,
                                            min_buffer, max_buffer)

    # Fit the forest; labels are flattened to 1-d for fit()
    forest = RandomForestClassifier(n_estimators=100,
                                    criterion="gini",
                                    random_state=123)
    forest.fit(features, labels.ravel())
    return forest
예제 #2
0
    def predict_dunetoe_ml(self,
                           clf_name,
                           no_of_output=1,
                           dune_crest='max',
                           **kwargs):
        """
        Predict dune toe location using a pre-trained machine learning (ml) classifier.
        See pybeach/classifiers/create_classifier.py to create a classifier.

        Parameters
        ----------
        clf_name : str
            Classifier to use. Classifier should be contained within 'classifiers'
            directory. In-built options include "barrier", "embayed", "mixed".
        no_of_output : int, default 1
            Number of dune toes to return, ranked from most probable to least probable.
        dune_crest : {'max', 'rr', int, ndarray of ints, None}, default 'max'
            Method to identify the dune crest location. The region of the beach profile
            that the dune toe location is searched for is constrained to the region
            seaward of the dune crest.
            max: the maximum elevation of the cross-shore profile.
            rr: dune crest calculated based on relative relief.
            int: a single crest index applied to every profile.
            ndarray: 1-d integer array with one crest index per profile, i.e. of
                     length self.z_interp.shape[0].
            None: do not calculate a dune crest location. Search the whole profile for
                  the dune toe.
        **kwargs : arguments
            Additional arguments to pass to `self.predict_dunecrest()`. Keywords include
            window_size, threshold, water_level.

        Returns
        -------
        dt_index : array of ints
            Indices of the `no_of_output` most probable dune toe locations, in
            descending order of probability. Shape is (n_profiles,) when
            no_of_output is 1, (no_of_output,) for a single profile, and
            (n_profiles, no_of_output) otherwise.
        dt_probabilities : array
            Array of dune toe probabilities for each profile, interpolated back
            onto self.x.

        Raises
        ------
        AssertionError
            If clf_name is not a str or no_of_output is not a valid int.
        Warning
            If an unsupported keyword is passed while dune_crest is 'max' or 'rr'.
        ValueError
            If dune_crest is not one of the accepted forms.
        FileNotFoundError
            If no classifier called clf_name can be loaded.

        """
        # Input checks. `and` short-circuits, so a non-int no_of_output fails the
        # assertion itself instead of raising TypeError in the comparison.
        assert isinstance(clf_name, str), 'clf_name should be a string.'
        assert isinstance(no_of_output, int) and \
               0 < no_of_output < len(self.x_interp), \
               f'no_of_outputs must be int between 0 and {len(self.x_interp)}.'

        n_profiles = self.z_interp.shape[0]
        crest_kwargs = ("window_size", "threshold", "water_level")

        # Define dune crest. The str check comes first so that an ndarray is
        # never compared against the method names.
        if isinstance(dune_crest, str) and dune_crest in ('max', 'rr'):
            for k in kwargs:
                if k not in crest_kwargs:
                    raise Warning(
                        f'{k} not a valid argument for predict_dunecrest()')
            dune_crest_loc = self.predict_dunecrest(method=dune_crest,
                                                    **kwargs)
        elif dune_crest is None:
            dune_crest_loc = np.full((n_profiles, ), 0)
        elif isinstance(dune_crest, (int, np.integer)):
            dune_crest_loc = np.full((n_profiles, ), dune_crest)
        elif (isinstance(dune_crest, np.ndarray) and dune_crest.ndim == 1
              and len(dune_crest) == n_profiles
              and np.issubdtype(dune_crest.dtype, np.integer)):
            # One crest index per profile; any integer dtype is accepted.
            dune_crest_loc = dune_crest
        else:
            raise ValueError(
                f'dune_crest should be "max", "rr", int (of size 1 or {n_profiles}), or None'
            )

        # Load the random forest classifier
        try:
            clf = cs.load_classifier(clf_name)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f'no classifier named {clf_name} found in classifier folder.'
            ) from err

        # Number of features the classifier was fitted on. Newer scikit-learn
        # releases expose `n_features_in_` and no longer provide `n_features_`;
        # fall back to the old name for classifiers that only have that.
        n_features = getattr(clf, 'n_features_in_', None)
        if n_features is None:
            n_features = clf.n_features_

        # Differentiate data
        z_diff = ds.diff_data(self.z_interp, 1)

        # Predict probability of dune toe for all points along profile
        dt_probabilities = np.array([
            clf.predict_proba(
                np.squeeze(ds.rolling_samples(row, n_features)))[:, 1]
            for row in z_diff
        ])

        # Interpolate the probabilities back to the original grid
        dt_probabilities = ds.interp_to_grid(self.x_interp, self.x,
                                             dt_probabilities)

        # Retrieve the top 'no_of_output' predictions in order, searching only
        # from each profile's crest index onwards.
        dt_index = np.array([
            np.flip(np.argsort(row[crest:])[-no_of_output:], 0)
            for row, crest in zip(dt_probabilities, dune_crest_loc)
        ])

        # argsort indices are relative to the crest; shift them back to profile
        # indices. The offsets are applied per row (column vector) so this also
        # works when several outputs are requested for several profiles.
        crest_offsets = np.asarray(dune_crest_loc).reshape(-1, 1)
        dt_index = dt_index + crest_offsets
        if no_of_output == 1:
            dt_index = dt_index[:, 0]
        else:
            dt_index = np.squeeze(dt_index)

        return dt_index, dt_probabilities
예제 #3
0
    def __init__(self, x, z, window_size=5):
        """
        A class used to represent a 2D beach profile transect.

        The inputs are validated, oriented so that each row of z is one
        profile, gap-filled, interpolated to a 0.5 m grid and smoothed.

        Parameters
        ----------
        x : ndarray
            Array of cross-shore locations of size (m,).
        z : ndarray
            Array of elevations matching x. May be of size (m,) or (m,n).
        window_size : int, default 5
            Size of window used to smooth z with a moving average.

        Attributes
        ----------
        x : ndarray
            Original input array of cross-shore locations.
        z : ndarray
            Original profile elevations as a 2-d array with one profile per row
            (transposed if necessary so that z.shape[1] == len(x)).
        x_interp : ndarray
            Cross-shore locations on a 0.5 m grid spanning min(x) to max(x).
        z_interp : ndarray
            z with NaNs interpolated, resampled onto x_interp and smoothed by a
            moving average with window size window_size.

        Raises
        ------
        AssertionError
            If x, z or window_size fail the type/size checks.
        ValueError
            If no dimension of z matches the length of x.

        Methods
        -------
        predict_dunetoe_ml(self, clf_name, no_of_output=1, dune_crest='max', **kwargs)
        predict_dunetoe_mc(self, dune_crest='rr', shoreline=True, window_size=None, **kwargs)
        predict_dunetoe_pd(self, dune_crest=None, shoreline=None, **kwargs)
        predict_dunetoe_rr(self, window_size=11, threshold=0.2, water_level=0)
        predict_dunecrest(self, method="max", window_size=50, threshold=0.8, water_level=0)
        predict_shoreline(self, water_level=0, dune_crest='rr', **kwargs)

        """
        # Input checks. `and` short-circuits, so e.g. a non-int window_size
        # fails the assertion itself instead of raising TypeError in `> 0`.
        assert isinstance(x, np.ndarray) and np.ndim(x) == 1, \
            'x should be of type ndarray and shape (m,).'
        assert (len(x) > 1), 'x should have length > 1.'
        assert isinstance(z, np.ndarray), 'z should be of type ndarray.'
        assert isinstance(window_size, int) and \
               0 < window_size < len(x), \
               f'window_size must be int between 0 and {len(x)}.'

        # Ensure inputs are row vectors
        x = np.atleast_1d(x)
        z = np.atleast_2d(z)
        if len(x) not in z.shape:
            raise ValueError(
                f'Input x of shape ({x.shape[0]},) must share a dimension with input z which has shape {z.shape[0], z.shape[1]}.'
            )
        # Orient z so that each row is one profile sampled at x
        if x.shape[0] != z.shape[1]:
            z = z.T

        # Store original inputs
        self.x = x
        self.z = z

        # Interp nan values
        z = ds.interp_nan(x, z)

        # Slope of a straight-line fit through each profile. A positive slope
        # means elevation rises with x. This is reported with a printed message
        # rather than an exception, so construction still completes.
        flag = np.polyfit(x, z.T, 1)[0]
        if np.any(flag > 0):
            print(
                'Input profiles should be oriented from landward (left) to seaward (right), '
                'some inputted profiles appear to have the sea on the left. This may cause errors.'
            )

        # Interp to 0.5 m grid
        self.x_interp = np.arange(np.min(x), np.max(x) + 0.5, 0.5)
        z = ds.interp_to_grid(x, self.x_interp, z)

        # Apply moving average to smooth data
        z = ds.moving_average(z, window_size)

        # Store transformed inputs
        self.z_interp = z