コード例 #1
0
def test_one_hot_encoding():
    """One-hot encode str (and optionally int) columns and check the output.

    Three cases are exercised: DataFrame input with convert_int=False, the
    same data passed as an ndarray, and DataFrame input with convert_int=True.
    """
    unity = np.ones(7)

    encoded = PreProcess(A).process_one_hot(convert_int=False)

    # Every row must be assigned to exactly one of the last four columns
    assert (encoded.iloc[:, -4:].sum(axis=1) == unity).all()

    # (column, row position, expected flag) spot checks of the encoding
    expected_flags = (
        ('f1_0', 0, 1.0),
        ('f1_0', 1, 0.0),
        ('f1_0', -3, 1.0),
        ('f1_0', -1, 1.0),
        ('f1_1', 1, 1.0),
        ('f1_1', 0, 0.0),
        ('f1_2', 2, 1.0),
        ('f1_2', -2, 1.0),
        ('f1_2', -1, 0.0),
        ('f1_3', 3, 1.0),
        ('f1_3', 0, 0.0),
    )
    for column, row, flag in expected_flags:
        assert encoded[column].values[row] == flag

    # ndarray input must give the same numbers as DataFrame input
    array_encoded = PreProcess(A.values).process_one_hot(convert_int=False)
    assert np.allclose(encoded, array_encoded)

    # With integer conversion 'f3' is replaced by one-hot columns and each
    # of the two encoded column groups sums to one per row
    encoded = PreProcess(A).process_one_hot(convert_int=True)
    assert 'f3' not in encoded
    assert (encoded.iloc[:, 1:5].sum(axis=1) == unity).all()
    assert (encoded.iloc[:, 5:].sum(axis=1) == unity).all()
コード例 #2
0
def test_categories():
    """Verify predefined categories handle missing data and reject bad input.

    Checks default column naming, user supplied categories (including a
    category that never occurs in the data), category ordering, and that
    categories which do not cover the data raise a ValueError.
    """
    proc = PreProcess(A)
    out = proc.process_one_hot(convert_int=False)
    assert (out.columns == ['f2', 'f3', 'f1_0', 'f1_1',
                            'f1_2', 'f1_3']).all()

    # Verify columns are created for missing categories
    # and that the new one-hot columns have names corresponding to their values
    proc = PreProcess(A)
    out0 = proc.process_one_hot(
        convert_int=False, categories={'f1': ['a', 'b', 'c', 'd', 'missing']})
    assert (out0.columns == ['f2', 'f3', 'a', 'b', 'c', 'd', 'missing']).all()
    assert (out0['missing'] == np.zeros(7)).all()

    # verify ordering works.
    out1 = proc.process_one_hot(
        convert_int=False, categories={'f1': ['missing', 'd', 'c', 'a', 'b']})
    assert (out1.columns == ['f2', 'f3', 'missing', 'd', 'c', 'a', 'b']).all()
    assert all(out0.a == out1.a)
    assert all(out0.b == out1.b)
    assert all(out0.c == out1.c)
    assert all(out0.d == out1.d)
    assert (out1['missing'] == np.zeros(7)).all()
    assert out1.a.values[0] == 1
    assert out1.a.values[1] == 0
    assert out1.a.values[2] == 0
    assert out1.a.values[3] == 0
    assert out1.a.values[4] == 1

    # Verify good error with bad categories input.
    try:
        proc.process_one_hot(categories={'f1': ['a', 'b', 'c']})
    except ValueError as e:
        assert 'Found unknown categories' in str(e)
    else:
        # Without this branch the test would pass silently if no error
        # were raised at all
        raise AssertionError('Expected a ValueError for categories that do '
                             'not cover all values found in the data')
コード例 #3
0
ファイル: base_model.py プロジェクト: jatropj/phygnn
    def __init__(self,
                 model,
                 feature_names=None,
                 label_names=None,
                 norm_params=None,
                 normalize=(True, False),
                 one_hot_categories=None):
        """
        Parameters
        ----------
        model : OBJ
            Initialized model object
        feature_names : list | str | ndarray | pandas.Index, optional
            Ordered feature names. A single str is wrapped in a list, arrays
            and indexes are converted to lists, by default None
        label_names : list | str | ndarray | pandas.Index, optional
            Ordered label (output) names, coerced like feature_names,
            by default None
        norm_params : dict, optional
            Dictionary mapping feature and label names (keys) to normalization
            parameters (mean, stdev). None is replaced by an empty dict,
            by default None
        normalize : bool | tuple, optional
            Boolean flag(s) as to whether features and labels should be
            normalized. Possible values:
            - True means normalize both
            - False means don't normalize either
            - Tuple of flags (normalize_feature, normalize_label)
            by default (True, False)
        one_hot_categories : dict, optional
            Features to one-hot encode using given categories, if None do
            not run one-hot encoding. When given it is validated with
            PreProcess.check_one_hot_categories, by default None
        """
        def _as_name_list(names):
            """Coerce str / ndarray / Index names to a list."""
            if isinstance(names, str):
                return [names]

            if isinstance(names, (np.ndarray, pd.Index)):
                return names.tolist()

            return names

        self._model = model
        self._feature_names = _as_name_list(feature_names)
        self._label_names = _as_name_list(label_names)
        self._norm_params = {} if norm_params is None else norm_params
        self._normalize = self._parse_normalize(normalize)

        # Validate before storing so bad categories fail at construction
        if one_hot_categories is not None:
            PreProcess.check_one_hot_categories(one_hot_categories)

        self._one_hot_categories = one_hot_categories
コード例 #4
0
    def _normalize_arr(self, arr, names):
        """
        Normalize array and save normalization parameters to given names

        Existing normalization parameters for the given names are re-used
        when available; otherwise the parameters returned by
        PreProcess.normalize are stored in self._norm_params under each name.

        Parameters
        ----------
        arr : ndarray
            Array of features/label to normalize
        names : list
            List of feature/label names

        Returns
        -------
        norm_arr : ndarray
            Normalized features/label

        Raises
        ------
        RuntimeError
            If the number of names does not match the number of items in arr
        """
        n_names = self._get_item_number(arr)
        if len(names) != n_names:
            # Report the count that was actually compared. Indexing
            # arr.shape[1] here would raise IndexError for 1D input and
            # hide the real problem.
            msg = ("Number of item names ({}) does not match number of items "
                   "({})".format(len(names), n_names))
            logger.error(msg)
            raise RuntimeError(msg)

        means, stdevs = self.get_norm_params(names)
        # Only store parameters that were computed by this call
        update = means is None or stdevs is None

        norm_arr, means, stdevs = PreProcess.normalize(arr,
                                                       mean=means,
                                                       stdev=stdevs)
        if update:
            for i, n in enumerate(names):
                norm_params = {n: {'mean': means[i], 'stdev': stdevs[i]}}
                self._norm_params.update(norm_params)

        return norm_arr
コード例 #5
0
    def _normalize_df(self, df):
        """
        Normalize DataFrame, storing newly computed normalization parameters

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame of features/label to normalize

        Returns
        -------
        norm_df : pandas.DataFrame
            Normalized features/label
        """
        columns = df.columns
        means, stdevs = self.get_norm_params(columns)
        # Parameters are saved only when they were not already available
        store_params = means is None or stdevs is None

        norm_df, means, stdevs = PreProcess.normalize(df,
                                                      mean=means,
                                                      stdev=stdevs)
        if store_params:
            self._norm_params.update(
                {col: {'mean': means[pos], 'stdev': stdevs[pos]}
                 for pos, col in enumerate(columns)})

        return norm_df
コード例 #6
0
    def _normalize_dict(self, items):
        """
        Normalize each entry of a dictionary of items (features | labels)

        Entries that cannot be normalized are passed through unchanged and a
        warning is logged and emitted.

        Parameters
        ----------
        items : dict
            mapping of names to vectors

        Returns
        -------
        norm_items : dict
            mapping of names to normalized-feature vectors
        """
        norm_items = {}
        for name, vector in items.items():
            mean = self.get_mean(name)
            stdev = self.get_stdev(name)
            # Save parameters only if they had to be computed for this entry
            store_params = mean is None or stdev is None
            try:
                vector, mean, stdev = PreProcess.normalize(vector,
                                                           mean=mean,
                                                           stdev=stdev)
                if store_params:
                    self._norm_params.update(
                        {name: {'mean': mean, 'stdev': stdev}})
            except Exception as ex:
                # Best effort: keep the native vector and tell the user
                msg = "Could not normalize {}:\n{}".format(name, ex)
                logger.warning(msg)
                warn(msg)

            norm_items[name] = vector

        return norm_items
コード例 #7
0
def test_OHE():
    """
    Test that ModelBase one-hot encodes a categorical feature while the
    leading numeric features are normalized as usual
    """
    categories = list('def')
    ohe_features = FEATURES.copy()
    ohe_features['categorical'] = np.random.choice(categories, len(FEATURES))

    model = ModelBase(None,
                      feature_names=ohe_features.columns,
                      label_names=LABELS.columns,
                      normalize=True,
                      one_hot_categories={'categorical': categories})

    native = FEATURES.values.astype('float32')
    baseline, means, stdevs = PreProcess.normalize(native)
    test = model.parse_features(ohe_features)

    # The first two parsed columns are compared with the normalized
    # FEATURES values and their normalization parameters
    assert np.allclose(baseline, test[:, :2])
    model_means = np.array(model.feature_means, dtype='float32')
    assert np.allclose(means, model_means[:2])
    model_stdevs = np.array(model.feature_stdevs, dtype='float32')
    assert np.allclose(stdevs, model_stdevs[:2])

    # One-hot columns must not have normalization parameters
    for category in categories:
        assert model.get_mean(category) is None
        assert model.get_stdev(category) is None

    # Categories replace the raw column in the model feature names only
    assert all(np.isin(categories, model.feature_names))
    assert not any(np.isin(categories, model.input_feature_names))
    assert 'categorical' not in model.feature_names
    assert 'categorical' in model.input_feature_names
コード例 #8
0
    def _unnormalize_df(self, df):
        """
        Un-normalize DataFrame using the stored normalization parameters

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame of features/label to un-normalize

        Returns
        -------
        df : pandas.DataFrame
            Native features/label df if norm params are not None, otherwise
            the input df is returned as-is after a warning
        """
        means, stdevs = self.get_norm_params(df.columns)

        if means is None or stdevs is None:
            msg = ("Normalization parameters are unavailable, df will not be "
                   "un-normalized!")
            logger.warning(msg)
            warn(msg)
            return df

        # Work on a copy so the caller's DataFrame is left untouched
        return PreProcess.unnormalize(df.copy(), means, stdevs)
コード例 #9
0
    def _unnormalize_arr(self, arr, names):
        """
        Un-normalize array using given names

        Parameters
        ----------
        arr : ndarray
            Array of features/label to un-normalize
        names : list
            List of feature/label names

        Returns
        -------
        arr : ndarray
            Native features/label array if norm params are not None,
            otherwise the input array is returned as-is after a warning

        Raises
        ------
        RuntimeError
            If the number of names does not match the number of items in arr
        """
        n_names = self._get_item_number(arr)
        if len(names) != n_names:
            # Report the count that was actually compared. Indexing
            # arr.shape[1] here would raise IndexError for 1D input and
            # hide the real problem.
            msg = ("Number of item names ({}) does not match number of items "
                   "({})".format(len(names), n_names))
            logger.error(msg)
            raise RuntimeError(msg)

        means, stdevs = self.get_norm_params(names)

        if means is not None and stdevs is not None:
            # Work on a copy so the caller's array is left untouched
            arr = PreProcess.unnormalize(arr.copy(), means, stdevs)
        else:
            msg = ("Normalization parameters are unavailable, arr will not be "
                   "un-normalized!")
            logger.warning(msg)
            warn(msg)

        return arr
コード例 #10
0
    def _unnormalize_dict(self, items):
        """
        Un-normalize each entry of a dictionary of items (features | labels)

        Parameters
        ----------
        items : dict
            mapping of names to vectors

        Returns
        -------
        native_items : dict
            mapping of names to native vectors
        """
        native_items = {}
        for name, vector in items.items():
            # NOTE(review): item lookup, so a name missing from the
            # normalization parameters fails here rather than warning below
            params = self.normalization_parameters[name]
            if params is None:
                msg = ("Normalization Parameters unavailable, {} will not be "
                       "un-normalized!".format(name))
                logger.warning(msg)
                warn(msg)
            else:
                vector = PreProcess.unnormalize(vector, params['mean'],
                                                params['stdev'])

            native_items[name] = vector

        return native_items
コード例 #11
0
    def _parse_features(self,
                        features,
                        names=None,
                        process_one_hot=True,
                        **kwargs):
        """
        Parse features

        Parameters
        ----------
        features : pandas.DataFrame | dict | ndarray
            Features to train on or predict from
        names : list, optional
            List of feature names, by default None
        process_one_hot : bool, optional
            Check for and process one-hot variables, by default True
        kwargs : dict, optional
            kwargs for PreProcess.one_hot

        Returns
        -------
        features : ndarray
            Parsed features array normalized and with str columns converted
            to one hot vectors if desired

        Raises
        ------
        RuntimeError
            If the parsed features are not 2D, or if their number or names
            do not match the feature names already set on the model
        """
        features, feature_names = self._parse_data(features, names=names)

        if len(features.shape) != 2:
            msg = ('{} can only use 2D data as input!'.format(
                self.__class__.__name__))
            logger.error(msg)
            raise RuntimeError(msg)

        if self.feature_names is not None:
            if features.shape[1] != len(self.feature_names):
                msg = ('data has {} features but expected {}'.format(
                    features.shape[1], self.feature_dims))
                logger.error(msg)
                raise RuntimeError(msg)

        if self._feature_names is None:
            # First data seen by the model defines its feature names
            self._feature_names = feature_names
        elif self.feature_names != feature_names:
            # Expected names are the model's, provided names come from the
            # data being parsed
            msg = ('Expecting features with names: {}, but was provided with: '
                   '{}!'.format(self.feature_names, feature_names))
            logger.error(msg)
            raise RuntimeError(msg)

        if process_one_hot:
            # Always request the encoded column indices so the matching
            # feature names can be checked
            kwargs['return_ind'] = True
            features, one_hot_ind = PreProcess.one_hot(features, **kwargs)
            if one_hot_ind:
                one_hot_features = [self.feature_names[i] for i in one_hot_ind]
                self._check_one_hot_norm_params(one_hot_features)

        if self.normalize_features:
            features = self.normalize(features, names=feature_names)

        return features
コード例 #12
0
def test_norm_df():
    """Test ModelBase Normalization on a dataframe"""
    model = ModelBase(None,
                      feature_names=FEATURES.columns,
                      label_names=LABELS.columns,
                      normalize=True)

    baseline, means, stdevs = PreProcess.normalize(FEATURES)
    test = model.parse_features(FEATURES)
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.feature_means)
    assert np.allclose(stdevs, model.feature_stdevs)

    baseline, means, stdevs = PreProcess.normalize(LABELS)
    test = model._parse_labels(LABELS)
    # The comparison result must be asserted, otherwise the normalized label
    # values are never actually verified
    assert np.allclose(baseline.values, test)
    assert np.allclose(means, model.label_means)
    assert np.allclose(stdevs, model.label_stdevs)
コード例 #13
0
    def parse_features(self, features, names=None, **kwargs):
        """Pre-process feature data before training or prediction.

        One-hot encoding is applied based on self.one_hot_categories and
        feature normalization based on self.normalize_features.

        Parameters
        ----------
        features : pandas.DataFrame | dict | ndarray
            Features to train on or predict from
        names : list, optional
            List of feature names, by default None
        kwargs : dict, optional
            kwargs for PreProcess.one_hot

        Returns
        -------
        features : ndarray
            Parsed features array normalized and with str columns converted
            to one hot vectors if desired

        Raises
        ------
        RuntimeError
            If the parsed features are not 2D, if the feature names do not
            match the model feature names, or if the final number of
            features differs from self.feature_dims
        """
        features, feature_names = self._parse_data(features, names=names)

        n_dims = len(features.shape)
        if n_dims != 2:
            msg = ('{} can only use 2D data as input!'.format(
                self.__class__.__name__))
            logger.error(msg)
            raise RuntimeError(msg)

        if self.feature_names is None:
            self._feature_names = feature_names

        # One-hot encoding only runs when categories are configured and all
        # provided names are known model input feature names
        run_one_hot = self.one_hot_categories is not None
        if run_one_hot:
            run_one_hot = all(np.isin(feature_names,
                                      self.input_feature_names))

        if run_one_hot:
            self._check_one_hot_feature_names(feature_names)
            kwargs['feature_names'] = feature_names
            kwargs['categories'] = self.one_hot_categories
            features = PreProcess.one_hot(features, **kwargs)
        elif self.feature_names != feature_names:
            msg = ('Expecting features with names: {}, but was provided with: '
                   '{}!'.format(self.feature_names, feature_names))
            logger.error(msg)
            raise RuntimeError(msg)

        if self.normalize_features:
            features = self.normalize(features, names=self.feature_names)

        n_features = features.shape[1]
        if n_features != self.feature_dims:
            msg = ('data has {} features but expected {}'.format(
                n_features, self.feature_dims))
            logger.error(msg)
            raise RuntimeError(msg)

        return features
コード例 #14
0
ファイル: base_model.py プロジェクト: jatropj/phygnn
    def _check_one_hot_feature_names(self, feature_names):
        """
        Compare the one-hot feature names derived from the input names with
        the model feature names. If they differ, validate the one-hot
        categories against the input (and label) names and store the derived
        names as the model feature names.

        Parameters
        ----------
        feature_names : list
            Input feature names
        """
        encoded_names = self.make_one_hot_feature_names(
            feature_names, self.one_hot_categories)

        names_differ = encoded_names != self.feature_names
        if not names_differ:
            return

        # Copy first so the caller's list is not extended in place
        check_names = feature_names.copy()
        if self.label_names is not None:
            check_names += self.label_names

        PreProcess.check_one_hot_categories(self.one_hot_categories,
                                            feature_names=check_names)
        self._feature_names = encoded_names
コード例 #15
0
def test_norm_arr():
    """Test ModelBase Normalization on ndarrays with explicit names"""
    feature_names = FEATURES.columns.tolist()
    label_names = LABELS.columns.tolist()
    features = FEATURES.values
    labels = LABELS.values

    model = ModelBase(None,
                      feature_names=feature_names,
                      label_names=label_names,
                      normalize=True)

    # Features: parsed output and stored parameters must match PreProcess
    expected, exp_means, exp_stdevs = PreProcess.normalize(features)
    parsed = model.parse_features(features, names=feature_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.feature_means)
    assert np.allclose(exp_stdevs, model.feature_stdevs)

    # Labels: same checks through the label parser
    expected, exp_means, exp_stdevs = PreProcess.normalize(labels)
    parsed = model._parse_labels(labels, names=label_names)
    assert np.allclose(expected, parsed)
    assert np.allclose(exp_means, model.label_means)
    assert np.allclose(exp_stdevs, model.label_stdevs)
コード例 #16
0
    def unnormalize_prediction(self, prediction):
        """
        Unnormalize prediction if needed

        The normalization parameters of the first label are applied to the
        whole prediction. If no label mean is available the prediction is
        returned unchanged.

        Parameters
        ----------
        prediction : ndarray
           Model prediction

        Returns
        -------
        prediction : ndarray
            Native prediction
        """
        label_means = self.label_means
        if label_means is None or len(label_means) == 0:
            # No label normalization parameters at all, nothing to undo
            return prediction

        means = label_means[0]
        # Compare against None explicitly: a mean of exactly 0.0 is a valid
        # normalization parameter and must not disable un-normalization
        if means is not None:
            stdevs = self.label_stdevs[0]
            prediction = PreProcess.unnormalize(prediction, means, stdevs)

        return prediction
コード例 #17
0
    def build(cls,
              p_fun,
              feature_names,
              label_names,
              normalize=(True, False),
              one_hot_categories=None,
              loss_weights=(0.5, 0.5),
              hidden_layers=None,
              input_layer=None,
              output_layer=None,
              layers_obj=None,
              metric='mae',
              initializer=None,
              optimizer=None,
              learning_rate=0.01,
              history=None,
              kernel_reg_rate=0.0,
              kernel_reg_power=1,
              bias_reg_rate=0.0,
              bias_reg_power=1,
              name=None):
        """
        Build phygnn model from given features, layers and kwargs

        Parameters
        ----------
        p_fun : function
            Physics function to guide the neural network loss function.
            This fun must take (phygnn, y_true, y_predicted, p, **p_kwargs)
            as arguments with datatypes (PhysicsGuidedNeuralNetwork, tf.Tensor,
            np.ndarray, np.ndarray). The function must return a tf.Tensor
            object with a single numeric loss value (output.ndim == 0).
        feature_names : list | str
            Ordered list of feature names. A single str or any other
            iterable of names (tuple, ndarray, pandas.Index) is converted
            to a list.
        label_names : list | str
            Ordered list of label (output) names, converted to a list like
            feature_names.
        normalize : bool | tuple, optional
            Boolean flag(s) as to whether features and labels should be
            normalized. Possible values:
            - True means normalize both
            - False means don't normalize either
            - Tuple of flags (normalize_feature, normalize_label)
            by default (True, False)
        one_hot_categories : dict, optional
            Features to one-hot encode using given categories, if None do
            not run one-hot encoding, by default None
        loss_weights : tuple, optional
            Loss weights for the neural network y_true vs y_predicted
            and for the p_fun loss, respectively. For example,
            loss_weights=(0.0, 1.0) would simplify the phygnn loss function
            to just the p_fun output.
        hidden_layers : list, optional
            List of dictionaries of key word arguments for each hidden
            layer in the NN. Dense linear layers can be input with their
            activations or separately for more explicit control over the layer
            ordering. For example, this is a valid input for hidden_layers that
            will yield 8 hidden layers (10 layers including input+output):
                [{'units': 64, 'activation': 'relu', 'dropout': 0.01},
                 {'units': 64},
                 {'batch_normalization': {'axis': -1}},
                 {'activation': 'relu'},
                 {'dropout': 0.01},
                 {'class': 'Flatten'},
                 ]
        input_layer : None | bool | dict
            Input layer specification. Can be a dictionary similar to
            hidden_layers specifying a dense / conv / lstm layer.  Will
            default to a keras InputLayer with input shape = n_features.
            Can be False if the input layer will be included in the
            hidden_layers input.
        output_layer : None | bool | list | dict
            Output layer specification. Can be a list/dict similar to
            hidden_layers input specifying a dense layer with activation.
            For example, for a classification problem with a single output,
            output_layer should be [{'units': 1}, {'activation': 'sigmoid'}].
            This defaults to a single dense layer with no activation
            (best for regression problems).  Can be False if the output layer
            will be included in the hidden_layers input.
        layers_obj : None | phygnn.utilities.tf_layers.Layers
            Optional initialized Layers object to set as the model layers
            including pre-set weights. This option will override the
            hidden_layers, input_layer, and output_layer arguments.
        metric : str, optional
            Loss metric option for the NN loss function (not the physical
            loss function). Must be a valid key in phygnn.loss_metrics.METRICS
        initializer : tensorflow.keras.initializers, optional
            Instantiated initializer object. None defaults to GlorotUniform
        optimizer : tensorflow.keras.optimizers | dict | None
            Instantiated tf.keras.optimizers object or a dict optimizer config
            from tf.keras.optimizers.get_config(). None defaults to Adam.
        learning_rate : float, optional
            Optimizer learning rate. Not used if optimizer input arg is a
            pre-initialized object or if optimizer input arg is a config dict.
        history : None | pd.DataFrame, optional
            Learning history if continuing a training session.
        kernel_reg_rate : float, optional
            Kernel regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer weights and should reduce
            model complexity. Setting this to 0.0 will disable kernel
            regularization.
        kernel_reg_power : int, optional
            Kernel regularization power. kernel_reg_power=1 is L1
            regularization (lasso regression), and kernel_reg_power=2 is L2
            regularization (ridge regression).
        bias_reg_rate : float, optional
            Bias regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer biases and should reduce
            model complexity. Setting this to 0.0 will disable bias
            regularization.
        bias_reg_power : int, optional
            Bias regularization power. bias_reg_power=1 is L1
            regularization (lasso regression), and bias_reg_power=2 is L2
            regularization (ridge regression).
        name : None | str
            Optional model name for debugging.

        Returns
        -------
        model : PhygnnModel
            Initialized PhygnnModel instance
        """
        def _as_list(names):
            """Coerce a str / array-like / iterable of names to a list."""
            if isinstance(names, str):
                return [names]

            if isinstance(names, list):
                return names

            if hasattr(names, 'tolist'):
                return names.tolist()

            return list(names)

        # Names are concatenated with "+" below, which is only list
        # concatenation when both operands are lists (it is element-wise
        # for pandas.Index / ndarray and fails for str or tuple operands)
        feature_names = _as_list(feature_names)
        label_names = _as_list(label_names)

        if one_hot_categories is not None:
            check_names = feature_names + label_names
            PreProcess.check_one_hot_categories(one_hot_categories,
                                                feature_names=check_names)
            feature_names = cls.make_one_hot_feature_names(
                feature_names, one_hot_categories)

        model = PhysicsGuidedNeuralNetwork(p_fun,
                                           loss_weights=loss_weights,
                                           n_features=len(feature_names),
                                           n_labels=len(label_names),
                                           hidden_layers=hidden_layers,
                                           input_layer=input_layer,
                                           output_layer=output_layer,
                                           layers_obj=layers_obj,
                                           metric=metric,
                                           initializer=initializer,
                                           optimizer=optimizer,
                                           learning_rate=learning_rate,
                                           history=history,
                                           kernel_reg_rate=kernel_reg_rate,
                                           kernel_reg_power=kernel_reg_power,
                                           bias_reg_rate=bias_reg_rate,
                                           bias_reg_power=bias_reg_power,
                                           feature_names=feature_names,
                                           output_names=label_names,
                                           name=name)

        model = cls(model,
                    feature_names=feature_names,
                    label_names=label_names,
                    normalize=normalize,
                    one_hot_categories=one_hot_categories)

        return model
コード例 #18
0
ファイル: tf_model.py プロジェクト: NREL/phygnn
    def build(cls,
              feature_names,
              label_names,
              normalize=(True, False),
              one_hot_categories=None,
              hidden_layers=None,
              learning_rate=0.001,
              loss="mean_squared_error",
              metrics=('mae', 'mse'),
              optimizer_class=Adam,
              **kwargs):
        """
        Build tensorflow sequential model from given features, layers and
        kwargs

        Parameters
        ----------
        feature_names : list | str
            Ordered list of feature names. A single str or any other
            iterable of names (tuple, ndarray, pandas.Index) is converted
            to a list.
        label_names : list | str
            Ordered list of label (output) names, converted to a list like
            feature_names.
        normalize : bool | tuple, optional
            Boolean flag(s) as to whether features and labels should be
            normalized. Possible values:
            - True means normalize both
            - False means don't normalize either
            - Tuple of flags (normalize_feature, normalize_label)
            by default (True, False)
        one_hot_categories : dict, optional
            Features to one-hot encode using given categories, if None do
            not run one-hot encoding, by default None
        hidden_layers : list, optional
            List of tensorflow layers.Dense kwargs (dictionaries)
            if None use a single linear layer, by default None
        learning_rate : float, optional
            tensorflow optimizer learning rate, by default 0.001
        loss : str, optional
            name of objective function, by default "mean_squared_error"
        metrics : list, optional
            List of metrics to be evaluated by the model during training and
            testing, by default ('mae', 'mse')
        optimizer_class : tf.keras.optimizers, optional
            Optional explicit request of optimizer. This should be a class
            that will be instantiated in the TfModel.compile_model() method
            The default is the Adam optimizer
        kwargs : dict
            kwargs for tensorflow.keras.models.compile

        Returns
        -------
        model : TfModel
            Initialized TfModel obj
        """
        def _as_list(names):
            """Coerce a str / array-like / iterable of names to a list."""
            if isinstance(names, str):
                return [names]

            if isinstance(names, list):
                return names

            if hasattr(names, 'tolist'):
                return names.tolist()

            return list(names)

        # Names are concatenated with "+" below, which is only list
        # concatenation when both operands are lists (it is element-wise
        # for pandas.Index / ndarray and fails for str or tuple operands)
        feature_names = _as_list(feature_names)
        label_names = _as_list(label_names)

        if one_hot_categories is not None:
            check_names = feature_names + label_names
            PreProcess.check_one_hot_categories(one_hot_categories,
                                                feature_names=check_names)
            feature_names = cls.make_one_hot_feature_names(
                feature_names, one_hot_categories)

        model = cls.compile_model(len(feature_names),
                                  n_labels=len(label_names),
                                  hidden_layers=hidden_layers,
                                  learning_rate=learning_rate,
                                  loss=loss,
                                  metrics=metrics,
                                  optimizer_class=optimizer_class,
                                  **kwargs)

        model = cls(model,
                    feature_names=feature_names,
                    label_names=label_names,
                    normalize=normalize,
                    one_hot_categories=one_hot_categories)

        return model
コード例 #19
0
ファイル: random_forest_model.py プロジェクト: NREL/phygnn
    def build_trained(cls,
                      features,
                      label,
                      normalize=True,
                      one_hot_categories=None,
                      shuffle=True,
                      save_path=None,
                      compile_kwargs=None,
                      parse_kwargs=None,
                      fit_kwargs=None):
        """
        Compile a Random Forest Model, wrap it in this class, train it on the
        given features and label, and optionally save it

        Parameters
        ----------
        features : pandas.DataFrame
            Model features
        label : pandas.DataFrame
            label to train on
        normalize : bool | tuple, optional
            Boolean flag(s) as to whether features and labels should be
            normalized. Possible values:
            - True means normalize both
            - False means don't normalize either
            - Tuple of flags (normalize_feature, normalize_label)
            by default True
        one_hot_categories : dict, optional
            Features to one-hot encode using given categories, if None do
            not run one-hot encoding, by default None
        shuffle : bool
            Flag to randomly subset the validation data and batch selection
            from features and labels.
        save_path : str
            Directory path to save model to. The RandomForest Model will be
            saved to the directory while the framework parameters will be
            saved in json. If None the model is not saved.
        compile_kwargs : dict
            kwargs for sklearn.ensemble.RandomForestRegressor
        parse_kwargs : dict
            kwargs for cls.parse_features
        fit_kwargs : dict
            kwargs for sklearn.ensemble.RandomForestRegressor.fit

        Returns
        -------
        model : RandomForestModel
            Initialized and trained RandomForestModel obj
        """
        compile_kwargs = {} if compile_kwargs is None else compile_kwargs

        # Only the names are needed here; the data is parsed during training
        _, feature_names = cls._parse_data(features)
        _, label_name = cls._parse_data(label)

        estimator = cls.compile_model(**compile_kwargs)

        if one_hot_categories is not None:
            PreProcess.check_one_hot_categories(
                one_hot_categories,
                feature_names=feature_names + label_name)
            feature_names = cls.make_one_hot_feature_names(
                feature_names, one_hot_categories)

        model = cls(estimator,
                    feature_names=feature_names,
                    label_name=label_name,
                    normalize=normalize,
                    one_hot_categories=one_hot_categories)

        model.train_model(features,
                          label,
                          shuffle=shuffle,
                          parse_kwargs=parse_kwargs,
                          fit_kwargs=fit_kwargs)

        if save_path is not None:
            model.save_model(save_path)

        return model