    def test_LogReg_train(self, spark_data):
        """
        Unit test for the LogReg_train method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-OCT-16 09:09")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()

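        # Train logistic regression on the full feature set; this should
        # create the 'log' model attribute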
        test_class.LogReg_train()
        assert hasattr(test_class,
                       'log'), "log attribute must have been created"

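        # Retrain using only the top features ranked by the random forest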
        pred_features = test_class.RF_train()
        top_features = test_class.feature_selection(pred_features)
        test_class.LogReg_train(top_features=top_features)
        assert hasattr(
            test_class, 'logreg_features'
        ), "For this example, logreg_features must have been created"

    def test_RF_predict(self, spark_data):
        """
        Unit test for the RF_predict method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-OCT-16 09:09")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()
        test_class.RF_train()

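        # The default call should return three objects: predictions, score
        # and confusion matrix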
        p, s, cm = test_class.RF_predict()

        assert len(p) == len(
            test_class.y_test
        ), "Prediction list must be same size as y_test attribute"

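        # true_label=True returns three objects; true_label=False returns
        # only the predictions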
        assert len(test_class.RF_predict(true_label=True)
                   ) == 3, "In this example, three objects must be returned"
        assert len(test_class.RF_predict(true_label=False)) == len(
            p), "In this example, only predictions are returned"

    def test_RF_train(self, spark_data):
        """
        Unit test for the RF_train method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps up to RF_train method
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-OCT-16 09:10")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()
        test_class.split_data()

        # Test method
        pred_features = test_class.RF_train()

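        # The fitted random forest is stored on the instance, and one
        # importance score is returned per feature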
        assert hasattr(test_class, 'rf'), "rf model attribute must have been created"
        assert len(pred_features) == len(test_class.X.columns), \
            "There must be as many features as in the original X attribute"

    def test_data_prep(self, spark_data):
        """
        Unit test for the data_prep method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps up to data_prep method
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-SEP-16 09:19")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()

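        # data_prep should build the feature matrix X, one feature vector
        # per data frame in df_list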
        test_class.data_prep()

        # Testing for X is enough: if X has been created, then all the other
        # attributes (e.g. the Sub_df_dict) must have been created too
        assert hasattr(test_class, "X"), "X attribute must have been created"
        assert len(test_class.df_list) == len(
            test_class.X), "Same number of feature vectors as data frames"
        assert len(
            test_class.X.columns
        ) == 152, "For this example, number of features should be 152"

    def test_split_data(self, spark_data):
        """
        Unit test for the split_data method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps up to split_data method
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-OCT-16 09:10")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()

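        # split_data should create the train/test splits of X and y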
        test_class.split_data()

        # assert variables exist
        assert hasattr(
            test_class,
            'X_train'), "X_train attribute data frame should have been created"
        assert hasattr(
            test_class,
            'X_test'), "X_test attribute data frame should have been created"
        assert hasattr(
            test_class,
            'y_train'), "y_train attribute data frame should have been created"
        assert hasattr(
            test_class,
            'y_test'), "y_test attribute data frame should have been created"

        # assert not empty
        assert not test_class.X_train.empty, "X_train attribute must not be empty"
        assert not test_class.X_test.empty, "X_test attribute must not be empty"
        assert test_class.y_train.size != 0, "y_train attribute must not be empty"
        assert test_class.y_test.size != 0, "y_test attribute must not be empty"

        # assert dimensions
        assert test_class.y_test.ndim == 1, "y_test attribute must be 1-D (pandas series)"
        assert test_class.y_train.ndim == 1, "y_train attribute must be 1-D (pandas series)"
        assert len(test_class.X_test.columns) == len(
            test_class.X.columns), "X_test attribute must have the same number of columns as X"
        assert len(test_class.X_train.columns) == len(
            test_class.X.columns), "X_train attribute must have the same number of columns as X"

        # Test the test_size parameter: a value of 1 or more must raise an
        # AssertionError (assumes pytest is imported at module level)
        with pytest.raises(AssertionError):
            test_class.split_data(test_size=6)

    def test_feature_selection(self, spark_data):
        """
        Unit test for the feature_selection method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="30-SEP-16 01:09",
                             end="18-OCT-16 09:09")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()  # need to create X

        # Create an example list of feature scores
        feature_scores = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 1, 1, 1, 0.1, 0.1]
        feature_scores.extend((152 - len(feature_scores)) * [0])
        # The corresponding feature names, for reference only
        # (feature_selection maps scores to names via the X attribute)
        feature_names = [
            'mean_WH_P_0', 'std_WH_P_0', 'mean_DH_P_0', 'std_DH_P_0',
            'mean_WH_T_0', 'std_WH_T_0', 'mean_DH_T_0', 'mean_WH_P_1',
            'std_WH_P_1', 'mean_DH_P_1', 'std_DH_P_1', 'mean_WH_T_1',
            'std_WH_T_1', 'mean_DH_T_1'
        ]

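        # With the default top_n of 15, fifteen feature names should be returned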
        assert len(test_class.feature_selection(
            feature_scores)) == 15, "It must be top_n sized, here 15"
        assert test_class.feature_selection(feature_scores) == ['std_WH_P_1', 'mean_WH_P_1', 'std_DH_T_0',
                                                                'mean_DH_T_0', 'std_WH_T_0', 'mean_WH_T_0',
                                                                'std_DH_P_0', 'mean_DH_P_0', 'std_WH_P_0',
                                                                'mean_WH_P_0', 'mean_DH_P_1', 'std_DH_P_1',
                                                                'mean_WH_T_1', 'std_WH_T_1', 'mean_DH_T_1'], \
            "In this example, this list of feature names is expected"

        assert test_class.feature_selection(feature_scores, top_n=3) == ['std_WH_P_1', 'mean_WH_P_1', 'std_DH_T_0'], \
            "In this example, this list of feature names is expected"

    def test_LogReg_pred(self, spark_data):
        """
        Unit test for the LogReg_pred method

        Parameters
        ----------
        spark_data : Spark data frame
            Data frame of well data
        """
        # Standard Data Engineering steps
        test_class = Slug_Detection(spark_data)
        test_class.timeframe(start="18-SEP-16 01:09",
                             end="18-OCT-16 09:09")  # example interval
        test_class.data_range(verbose=False)
        test_class.clean_choke(method="99")
        test_class.df_toPandas()
        test_class.data_prep()

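        # LogReg_pred should return predictions, probabilities, score and
        # confusion matrix by default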
        test_class.LogReg_train()
        pred, prob, s, cm = test_class.LogReg_pred()
        assert len(pred) == len(
            test_class.y_test
        ), "Prediction list must be same size as y_test attribute"

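        # true_label=True should return four objects, true_label=False only
        # two (presumably the predictions and probabilities)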
        assert len(test_class.LogReg_pred(true_label=True)
                   ) == 4, "In this example, four objects must be returned"
        assert len(test_class.LogReg_pred(true_label=False)
                   ) == 2, "In this example, two objects must be returned"

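        # After retraining on the selected top features, X_test should be
        # reduced to those columns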
        pred_features = test_class.RF_train()
        top_features = test_class.feature_selection(pred_features)
        test_class.LogReg_train(top_features=top_features)
        pred, prob, s, cm = test_class.LogReg_pred()

        assert len(test_class.X_test.columns) == len(
            top_features), "Top features selection must have been performed"