コード例 #1
0
 def test_cv_score_is_more_than_zero_with_LSVC_SVC_KNN_GNB_accuracy_5(self):
     """Every listed classifier must get an "accuracy" score above zero.

     Loads the diabetes dataset, splits it into x and y, and scores an SVC,
     a KNeighborsClassifier, a GaussianNB and a LinearSVC (dual=False)
     through ``CVScore.get_score`` called with "accuracy" and 5.
     The test passes only when all four scores are strictly positive.
     """
     # Load the diabetes dataset through the loader creator.
     dataset_path = ".\\..\\datasets\\diabetes.csv"
     loader = self._loader_creator.create_loader(dataset_path, "CSV")
     data_frame = loader.get_file_transformed()
     # Separate the data frame into x and y.
     features, target = SplitterReturner().split_x_y_from_df(data_frame)
     scorer = CVScore()
     # Build the estimators in the order they are scored.
     estimator_names = (
         "SVC", "KNeighborsClassifier", "GaussianNB", "LinearSVC",
     )
     estimators = [
         self._estimator_creator.create_estimator(name)
         for name in estimator_names
     ]
     # Only the LinearSVC (last entry) is reconfigured, with dual=False.
     estimators[-1] = estimators[-1].set_params(dual=False)
     # Score each estimator and record whether its score is positive.
     positive_flags = []
     for estimator in estimators:
         fold_score = scorer.get_score(
             features, target, estimator, "accuracy", 5)
         print(estimator.__class__.__name__, "score is:", fold_score)
         positive_flags.append(bool(fold_score > 0))
     print(positive_flags)
     # all() is True only when every single score was greater than zero.
     self.assertTrue(all(positive_flags))
コード例 #2
0
 def test_cv_score_is_more_than_zero_with_APROPAGATION_KMEANS_MINIKMEANS_MEANSHIFT_mutual_info_score_5(
         self):
     """Score four clustering estimators with "mutual_info_score".

     Loads the iris dataset, splits it into x and y, and scores an
     AffinityPropagation (random_state=0), a KMeans, a MiniBatchKMeans and
     a MeanShift through ``CVScore.get_score`` called with
     "mutual_info_score" and 5. The per-estimator results are only printed.
     """
     # Load the iris dataset through the loader creator.
     dataset_path = ".\\..\\datasets\\iris.csv"
     loader = self._loader_creator.create_loader(dataset_path, "CSV")
     data_frame = loader.get_file_transformed()
     # Separate the data frame into x and y.
     features, target = SplitterReturner().split_x_y_from_df(data_frame)
     scorer = CVScore()
     # Build the estimators in the order they are scored.
     estimator_names = (
         "AffinityPropagation", "KMeans", "MiniBatchKMeans", "MeanShift",
     )
     estimators = [
         self._estimator_creator.create_estimator(name)
         for name in estimator_names
     ]
     # Only the AffinityPropagation (first entry) gets a fixed random_state.
     estimators[0] = estimators[0].set_params(random_state=0)
     # Score each estimator and record whether its score is positive.
     positive_flags = []
     for estimator in estimators:
         fold_score = scorer.get_score(
             features, target, estimator, "mutual_info_score", 5)
         print(estimator.__class__.__name__, "score is:", fold_score)
         positive_flags.append(bool(fold_score > 0))
     # NOTE(review): nothing is asserted on positive_flags, so this test can
     # only fail if an exception is raised above - confirm whether an
     # any()/all() assertion was intended here.
     print(positive_flags)
コード例 #3
0
 def test_cv_score_is_more_than_zero_with_LSVR_SVR_LASSO_SGD_explained_variance_5(
         self):
     """At least one listed estimator must score above zero.

     Loads the white wine quality dataset with the "SCSV" loader, splits it
     into x and y, and scores a LinearSVR, an SVR, a Lasso and an
     SGDClassifier through ``CVScore.get_score`` called with
     "explained_variance" and 5. The test passes when any of the four
     scores is strictly positive.
     """
     # Load the white wine quality dataset through the loader creator.
     dataset_path = ".\\..\\datasets\\winequality-white.csv"
     loader = self._loader_creator.create_loader(dataset_path, "SCSV")
     data_frame = loader.get_file_transformed()
     # Separate the data frame into x and y.
     features, target = SplitterReturner().split_x_y_from_df(data_frame)
     scorer = CVScore()
     # Build the estimators in the order they are scored.
     # NOTE(review): "SGDClassifier" sits next to three regressors - confirm
     # that a classifier (rather than an SGD regressor) is intended here.
     estimator_names = ("LinearSVR", "SVR", "Lasso", "SGDClassifier")
     estimators = [
         self._estimator_creator.create_estimator(name)
         for name in estimator_names
     ]
     # Score each estimator and record whether its score is positive.
     positive_flags = []
     for estimator in estimators:
         fold_score = scorer.get_score(
             features, target, estimator, "explained_variance", 5)
         print(estimator.__class__.__name__, "score is:", fold_score)
         positive_flags.append(bool(fold_score > 0))
     print(positive_flags)
     # any() is True as soon as one of the scores is greater than zero.
     self.assertTrue(any(positive_flags))
コード例 #4
0
 def test_n_folds_validation_and_score_type_raises_ValueError(self):
     """``CVScore.get_score`` must raise ValueError for "roc" and 2.

     Loads the diabetes dataset, splits it into x and y, configures a
     LinearSVC with dual=False and random_state=0, and expects the scoring
     call to be rejected with a ValueError.
     """
     # Load the diabetes dataset through the loader creator.
     dataset_path = ".\\..\\datasets\\diabetes.csv"
     loader = self._loader_creator.create_loader(dataset_path, "CSV")
     data_frame = loader.get_file_transformed()
     # Separate the data frame into x and y.
     features, target = SplitterReturner().split_x_y_from_df(data_frame)
     scorer = CVScore()
     # Build and configure the LinearSVC estimator.
     estimator = self._estimator_creator.create_estimator("LinearSVC")
     estimator.set_params(dual=False, random_state=0)
     # NOTE(review): presumably "roc" is an unsupported score type and/or 2
     # is a rejected fold count - verify against CVScore.get_score.
     with self.assertRaises(ValueError):
         scorer.get_score(features, target, estimator, "roc", 2)
コード例 #5
0
class SBSMachineLearning(ABC):
    """Abstract base that stores the state needed to score a model.

    It keeps a data frame, a feature selector, a parameter selector, the
    best features and parameters, the initial parameters and an estimator.
    Each is exposed through a plain read/write property. Subclasses must
    implement ``score_model``.
    """

    # Class-level defaults. Every setter below assigns on the instance, so
    # these values are only seen until the matching property is first set.
    _data_frame = pd.DataFrame()
    _feature_selector: FeatureSelection = None
    _parameter_selector: ParameterSearch = None
    _best_features: NpArray = None
    _best_params: dict = None
    _initial_params: dict = None
    _clf: Any = None
    # Created once when the class body runs; no setter is defined for it.
    _cv_score: CVModelScore = CVScore()

    @property
    def data_frame(self) -> DataFrame:
        """Return the stored data frame."""
        return self._data_frame

    @data_frame.setter
    def data_frame(self, value: DataFrame) -> None:
        self._data_frame = value

    @property
    def feature_selector(self) -> FeatureSelection:
        """Return the stored feature selector."""
        return self._feature_selector

    @feature_selector.setter
    def feature_selector(self, value: FeatureSelection) -> None:
        self._feature_selector = value

    @property
    def parameter_selector(self) -> ParameterSearch:
        """Return the stored parameter selector."""
        return self._parameter_selector

    @parameter_selector.setter
    def parameter_selector(self, value: ParameterSearch) -> None:
        self._parameter_selector = value

    @property
    def best_features(self) -> NpArray:
        """Return the stored best features."""
        return self._best_features

    @best_features.setter
    def best_features(self, value: NpArray) -> None:
        self._best_features = value

    @property
    def best_parameters(self) -> dict:
        """Return the stored best parameters."""
        return self._best_params

    @best_parameters.setter
    def best_parameters(self, value: dict) -> None:
        self._best_params = value

    @property
    def initial_parameters(self) -> dict:
        """Return the stored initial parameters."""
        return self._initial_params

    @initial_parameters.setter
    def initial_parameters(self, value: dict) -> None:
        self._initial_params = value

    @property
    def estimator(self) -> Any:
        """Return the stored estimator."""
        return self._clf

    @estimator.setter
    def estimator(self, value: Any) -> None:
        self._clf = value

    @abstractmethod
    def score_model(self, score_type: str, n_folds_validation: int) -> float:
        """Score the model; must be implemented by subclasses.

        Args:
            score_type: name of the score to compute.
            n_folds_validation: number of validation folds.

        Returns:
            The score as a float.
        """
コード例 #6
0
class FeatureSelection(ABC):
    """Abstract interface for feature selection strategies.

    Subclasses must implement ``select_features``.
    """

    # Created once when the class body runs.
    _cv_score: CVModelScore = CVScore()

    @abstractmethod
    def select_features(
            self,
            x: DataFrame,
            y: NpArray,
            model: Any,
            score_type: str,
            n_folds_validation: int) -> tuple:
        """Select features; must be implemented by subclasses.

        Args:
            x: data frame passed as the x argument.
            y: array passed as the y argument.
            model: the model object to use.
            score_type: name of the score to use.
            n_folds_validation: number of validation folds.

        Returns:
            A tuple; its contents are defined by the implementing subclass.
        """